// ---------------------------------------------------------------------------
// Data preparation for the regional (city/oblast-level) panel.
//
// Monetary series given in tenge are converted with
//     ((x / cpi2005) * 100) / exchange_usd
// i.e. deflated by cpi2005 and divided by the tenge/USD exchange rate
// (assumes cpi2005 is a price index with 2005 = 100 -- TODO confirm).
//
// Plain comments use `//`; `///` is Stata's line-continuation marker and
// joins the following command onto the comment line.
// ---------------------------------------------------------------------------

label variable selfempl "number of self-employed in a region"
label variable sme "number of SMEs in a region"

// GDP: keep the nominal series as gdp_city2, build the real USD series.
ren gdp_city gdp_city2
gen gdp_city = ((gdp_city2 / cpi2005) * 100) / exchange_usd
label variable gdp_city "GDP. mln USD 2005 const"

// Museum visitors: rescale (x * 10000, then / 10) and keep the original name.
gen museumvisitors = museumvisitors10000 * 10000
drop museumvisitors10000
gen museumvisitors2 = museumvisitors / 10
drop museumvisitors
ren museumvisitors2 museumvisitors
label variable museumvisitors "Number of museum visitors"

// Capital investment: nominal series kept as capital_investm2.
ren capital_investm capital_investm2
gen capital_investm = ((capital_investm2 / cpi2005) * 100) / exchange_usd
gen capital_invest = capital_investm / 1000
label variable capital_invest "capital investment mln USD 2005 const"

gen construct = (((construction2 / cpi2005) * 100) / exchange_usd)
label variable construct "construction mln USD 2005 const"

gen IT_services = ((it_services / cpi2005) * 100) / exchange_usd
label variable IT_services "Info & communic services mln USD 2005 const"

gen IT_devlp = ((it_developmet / cpi2005) * 100) / exchange_usd
label variable IT_devlp "Investmjent in development software and other ICT mln USD 2005 const"

label variable sharegdp "Regional share in GDP"

// Sector shares: each sector as a percentage of nominal city GDP (gdp_city2).
// The raw sector series are renamed first so the share can take the
// original variable name.
gen telecom = (communication / gdp_city2) * 100
label variable telecom " % of city GDP generated by telecommunics"

ren transportation transport
gen transportation = (transport / gdp_city2) * 100
label variable transportation " % of city GDP generated by transportation"

gen construction = ((construction2 / 1000) / gdp_city2) * 100
label variable construction " % of city GDP generated by construction"

ren agriculture agricult
gen agriculture = (agricult / gdp_city2) * 100
label variable agriculture " % of city GDP generated by agriculture"

ren mining_manuf manuf
gen mining_manuf = (manuf / gdp_city2) * 100
label variable mining_manuf " % of city GDP generated by manufacturing"

gen capital_investm_gdp = capital_invest / gdp_city
label variable capital_investm_gdp "capital investment to GDP ratio"

gen ICT = ((it_services + it_developmet) / (gdp_city2)) * 100
label variable ICT " % of city GDP generated by ICT "

gen lngdp_city = ln(gdp_city)
label variable lngdp_city "log of gdp_city"

// ---------------------------------------------------------------------------
// Creating regions as in the WB/EBRD data: map city codes to 5 areas.
// ---------------------------------------------------------------------------
gen area = 1 if city == 206
replace area = 2 if city == 213
replace area = 3 if city == 200 | city == 201 | city == 207 | city == 211 | city == 212 | city == 214
replace area = 4 if city == 202 | city == 205 | city == 208 | city == 210 | city == 215
replace area = 5 if city == 203 | city == 204 | city == 209
label define reg 1 "Center" 2 "East" 3 "North" 4 "South" 5 "West", modify
label values area reg
label variable area "one of 5 aggregated regions by WB/EBRD"

// ---------------------------------------------------------------------------
// Generating the matching region variable for the WB/EBRD data.
// NOTE(review): this section reads `country` and `a2x` and issues a second
// `gen area`, which fails with "already defined" if the dataset from the
// section above is still in memory. Presumably it is meant to be run with
// the WB/EBRD firm-level dataset loaded instead -- confirm and add the
// appropriate `use`/`preserve` handling.
// ---------------------------------------------------------------------------
gen KZ = 1 if country == "Kazakhstan2009" | country == "Kazakhstan2013"
gen area = 1 if a2x == "Center" & KZ == 1
replace area = 2 if a2x == "East" & KZ == 1
replace area = 3 if a2x == "North" & KZ == 1
replace area = 4 if a2x == "South" & KZ == 1
replace area = 5 if a2x == "West" & KZ == 1
label define reg 1 "Center" 2 "East" 3 "North" 4 "South" 5 "West", modify
label values area reg
label variable area "one of 5 aggregated regions by WB/EBRD"

// ---------------------------------------------------------------------------
// Per-capita and intensity indicators.
// ---------------------------------------------------------------------------
gen gdp_pc = (gdp_city / population) * 1000
label variable gdp_pc "GDP pr capita in thsd USD 2005 const prices"

gen sme_density = (sme / population) * 1000
label variable sme_density "# of SMEs per 1000 residents in region"

label variable mortality "Mortality Rate per 1000 people"

// RD must exist before RD_intens is computed. Previously RD_intens was
// generated first, so `RD` either abbreviated to the nominal RD_exp
// (tenge / USD ratio) or stopped the script when abbreviation is off.
gen RD = (((RD_exp / cpi2005) * 100) / exchange_usd)
label variable RD "R&D expenditure mln USD 2005 const"

gen RD_intens = (RD / gdp_city)
label variable RD_intens "R&D expenditure to GDP in region"

// ---------------------------------------------------------------------------
// Averages (egen mean) that will later be matched to the firm-level data.
// NOTE(review): the original comment says the averages are at the level of
// the 5 aggregated regions, but the code groups by `city` -- confirm.
// ---------------------------------------------------------------------------

// Lag operators (l2.) need the panel structure to be declared first.
xtset city year

twoway scatter gdp_pc sme_density
twoway scatter gdp_pc l2.sme_density
twoway scatter gdp_pc ICT, mlabel(town)
twoway scatter gdp_pc l2.ICT

bysort city: egen ICT_avg = mean(ICT)
bysort city: egen GDP_pc_avg = mean(gdp_pc)
bysort city: egen sme_dens_avg = mean(sme_density)
// Scatter plots of the per-city averages, labelled by town.
twoway scatter GDP_pc_avg ICT_avg, mlabel(town)
twoway scatter GDP_pc_avg sme_dens_avg, mlabel(town)

// ---------------------------------------------------------------------------
// Descriptives
// (plain comments use `//`; `///` is kept only for real line continuation)
// ---------------------------------------------------------------------------
sum population employm unemploym university patents innov_products mortality students ///
    physicians hospitalbeds poor sqmliving museumvisitors capital_invest capital_investm_gdp RD_intens gdp_pc sme_density

// Industry split
sum telecom transportation agriculture mining_manuf construction ICT

// Means of selected indicators by town and year.
// Uses the yearly series gdp_pc: the former `GDP_pc` is not a variable
// (names are case-sensitive) and could only resolve, by abbreviation, to the
// all-years average GDP_pc_avg.
// NOTE(review): contents() is the pre-Stata 17 -table- syntax; on Stata 17+
// it presumably needs version control (e.g. `version 16: table ...`).
table town year, c(mean gdp_pc mean ICT mean sme_density)

// ---------------------------------------------------------------------------
// Analysis
// ---------------------------------------------------------------------------
pwcorr lngdp_city university patents innov_products ICT construction mining_manuf agriculture telecom transportation capital_investm_gdp sharegdp, star(.05)

// Declare the panel (unit: city, time: year) for xtreg and the l. operators.
xtset city year

// Panel regression of log real GDP with year dummies and
// standard errors clustered by city.
xtreg lngdp_city l.selfempl l.sme patents innov_products ICT construction agriculture telecom transportation capital_investm_gdp i.year, vce(cluster city)

// ---------------------------------------------------------------------------
// Labelled copy of the city code and remaining variable labels.
// ---------------------------------------------------------------------------
gen region = city
label define region ///
    200 "Akmola region" ///
    201 "Aktobe region" ///
    202 "Almaty region" ///
    203 "Atyrau region" ///
    204 "West Kazakhstan region" ///
    205 "Jambyl region" ///
    206 "Karaganda region" ///
    207 "Kostanay region" ///
    208 "Kyzylorda region" ///
    209 "Mangystau region" ///
    210 "South Kazakhstan region" ///
    211 "Pavlodar region" ///
    212 "North Kazakhstan region" ///
    213 "East Kazakhstan region" ///
    214 "Astana city" ///
    215 "Almaty city"
label values region region

label variable life_expct "Life Expectancy"
label variable students "Number of Students"
label variable physicians "Doctors per 10.000 visits"
label variable hospitalbeds "Hospital beds per 10.000 people"
label variable poor "Poverty. %"
label variable sqmliving "Sq meters per resident"
label variable transport "Transport availability, tenge"
label variable employm "Employment, people"
label variable unemploym "Unemployment. %"
label variable university "Number of Universities"
label variable patents "Number of Patents"
label variable innov_products "Innovative Production (% of GDP)"
label variable region "Administrative region"
label variable doctor_visits "Doctor visits per 10.000 people"
label variable communication "Regional communication"
// NOTE(review): `transportation` is labelled a second time here; this
// overwrites the label it was given when the variable was generated.
label variable transportation "Regional transportation"
label variable agricult "Regional agriculture"
label variable manuf "Regional manufacturing"
label variable year "year"