/*--------------------------------------------------------
----------------------------------------------------------
Data preparation and sample restriction
----------------------------------------------------------
--------------------------------------------------------*/
	
* Clean and generate variables of interest
	* Load the raw data, keep an untouched copy, then restrict to rows that
	* actually carry a point estimate.
	loaddata
	save ${pathtemp}data_all.dta, replace
	drop if e_beta==.

	* Rows that report significance stars only (no t-stat, s.e. or p-value):
	* use the star threshold itself as the p-value.
	* NOTE(review): these imputed p-values are not converted into a t-stat or
	* s.e. below, so such rows still end up with a missing e_se - confirm that
	* this is intended.
	local stars_only "e_tstat==. & e_se==. & e_pvalue==."
	replace e_pvalue = cond(e_stars=="*", 0.05, cond(e_stars=="**", 0.01, 0.001)) ///
		if `stars_only' & inlist(e_stars, "*", "**", "***")

	* Fill in whichever of t-stat / p-value / s.e. is missing from the others.
	* The t-stat is stored in absolute value when it is derived from beta/se.
	replace e_tstat = abs(e_beta/e_se) if e_tstat==.
	* p-value approximated from the t-stat, see https://www.bmj.com/content/343/bmj.d2304
	replace e_pvalue = exp(-(0.717 * abs(e_tstat)) - (0.416*(e_tstat*e_tstat))) if e_pvalue==.
	* A zero s.e. is treated as "not reported", both before and after the fill-in
	replace e_se = . if e_se==0
	replace e_se = abs(e_beta/e_tstat) if e_se==.
	replace e_se = . if e_se==0
	summarize e_tstat e_se e_pvalue

	* removing 5 outliers (p_id=8 and p_id=69); rows with a missing s.e. are kept
	drop if e_se >100 & e_se!=.
	
	* inverse standard error
	gen e_ise = 1/e_se
	lab var e_ise "Inverse standard error"

	* total effect: 1 for the pure spatial approach, 0 for the mixture and
	* national skill-cell approaches, missing for anything else
	gen e_total_effect = cond(e_approach=="pure spatial", 1, ///
		cond(inlist(e_approach, "mixture", "national skill-cell"), 0, .))
	lab var e_total_effect "Total effect - Dustmann et al JEP 2016"

	* labelled categorical version of the approach; the national skill-cell
	* category is the reference and therefore carries no "(ref. ...)" suffix
	tab e_approach
	gen e_approach2 = e_approach
	* "Approach: " + value with its first letter capitalised
	replace e_approach2 = "Approach: " + strupper(substr(e_approach, 1, 1)) + substr(e_approach, 2, .) ///
		if inlist(e_approach, "mixture", "national skill-cell", "pure spatial", "other")
	replace e_approach2 = e_approach2 + " (ref. national skill-cell)" ///
		if inlist(e_approach, "mixture", "pure spatial", "other")
	encode e_approach2, gen(e_approach_enc)
	drop e_approach2
	
	* set of high quality journals
	* p_lead_j = 1 when the publication outlet is one of the journals listed
	* below, 0 otherwise.
	* The outlet name is trimmed before matching. The previous list contained
	* " journal of population economics" with a leading blank, which could
	* never equal a clean outlet name, and listed "journal of labor economics"
	* twice. inlist() accepts at most 10 string arguments, hence two calls.
	g p_lead_j = 0
	lab var p_lead_j "Leading academic journal"
	replace p_lead_j = 1 if ///
		inlist(strtrim(p_pub_source), ///
			"american economic review", ///
			"journal of political economy", ///
			"review of economic studies", ///
			"quarterly journal of economics", ///
			"journal of the european economic association", ///
			"journal of labor economics", ///
			"economic journal") | ///
		inlist(strtrim(p_pub_source), ///
			"european economic review", ///
			"journal of economic geography", ///
			"journal of population economics", ///
			"journal of economic history", ///
			"journal of health economics", ///
			"world development")
		  * no obs for econometrica
	
	* nb of estimates by papers: log of the largest estimate id within a paper
	* (presumably e_id numbers the estimates 1..K inside each paper - TODO confirm)
	bysort p_id: egen temp = max(e_id)
	gen p_ln_nb_est = ln(temp)
	lab var p_ln_nb_est "(log) Nr of estimates"
	drop temp

	* nb of years in the sample: both end years are counted, hence the + 1
	gen e_ln_lenght = ln(e_year_max - e_year_min + 1)
	lab var e_ln_lenght "(log) Nr of years in the sample"
	
	* delete outliers using Grubbs correction
	* Run separately for elasticities ("ela") and semi-elasticities ("semi").
	* Flagged estimates are set to missing rather than dropped; the count of
	* remaining estimates is shown before the first pass and after each pass.
	cap ssc install grubbs
	count if e_beta !=.
	foreach kind in ela semi {
		grubbs e_beta if e_beta_type=="`kind'", gen(temp)
		replace e_beta = . if temp==1
		drop temp
		count if e_beta !=.
	}
	
	* sample size
	* e_sample_size is a string that holds either a number of observations or
	* the tag "large". temp is its numeric version and is missing for anything
	* that is not a number.
	* Stata orders missing values above every number, so "temp>=10000" alone is
	* also true when temp is missing. Blank or non-numeric sample sizes were
	* therefore coded as large; they now stay missing.
	destring e_sample_size, gen(temp) force
	gen e_large_sample = .
	replace e_large_sample = 1 if e_sample_size=="large"
	replace e_large_sample = 1 if temp>=10000 & !missing(temp) & e_large_sample==.
	replace e_large_sample = 0 if temp<10000 & e_large_sample==.
	drop temp
	lab var e_large_sample "Large dataset (at least 10,000 obs.)"
	
	* residual wage: recode yes/no to "1"/"0", then convert the string to numeric
	replace e_resid_wage = cond(e_resid_wage=="yes", "1", "0") if inlist(e_resid_wage, "yes", "no")
	destring e_resid_wage, replace

	* cross sectional data (paper level)
	gen byte p_cross = cond(p_data_stru=="cross-section", 1, 0)
	lab var p_cross "Cross-sectional data"

	* individual data (estimate level)
	gen byte e_indiv = cond(e_obs_unit=="indiv", 1, 0)
	lab var e_indiv "Individual-level analysis"
		
	* country dummies
	tab e_country
	* West Germany is pooled with Germany
	replace e_country = "germany" if e_country=="west germany"

	gen byte e_us = cond(e_country=="the us", 1, 0)
	lab var e_us "The U.S."

	* For the two group dummies, multi-country samples ("several") are set to
	* missing instead of 0.
	gen byte e_anglo = inlist(e_country, "the us", "the uk", "australia", "canada") ///
		if e_country!="several"
	lab var e_anglo "Anglo-Saxon country"

	gen byte e_dvping = inlist(e_country, "colombia", "costa rica", "malaysia", "peru", "south africa", "thailand") ///
		if e_country!="several"
	lab var e_dvping "Developing country"
	
	* sectoral composition of countries -
	* Show the distinct (country, first sample year) pairs present in the
	* data; these are the cells for which sector shares are hand-coded below.
	* Everything runs between preserve and restore, so the working data are
	* left untouched. The earlier version built this list but never displayed
	* it, and generated a mid-year variable that was dropped unused.
	preserve
		keep e_country e_year_min
		drop if e_country=="several"
		duplicates drop
		sort e_country e_year_min
		list e_country e_year_min, sepby(e_country) noobs
	restore
	
	* The first sample year is matched as a string in the look-up tables below
	tostring e_year_min, replace

	* Manufacturing share, hand-coded by country and first sample year.
	* Cells without a value stay missing:
	*   australia 1980 1982; canada 1911 1971 1991 1996;
	*   germany 1975 1980 1981 1984 1986 1987 1990; israel 1989 1994;
	*   italy 1985; south korea 2013; the uk 1983;
	*   the us 1831 1900 1960 1968 1970 1971 1979 1980 1981 1982 1989 1990
	*          1991 1992 1994 1995
	gen e_perc_manuf = .

	* australia
	replace e_perc_manuf = 13.788630180651  if e_year_min=="1990" & e_country=="australia"
	replace e_perc_manuf = 13.1145557289965 if e_year_min=="1995" & e_country=="australia"
	replace e_perc_manuf = 10.9763613899876 if e_year_min=="2001" & e_country=="australia"
	replace e_perc_manuf = 10.8540818918502 if e_year_min=="2003" & e_country=="australia"
	* austria
	replace e_perc_manuf = 20.0575873942549 if e_year_min=="1987" & e_country=="austria"
	replace e_perc_manuf = 20.1542723002662 if e_year_min=="1988" & e_country=="austria"
	replace e_perc_manuf = 19.8099526491026 if e_year_min=="1991" & e_country=="austria"
	* colombia
	replace e_perc_manuf = 12.6623563037307 if e_year_min=="2013" & e_country=="colombia"
	replace e_perc_manuf = 12.2693186420816 if e_year_min=="2014" & e_country=="colombia"
	* costa rica
	replace e_perc_manuf = 19.4580146817806 if e_year_min=="1997" & e_country=="costa rica"
	* denmark
	replace e_perc_manuf = 14.3240695721764 if e_year_min=="1993" & e_country=="denmark"
	replace e_perc_manuf = 14.7297329410964 if e_year_min=="1995" & e_country=="denmark"
	replace e_perc_manuf = 14.6429328057672 if e_year_min=="1997" & e_country=="denmark"
	* france
	replace e_perc_manuf = 21.7748036570423 if e_year_min=="1962" & e_country=="france"
	replace e_perc_manuf = 20.5848020383081 if e_year_min=="1968" & e_country=="france"
	replace e_perc_manuf = 19.6191940642076 if e_year_min=="1975" & e_country=="france"
	replace e_perc_manuf = 17.30126225117   if e_year_min=="1982" & e_country=="france"
	replace e_perc_manuf = 16.0260477551757 if e_year_min=="1990" & e_country=="france"
	* germany
	replace e_perc_manuf = 19.733013232671  if e_year_min=="1996" & e_country=="germany"
	replace e_perc_manuf = 19.8946207261156 if e_year_min=="1997" & e_country=="germany"
	replace e_perc_manuf = 20.2840359494004 if e_year_min=="2000" & e_country=="germany"
	* greece, ireland, italy, malaysia, new zealand
	replace e_perc_manuf = 10.0373756358854 if e_year_min=="1999" & e_country=="greece"
	replace e_perc_manuf = 23.5315335583771 if e_year_min=="1999" & e_country=="ireland"
	replace e_perc_manuf = 13.6576018738131 if e_year_min=="2009" & e_country=="italy"
	replace e_perc_manuf = 26.1225839420447 if e_year_min=="2007" & e_country=="malaysia"
	replace e_perc_manuf = 15.5517417388538 if e_year_min=="2002" & e_country=="new zealand"
	* norway
	replace e_perc_manuf = 11.1535400403028 if e_year_min=="1989" & e_country=="norway"
	replace e_perc_manuf = 10.1970888507262 if e_year_min=="1993" & e_country=="norway"
	replace e_perc_manuf = 10.0638871611269 if e_year_min=="1996" & e_country=="norway"
	replace e_perc_manuf = 10.538437014204  if e_year_min=="1998" & e_country=="norway"
	* peru, south africa, spain, thailand
	replace e_perc_manuf = 13.3440281131691 if e_year_min=="2016" & e_country=="peru"
	replace e_perc_manuf = 20.2672862924698 if e_year_min=="1996" & e_country=="south africa"
	replace e_perc_manuf = 19.0369578655221 if e_year_min=="2001" & e_country=="south africa"
	replace e_perc_manuf = 15.1898514693015 if e_year_min=="2002" & e_country=="spain"
	replace e_perc_manuf = 29.4189680545022 if e_year_min=="2004" & e_country=="thailand"
	* the netherlands
	replace e_perc_manuf = 13.5394687009685 if e_year_min=="1997" & e_country=="the netherlands"
	replace e_perc_manuf = 13.4938544571332 if e_year_min=="1998" & e_country=="the netherlands"
	* the uk
	replace e_perc_manuf = 15.870870613089  if e_year_min=="1992" & e_country=="the uk"
	replace e_perc_manuf = 14.9484217095568 if e_year_min=="1997" & e_country=="the uk"
	replace e_perc_manuf = 12.4763662221292 if e_year_min=="2001" & e_country=="the uk"
	replace e_perc_manuf = 10.9499431816893 if e_year_min=="2004" & e_country=="the uk"
	* the us
	replace e_perc_manuf = 15.1150690882174 if e_year_min=="2000" & e_country=="the us"

	lab var e_perc_manuf "Share of manufacturing sector (%GDP), WDI"
	
	* Agricultural share, hand-coded by country and first sample year
	* (e_year_min is a string at this point). Cells without a value stay missing:
	*   australia 1980 1982; canada 1911 1971 1991 1996;
	*   germany 1975 1980 1981 1984 1986 1987 1990; israel 1989 1994;
	*   italy 1985; south korea 2013; the uk 1983;
	*   the us 1831 1900 1960 1968 1970 1971 1979 1980 1981 1982 1989 1990
	*          1991 1992 1994 1995
	gen e_perc_agri = .

	* australia
	replace e_perc_agri = 4.19344556378381  if e_year_min=="1990" & e_country=="australia"
	replace e_perc_agri = 3.00960378827297  if e_year_min=="1995" & e_country=="australia"
	replace e_perc_agri = 3.49688013321987  if e_year_min=="2001" & e_country=="australia"
	replace e_perc_agri = 2.88740742216489  if e_year_min=="2003" & e_country=="australia"
	* austria
	replace e_perc_agri = 3.2366358109153   if e_year_min=="1987" & e_country=="austria"
	replace e_perc_agri = 3.08577996292332  if e_year_min=="1988" & e_country=="austria"
	replace e_perc_agri = 2.84569687770774  if e_year_min=="1991" & e_country=="austria"
	* colombia
	replace e_perc_agri = 5.39271495449472  if e_year_min=="2013" & e_country=="colombia"
	replace e_perc_agri = 5.44695721474421  if e_year_min=="2014" & e_country=="colombia"
	* costa rica
	replace e_perc_agri = 12.5030913360301  if e_year_min=="1997" & e_country=="costa rica"
	* denmark
	replace e_perc_agri = 2.6666867919779   if e_year_min=="1993" & e_country=="denmark"
	replace e_perc_agri = 2.83084838376944  if e_year_min=="1995" & e_country=="denmark"
	replace e_perc_agri = 2.613583999477    if e_year_min=="1997" & e_country=="denmark"
	* france
	replace e_perc_agri = 9.23619230083179  if e_year_min=="1962" & e_country=="france"
	replace e_perc_agri = 6.91648392568544  if e_year_min=="1968" & e_country=="france"
	replace e_perc_agri = 4.5506229662513   if e_year_min=="1975" & e_country=="france"
	replace e_perc_agri = 3.84065071640816  if e_year_min=="1982" & e_country=="france"
	replace e_perc_agri = 2.97794544466016  if e_year_min=="1990" & e_country=="france"
	* germany
	replace e_perc_agri = 1.06716928692911  if e_year_min=="1996" & e_country=="germany"
	replace e_perc_agri = 1.03610244105776  if e_year_min=="1997" & e_country=="germany"
	replace e_perc_agri = 0.996168402468    if e_year_min=="2000" & e_country=="germany"
	* greece, ireland, italy, malaysia, new zealand
	replace e_perc_agri = 5.77247210059032  if e_year_min=="1999" & e_country=="greece"
	replace e_perc_agri = 2.57229167369639  if e_year_min=="1999" & e_country=="ireland"
	replace e_perc_agri = 1.80208177882956  if e_year_min=="2009" & e_country=="italy"
	replace e_perc_agri = 9.98677367962245  if e_year_min=="2007" & e_country=="malaysia"
	replace e_perc_agri = 5.85289352793662  if e_year_min=="2002" & e_country=="new zealand"
	* norway
	replace e_perc_agri = 2.9051586428011   if e_year_min=="1989" & e_country=="norway"
	replace e_perc_agri = 2.6094194418758   if e_year_min=="1993" & e_country=="norway"
	replace e_perc_agri = 2.22713791586389  if e_year_min=="1996" & e_country=="norway"
	replace e_perc_agri = 2.26324523087473  if e_year_min=="1998" & e_country=="norway"
	* peru, south africa, spain, thailand
	replace e_perc_agri = 6.93503461650105  if e_year_min=="2016" & e_country=="peru"
	replace e_perc_agri = 3.41658824480889  if e_year_min=="1996" & e_country=="south africa"
	replace e_perc_agri = 2.8124516945524   if e_year_min=="2001" & e_country=="south africa"
	replace e_perc_agri = 3.48212189760772  if e_year_min=="2002" & e_country=="spain"
	replace e_perc_agri = 9.29111013904814  if e_year_min=="2004" & e_country=="thailand"
	* the netherlands
	replace e_perc_agri = 2.97435481253043  if e_year_min=="1997" & e_country=="the netherlands"
	replace e_perc_agri = 2.56560268869083  if e_year_min=="1998" & e_country=="the netherlands"
	* the uk
	replace e_perc_agri = 1.22301322582085  if e_year_min=="1992" & e_country=="the uk"
	replace e_perc_agri = 0.936537987528909 if e_year_min=="1997" & e_country=="the uk"
	replace e_perc_agri = 0.759683632381875 if e_year_min=="2001" & e_country=="the uk"
	replace e_perc_agri = 0.728468960115285 if e_year_min=="2004" & e_country=="the uk"
	* the us
	replace e_perc_agri = 1.1505549143143   if e_year_min=="2000" & e_country=="the us"

	lab var e_perc_agri "Share of agricultural sector (%GDP), WDI"

	* years: back to numeric after the string matching above
	destring e_year_min, replace

	* decade dummies
	* Stata orders missing values above every number, so an open-ended ">="
	* test is also true for a missing year. The last bin is therefore guarded,
	* and a missing publication year now leaves p_decade missing instead of 6.
	g p_decade = .
	replace p_decade =1 if p_pub_year <1980
	replace p_decade =2 if (p_pub_year >=1980 & p_pub_year <1990)
	replace p_decade =3 if (p_pub_year >=1990 & p_pub_year <2000)
	replace p_decade =4 if (p_pub_year >=2000 & p_pub_year <2010)
	replace p_decade =5 if (p_pub_year >=2010 & p_pub_year <2020)
	replace p_decade =6 if (p_pub_year >=2020 & !missing(p_pub_year))
	lab var p_decade "Decade of publication"

	* mid sample year
	g e_mid_year = (e_year_min+e_year_max)/2

	* time periods
	* Same missing-value guard: estimates without a mid-year used to fall into
	* "After 2007". They now get an empty period, which encode turns into a
	* missing e_period_enc. A mid-year of exactly 2007 goes to the last period,
	* as before.
	gen e_period = ""
	replace e_period = "Sample mid-year: Before 1973" if e_mid_year <1973
	replace e_period = "Sample mid-year: 1973-2007 (ref. before 1973)" if e_mid_year >=1973 & !missing(e_mid_year)
	replace e_period = "Sample mid-year: After 2007 (ref. before 1973)" if e_mid_year >=2007 & !missing(e_mid_year)
	lab var e_period "Sample mid-year"
	encode e_period, gen(e_period_enc)
	
	* reference papers
	* 1 for the paper ids that also appear in Longhi et al. (2005), 0 otherwise
	gen p_longhi2005 = inlist(p_id, 1, 2, 7, 18, 27, 31, 34, 71, 90, 100, 103)
	lab var p_longhi2005 "The paper is part of Longhi et al 2005"
				
	/*
	11 out of 18 papers wrt the study of Longhi et al. (2005)
	
		18	immigrants, minorities, and labor market competition
		27	native wage impacts of foreign labor: a random effects panel analysis
		34	effects of immigrants on the 1980-1990 u.s. wage experience
		90	immigration and the earnings of young native workers
		-	the factor-market consequences of unskilled immigration to the united states.
		-	do immigrants reduce natives' wages? evidence from germany
		100	the impact of immigration on the labor market for native-born workers: incorporating the dynamics of internal migration
		2	east-west trade and migration: the austro-german case
		7	immigrant inflows, native outflows, and the local labor market impacts of higher immigration
		103	the impact of mass migration on the israeli labor market
		31	the impact of immigration on the earnings of natives: evidence from australian micro data
		71	the labor demand curve is downward sloping: reexamining the impact of immigration on the labor market
		1	wage and mobility effects of trade and migration on the austrian labour market
	*/
	
	* 1 for the paper ids that also appear in Dustmann et al. (2016), 0 otherwise
	gen p_dustmann2016 = inlist(p_id, 7, 10, 11, 15, 29, 63, 71, 88, 102, 104)
	lab var p_dustmann2016 "The paper is part of Dustmann et al 2016"

	/*
	10/26 papers wrt the study of Dustmann et al 2016
	
		71	the labor demand curve is downward sloping: reexamining the impact of immigration on the labor market
		63	the impact of immigration on the british labour market
		11	how immigration affects u.s. cities
		-	the effect of internal migration on local labormarkets: american cities during the great depression
		102	the effect of immigration along the distribution of wages
		-	the wage impact of the marielitos: a reappraisal
		29	labor supply shocks, native, wages, and the adjustment of local employment.
		-	the labor market effects of a refugee wave: applying the synthetic control method to the mariel boatlift
		104	immigrants' effect on native workers: new analysis on longitudinal data
		-	labor market adjustments to increased immigration
		7	immigrant inflows, native outflows, and the local labor market impacts of higher immigration
		88	native internal migration and the labor market impact of immigration
		10	the labor market impact of immigration: a quasi-experiment exploiting immigrant location rules in germany
		15	immigrant versus natives ? displacement and job creation
		-	rethinking the effect of immigration on wages
		-	the impact of immigration on the structure of wages: theory and evidence from britain	
	*/
	
	* Borjas dummy: 1 when "borjas" is the first, second or third author
	gen p_borjas = inlist("borjas", p_author1, p_author2, p_author3)
	tab p_borjas
	
	* skill levels
	* Build display labels of the form "<group> skills: <level>", adding
	* " (ref. high)" to every level except the reference level "high". Values
	* outside all/high/low/medium are kept as they are. The labelled strings
	* are then encoded and the helper string variables dropped.

	* immigrants
	tab e_immig_skill
	gen e_immig_skill2 = e_immig_skill
	replace e_immig_skill2 = "Immigrants' skills: " + e_immig_skill ///
		if inlist(e_immig_skill, "all", "high", "low", "medium")
	replace e_immig_skill2 = e_immig_skill2 + " (ref. high)" ///
		if inlist(e_immig_skill, "all", "low", "medium")
	encode e_immig_skill2, gen(e_immig_skill_enc)
	drop e_immig_skill2

	* natives
	tab e_wage_skill
	gen e_wage_skill2 = e_wage_skill
	replace e_wage_skill2 = "Natives' skills: " + e_wage_skill ///
		if inlist(e_wage_skill, "all", "high", "low", "medium")
	replace e_wage_skill2 = e_wage_skill2 + " (ref. high)" ///
		if inlist(e_wage_skill, "all", "low", "medium")
	encode e_wage_skill2, gen(e_wage_skill_enc)
	drop e_wage_skill2
	
	* estimator: display labels with OLS as the reference category; values
	* other than iv-2sls / ols / other are kept as they are
	tab e_estimator
	gen e_estimator2 = cond(e_estimator=="iv-2sls", "Estimator: IV-2SLS (ref. OLS)", ///
		cond(e_estimator=="ols", "Estimator: OLS", ///
		cond(e_estimator=="other", "Estimator: Other (ref. OLS)", e_estimator)))
	encode e_estimator2, gen(e_estimator_enc)
	drop e_estimator2

	* elasticity / semi-elasticity: display labels with the semi-elasticity as
	* the reference category; values other than ela / other / semi are kept
	gen e_beta_type2 = cond(e_beta_type=="ela", "Estimate: Elasticity (ref. semi-elasticity)", ///
		cond(e_beta_type=="other", "Estimate: Other (ref. semi-elasticity)", ///
		cond(e_beta_type=="semi", "Estimate: Semi-elasticity", e_beta_type)))
	encode e_beta_type2, gen(e_beta_type_enc)
	drop e_beta_type2
	
	* gender
	encode e_wage_gender, gen(e_wage_gender_enc)

	* wage frequency
	* Blank frequencies become "unknown"; the raw categories are tabulated
	* before e_wage_freq is overwritten in place with its display labels.
	replace e_wage_freq = "unknown" if e_wage_freq==""
	tab e_wage_freq
	* daily, quarterly and unknown are pooled into "Other"
	replace e_wage_freq = "Wage var. freq.: Other (ref. hourly)" ///
		if inlist(e_wage_freq, "daily", "quarterly", "unknown")
	* hourly is the reference category
	replace e_wage_freq = "Wage var. freq.: Hourly" if e_wage_freq=="hourly"
	* the remaining frequencies keep their own, capitalised, label
	foreach freq in monthly weekly yearly {
		replace e_wage_freq = "Wage var. freq.: " + strproper("`freq'") + " (ref. hourly)" ///
			if e_wage_freq=="`freq'"
	}
	encode e_wage_freq, gen(e_wage_freq_enc)
	   
	* shift-share
	* For IV-2SLS estimates a missing shift-share flag is read as 0; every
	* non-IV estimate gets 0 in the derived variable.
	replace e_shiftshare = 0 if e_shiftshare== . & e_estimator=="iv-2sls"
	gen e_shiftshare_esti = cond(e_estimator=="iv-2sls", e_shiftshare, 0)
	lab var e_shiftshare_esti "Shift-share IV (for IV-2SLS estimators)"
	tab e_shiftshare_esti

	* Paper-level "shift-share discussed": code 2 is merged into 1, and the
	* derived dummy is 1 only when the recoded value is 1 (0 otherwise,
	* including missing).
	* NOTE(review): unlike e_shiftshare_esti this dummy is not set to 0 for
	* non-IV estimates although its label says "for IV-2SLS estimators" -
	* confirm that this is intended.
	tab p_shiftshare_discussed
	replace p_shiftshare_discussed = 1 if p_shiftshare_discussed==2
	gen e_shiftsharedis_esti = (p_shiftshare_discussed==1)
	lab var e_shiftsharedis_esti "Shift-share IV discussed (for IV-2SLS estimators)"
	tab e_shiftsharedis_esti

	* covariates: no covariates at all implies no unemployment/employment covariate
	replace e_key_cov_un_empl = 0 if e_covariates ==0
	
	* immigration variable
	* Format of the immigration regressor, grouped into level / rate / share
	* (log and non-log versions pooled). "Share" is the reference category, so
	* its label carries no "(ref. ...)" suffix, like the other reference
	* labels in this file; it previously read "Share (ref. share)". The
	* alphabetical order of the labels, and hence the codes assigned by
	* encode, is unchanged.
	gen e_immig_level = e_immig_format
	replace e_immig_level = "Immig. var. format: Level (ref. share)" if e_immig_format=="log nr" | e_immig_format=="nr"
	replace e_immig_level = "Immig. var. format: Rate (ref. share)" if e_immig_format=="log rate" | e_immig_format=="rate"
	replace e_immig_level = "Immig. var. format: Share" if e_immig_format=="log share" | e_immig_format=="share"
	encode e_immig_level, gen(e_immig_level_enc)
	lab var e_immig_level "Immig. var. format"
	* also label the encoded variable, as is done for e_immig_def_enc below
	lab var e_immig_level_enc "Immig. var. format"

	* Definition of an immigrant: blank becomes "unknown", "birth & citizenship"
	* is pooled with citizenship, and ethnicity / previous residence / unknown
	* are pooled into "Other". Birth is the reference category.
	replace e_immig_def = "unknown" if e_immig_def==""
	replace e_immig_def = "citizenship" if e_immig_def=="birth & citizenship"
	gen e_immig_def2 = e_immig_def
	tab e_immig_def2
	replace e_immig_def2="Immig. var. def: Birth" if e_immig_def=="birth"
	replace e_immig_def2="Immig. var. def: Citizenship (ref. birth)" if e_immig_def=="citizenship"
	replace e_immig_def2="Immig. var. def: Other (ref. birth)" if e_immig_def=="ethnicity"
	replace e_immig_def2="Immig. var. def: Other (ref. birth)" if e_immig_def=="previous residence"
	replace e_immig_def2="Immig. var. def: Other (ref. birth)" if e_immig_def=="unknown"
	encode e_immig_def2, gen(e_immig_def_enc)
	lab var e_immig_def_enc "Immigration variable definition"
	drop e_immig_def2

	save ${pathtemp}data_all.dta, replace
	
		
* Prepare dataset with elasticities only
	use ${pathtemp}data_all.dta, clear

	* Elasticity, in order of preference (temp_conv records the source):
	*   1 = the estimate is already an elasticity
	*   2 = elasticity reported alongside the estimate
	*   3 = semi-elasticity multiplied by e_val_immig
	gen temp=.
	gen byte temp_conv = .
	replace temp_conv = 1 if e_beta_type =="ela" & !missing(e_beta)
	replace temp = e_beta if temp_conv==1
	replace temp_conv = 2 if temp==. & !missing(e_reported_elasticity)
	replace temp = e_reported_elasticity if temp_conv==2
	replace temp_conv = 3 if temp==. & e_beta_type=="semi" & !missing(e_beta*(e_val_immig))
	replace temp = e_beta*(e_val_immig) if temp_conv==3
	lab var temp "Estimate converted into elasticity"

	* Put the standard error on the same scale as the converted estimate.
	* It was previously left on the original scale while e_beta was rescaled,
	* so "meta set e_beta e_se" paired an elasticity with the s.e. of a
	* different quantity.
	*   source 3: se(c*b) = |c|*se(b), with c = e_val_immig
	*   source 2: the rescaling factor is not recorded, so the s.e. is rebuilt
	*             from the t-stat, which does not depend on the scale
	replace e_se = e_se*abs(e_val_immig) if temp_conv==3
	replace e_se = abs(temp/e_tstat) if temp_conv==2 & !missing(e_tstat) & e_tstat!=0
	replace e_se = . if e_se==0
	replace e_ise = 1/e_se if inlist(temp_conv, 2, 3)

	count if temp!=.
	keep if temp!=.
	drop e_beta temp_conv
	rename temp e_beta

	* drop outliers: elasticities beyond +/-3 first, then Grubbs
	drop if e_beta >3
	drop if e_beta <-3

	grubbs e_beta, gen(temp)
	replace e_beta = . if temp==1
	drop temp
	count if e_beta !=.

	drop if e_beta==.
	su e_beta
	unique p_id
	* number of estimates per paper, taken as the largest remaining e_id
	bys p_id: egen nr_e = max(e_id)
	gen ln_nr_e = log(nr_e)
	lab var ln_nr_e "(log) Nr of estimates"
	* one identifier per (paper, estimate) pair, used as the study label
	egen id = group(p_id e_id)

	meta set e_beta e_se, studylabel(id) studysize() eslabel(Effect size (elasticity))

	save ${pathtemp}data_ela.dta, replace
	
	
* Prepare dataset with semi-elasticities only
	use ${pathtemp}data_all.dta, clear

	* Semi-elasticity: the estimate itself when it already is one, otherwise
	* an elasticity divided by e_val_immig (temp_conv flags converted rows).
	gen temp = .
	replace temp = e_beta if e_beta_type =="semi" & temp==.
	gen byte temp_conv = (e_beta_type =="ela" & temp==. & !missing(e_beta*(1/e_val_immig)))
	replace temp = e_beta*(1/e_val_immig) if temp_conv==1
	lab var temp "Estimate converted into semi-elasticity ('effect size' in Longhi et al 2005)"

	* Put the standard error on the same scale as the converted estimate:
	* se(b/c) = se(b)/|c|. It was previously left on the elasticity scale
	* while e_beta was rescaled, so "meta set e_beta e_se" paired the two
	* inconsistently. e_val_immig cannot be 0 or missing on converted rows.
	replace e_se = e_se/abs(e_val_immig) if temp_conv==1
	replace e_ise = 1/e_se if temp_conv==1

	count if temp!=.
	keep if temp!=.
	drop e_beta temp_conv
	rename temp e_beta

	* drop outliers (Grubbs)
	grubbs e_beta, gen(temp)
	replace e_beta = . if temp==1
	drop temp
	count if e_beta !=.

	drop if e_beta==.
	su e_beta
	unique p_id
	* number of estimates per paper, taken as the largest remaining e_id
	bys p_id: egen nr_e = max(e_id)
	gen ln_nr_e = log(nr_e)
	lab var ln_nr_e "(log) Nr of estimates"
	* one identifier per (paper, estimate) pair, used as the study label
	egen id = group(p_id e_id)

	meta set e_beta e_se, studylabel(id) studysize() eslabel(Effect size (semi-elasticity))

	save ${pathtemp}data_semi.dta, replace
	
	
* Prepare dataset with estimates in the form of both elasticities and semi-elasticities
	use ${pathtemp}data_all.dta, clear

	* semi-elasticity: as reported, or elasticity divided by e_val_immig
	gen semi = e_beta if e_beta_type =="semi"
	replace semi = e_beta*(1/e_val_immig) if e_beta_type =="ela"
	lab var semi "Estimate converted into semi-elasticity ('effect size' in Longhi et al 2005)"

	* elasticity: as reported, else the elasticity given alongside the
	* estimate, else semi-elasticity multiplied by e_val_immig
	gen ela = e_beta if e_beta_type =="ela"
	replace ela = e_reported_elasticity if ela==.
	replace ela = e_beta*(e_val_immig) if e_beta_type=="semi" & ela==.
	lab var ela "Estimate converted into elasticity"

	* drop outliers: keep elasticities within [-3, 3] (rows without an
	* elasticity go as well), then blank the values flagged by Grubbs
	keep if inrange(ela, -3, 3)
	grubbs ela, gen(temp)
	replace ela = . if temp==1
	drop temp

	* only estimates available in both forms are retained
	drop if ela==. | semi==.
	unique p_id
	bysort p_id: egen nr_e = max(e_id)
	gen ln_nr_e = ln(nr_e)
	lab var ln_nr_e "(log) Nr of estimates"

	save ${pathtemp}data_ela_semi.dta, replace
	
	
* Prepare dataset with all estimates: elasticities, semi-elasticities and other estimates
	use ${pathtemp}data_all.dta, clear

	* estimates blanked by the Grubbs step are removed here
	keep if e_beta!=.
	summarize e_beta
	unique p_id

	* number of estimates per paper, taken as the largest remaining e_id
	bysort p_id: egen nr_e = max(e_id)
	gen ln_nr_e = ln(nr_e)
	lab var ln_nr_e "(log) Nr of estimates"

	* one identifier per (paper, estimate) pair, used as the study label
	egen id = group(p_id e_id)

	meta set e_beta e_se, studylabel(id) studysize() eslabel(Effect size)

	* overwrites the intermediate file with the meta-set version
	save ${pathtemp}data_all.dta, replace

	

