/*--------------------------------------------------------
----------------------------------------------------------
Section 3 - Data and Empirical Strategy
----------------------------------------------------------
--------------------------------------------------------*/

**# LIST OF PAPERS	
	
	*--------------------------------------------------------
	* Appendix A
	* List of papers
	*--------------------------------------------------------
	* Build the paper-level list exported as Table A1.
	* The path is quoted so that a temp directory containing spaces does not
	* break the command (all other file references in this file are quoted).
	use "${pathtemp}data_all.dta", clear
	
	* Number of distinct papers in the raw data
	unique p_id
	
	keep p_id p_title p_author1 p_author2 p_author3 p_pub_year e_approach e_total_effect 

	* One indicator per category of e_approach. Exactly four categories are
	* expected; the renames further down assume they come out in the order
	* mixture, national, other, spatial -- TODO confirm against the data.
	tab e_approach, gen(e_approach_) 
	drop e_approach

	* Paper-level flags: the maximum over all estimates of the same paper
	bys p_id: egen total_effect_d = max(e_total_effect)
	bys p_id: egen e_approach_1_d = max(e_approach_1)
	bys p_id: egen e_approach_2_d = max(e_approach_2)
	bys p_id: egen e_approach_3_d = max(e_approach_3)
	bys p_id: egen e_approach_4_d = max(e_approach_4)
	drop e_total_effect e_approach_1 e_approach_2 e_approach_3 e_approach_4

	* Collapse to one row per distinct combination of the remaining variables
	duplicates drop 
	count

	* Citation pieces: "(year)" and the connector between author names
	drop p_id
	gen year = "(" +  string(p_pub_year, "%03.0f") + ")"
	drop p_pub_year
	gen and = ""
	replace and = "\&" if p_author2!=""
	replace and = "et al." if p_author3!=""
	drop p_author3
	* With three or more authors only the first author is shown
	replace p_author2="" if and=="et al."

	* Capitalise names and titles
	replace p_author1 = proper(p_author1)
	replace p_author2 = proper(p_author2)
	replace p_title = proper(p_title)

	rename e_approach_1_d mixture
	rename e_approach_2_d national
	rename e_approach_3_d other
	rename e_approach_4_d spatial
	
	
	order p_author1 and p_author2 year p_title national spatial mixture other total_effect_d, first
	sort p_author1 p_author2 year 
	* Number of distinct titles in the final list
	unique p_title
	export excel using "${output}tableA1.xls", firstrow(variables) replace

	
	
	
**# STATISTICS

	*--------------------------------------------------------
	* Descriptive statistics for Section 3
	*--------------------------------------------------------
	capture log close
	log using "${output}descriptives_statistics.log", replace

		* Number of distinct papers in each analysis sample
		foreach sample in all semi ela ela_semi {
			usedata_`sample'
			unique p_id
		}

		* Detailed descriptives on the full sample
		usedata_all
		summarize e_beta
		distinct p_id

		tabulate p_lead_j

		* Country coverage, leaving out multi-country estimates
		unique e_country if e_country!="several"
		tabulate e_country if e_country!="several"
		tabulate e_anglo if e_country!="several"

		tabulate p_cross

		tabulate e_estimator
		tabulate e_shiftshare

	log close
		
		
		
	
**# TABLES
	
	*--------------------------------------------------------
	* Table A2
	* Descriptive statistics
	*--------------------------------------------------------
	capture log close
	log using "${output}tableA2.log", replace

		* ---------- Paper-level statistics ----------
		usedata_all

		unique p_id

		* Publication-type indicators
		quietly tabulate p_pub_type, generate(p_pub_type_dum)
		rename (p_pub_type_dum1 p_pub_type_dum2 p_pub_type_dum3) ///
			(book_chapter journal_article working_paper)

		global var_p "p_author_nr p_pub_year p_lead_j journal_article book_chapter working_paper nr_e p_theory p_mobility p_nat_exp" 

		* Reduce to one row per distinct paper record
		keep p_id $var_p
		duplicates drop

		summarize $var_p
		latabstat $var_p, s(mean sd min max n) columns(statistics) f(%9.3f)

		* ---------- Estimate-level statistics ----------
		usedata_all

		* Estimation period
		quietly tabulate e_period_enc, generate(e_period_enc_dum)
		rename (e_period_enc_dum1 e_period_enc_dum2 e_period_enc_dum3) ///
			(period_1973_2007 period_after2007 period_before1973)

		* Functional form of the coefficient
		quietly tabulate e_beta_type, generate(e_beta_type_dum)
		rename (e_beta_type_dum1 e_beta_type_dum2 e_beta_type_dum3) ///
			(form_ela form_semi form_other)

		* Empirical approach
		quietly tabulate e_approach_enc, generate(e_approach_enc_dum)
		rename (e_approach_enc_dum1 e_approach_enc_dum2 e_approach_enc_dum3 e_approach_enc_dum4) ///
			(approach_mixture approach_national approach_other approach_pure)

		* Estimator
		quietly tabulate e_estimator_enc, generate(e_estimator_enc_dum)
		rename (e_estimator_enc_dum1 e_estimator_enc_dum2 e_estimator_enc_dum3) ///
			(estim_iv2sls estim_ols estim_other)

		* The shift-share distance flag is only kept for shift-share estimates
		replace e_shiftsharedis = . if e_shiftshare !=1

		* Wage measure: frequency, population, gender, skill
		quietly tabulate e_wage_freq_enc, generate(dum)
		rename (dum1 dum2 dum3 dum4 dum5) ///
			(wage_freq_hourly wage_freq_monthly wage_freq_other wage_freq_weekly wage_freq_yearly)

		quietly tabulate e_wage_pop, generate(dum)
		rename (dum1 dum2) (wage_pop_all wage_pop_natives)

		quietly tabulate e_wage_gender, generate(dum)
		rename (dum1 dum2 dum3) (wage_gender_both wage_gender_females wage_gender_males)

		quietly tabulate e_wage_skill, generate(dum)
		rename (dum1 dum2 dum3 dum4) ///
			(wage_skill_all wage_skill_high wage_skill_low wage_skill_med)

		* Immigration measure: definition, form, skill
		quietly tabulate e_immig_def_enc, generate(dum)
		rename (dum1 dum2 dum3) (immig_def_birth immig_def_citizen immig_def_other)

		quietly tabulate e_immig_level_enc, generate(dum)
		rename (dum1 dum2 dum3) (immig_form_level immig_form_rate immig_form_share)

		quietly tabulate e_immig_skill_enc, generate(dum)
		rename (dum1 dum2 dum3 dum4) ///
			(immig_skill_all immig_skill_high immig_skill_low immig_skill_med)

		global var_e "e_beta e_se e_baseline e_large_sample e_indiv e_cell p_cross e_anglo e_us e_dvping period_* form_* approach_* e_covariates e_resid_wage estim_* e_shiftshare e_shiftsharedis"

		* Only the variables listed in var_e are reported in the table
		keep $var_e

		summarize $var_e
		latabstat $var_e, s(mean sd min max n) columns(statistics) f(%9.3f)

	capture log close
	
	
	*--------------------------------------------------------
	* Table A3
	* Description of the variables
	*--------------------------------------------------------
	* Table A3 lists, for each categorical variable, its label, its
	* categories and the number of observations in each category.
	*
	* The per-variable counts are accumulated in a temporary file rather than
	* in the output directory, so no scratch dataset is left behind when the
	* run stops half-way. The helper columns that were never filled or used
	* (code0, code2, check) and the unused counter have been removed; the
	* exported table is unchanged.
	tempfile descriptions
	clear 
	set obs 1
	g str code1=""
	g nb_obs=.
	* Seed row with missing nb_obs; it is dropped again before the export
	save "`descriptions'", replace 

	usedata_all
	
	local var_tokeep "p_pub_type p_data_stru e_period e_beta_type e_approach e_estimator e_wage_freq e_wage_pop e_wage_gender e_wage_skill e_immig_def e_immig_level e_immig_skill" 	
	keep `var_tokeep' 
	
	foreach z in `var_tokeep'  {
		preserve
		* Variable label, used as the description of the variable
		local vlabel : variable label `z'
		* Count observations per category of the current variable
		g nb_obs = 1 
		collapse (sum) nb_obs, by(`z')
		* Compound quotes keep labels containing a double quote intact
		g comment = `"`vlabel'"'
		rename `z' value
		tostring value, replace force
		g code1 ="`z'"
		order code1 nb_obs comment
		append using "`descriptions'", force
		save "`descriptions'", replace 
		restore
	}
	

	use "`descriptions'", clear 
	order code1 nb_obs comment  
	tab code1
	
	* Numeric missing values become "." after tostring; leave them out
	drop if value =="."
		
	rename code1 Variable
	rename nb_obs Obs
	rename value Categories
	rename comment Description 
	
	* Show the description only on the first row of each variable
	bys Variable: gen temp = _n
	replace Description="" if temp>1
	keep Categories Description Obs 
	order Description Categories Obs, first
	* Removes the seed row created above
	drop if Obs==.
	texsave Description Categories Obs using "${output}tableA3.tex", replace 
	export excel Categories Description Obs using "${output}tableA3.xls", firstrow(variables) replace

	


**# FIGURES
 
	*--------------------------------------------------------
	* Figure 1
	* Scientific Production on the Wage Effect of Immigration Over Time
	*--------------------------------------------------------	 
	 
	* Panel (a): number of studies per publication year
	usedata_all
	keep p_pub_year p_id
	duplicates drop
	generate count = 1

	collapse (sum) count, by(p_pub_year)
	tabulate p_pub_year
	summarize count
	sort p_pub_year
	twoway bar count p_pub_year, ///
		barwidth(0.2) color(navy) ///
		xline(2001, lpattern(dash) lcolor(red) ) ///
		xline(2003, lpattern(shortdash) lcolor(red)) ///
		xaxis(1 2) xlabel(2003 "L" 2001 "C", axis(2)) ///
		graphregion(fcolor(white) lcolor(white)) ///
		plotregion(fcolor(white) lcolor(white)) ///
		ysize(4) xsize(5) ///
		xtitle("Publication year", axis(1) ) xtitle("", axis(2) ) ///
		ytitle("Number of studies") yscale(range(0 6))
	graph export "${output}figure1a.pdf", as(pdf) replace


	* Panel (b): number of estimates per publication year
	usedata_all
	keep p_pub_year p_id e_beta
	* Every row counts as one estimate
	replace e_beta = 1
	collapse (sum) e_beta, by(p_pub_year p_id)
	collapse (sum) e_beta, by(p_pub_year)
	tabulate p_pub_year
	summarize e_beta
	sort p_pub_year
	twoway bar e_beta p_pub_year, ///
		barwidth(0.2) color(navy) ///
		xline(2001, lpattern(dash) lcolor(red) ) ///
		xline(2003, lpattern(shortdash) lcolor(red)) ///
		xaxis(1 2) xlabel(2003 "L" 2001 "C", axis(2)) ///
		graphregion(fcolor(white) lcolor(white)) ///
		plotregion(fcolor(white) lcolor(white)) ///
		ysize(4) xsize(5) ///
		xtitle("Publication year", axis(1) ) xtitle("", axis(2) ) ///
		ytitle("Number of estimates") yscale(range(0 180))
	graph export "${output}figure1b.pdf", as(pdf) replace



	*--------------------------------------------------------
	* Figure 2
	* Beta distribution plot
	*-------------------------------------------------------	
	capture log close
	log using "${output}figure2_statistics.log", replace

		* One histogram of e_beta per sample. The three lists are parallel:
		* sample loader suffix, histogram bin width, output file stem.
		local hist_samples "all semi ela"
		local hist_widths  "0.3 0.15 0.01"
		local hist_files   "figure2 figureA1a figureA1b"

		forvalues k = 1/3 {
			local smp  : word `k' of `hist_samples'
			local binw : word `k' of `hist_widths'
			local stem : word `k' of `hist_files'

			usedata_`smp'

			* Sample size and number of papers, written to the log
			summarize e_beta
			local  n_obs = `r(N)'
			distinct p_id
			local  n_paper = `r(ndistinct)'

			twoway (histogram e_beta, width(`binw') lcolor(gs10) fcolor(gs12)),  ///
				xline(0, lcolor(black) lpattern(dash) lstyle(foreground) lw(thick) ) ///
				graphregion(fcolor(white) lcolor(white)) ///
				plotregion(fcolor(white) lcolor(white)) ysize(4) xsize(5) ///
				legend(rows(1)) ///
				xtitle("")
			graph export "${output}`stem'.pdf", as(pdf) replace
		}

	log close
	 


	*--------------------------------------------------------
	* Figure 3
	* Forest plot
	*--------------------------------------------------------
	usedata_all

	* Paper-level summary: mean estimate, standard deviation of the
	* estimates, number of estimates, and the smallest and largest estimate
	generate sd = e_beta
	bysort p_id : egen lowercl = min(e_beta)
	bysort p_id : egen uppercl = max(e_beta)
	generate n_est = 1
	collapse (mean) lower* upper* e_beta (sum) n_est (sd) sd , by(p_id  p_author* p_pub_year p_author_nr)

	* Bounds of the interval: mean +/- 1.96 * sd / sqrt(number of estimates)
	generate u95 = e_beta + (1.96* (sd /sqrt(n_est)))
	generate l95 = e_beta - (1.96* (sd /sqrt(n_est)))

	* Papers are ranked by their mean estimate
	sort e_beta
	generate rank = _n

	* Label shown on the left axis: authors and publication year
	tostring p_pub_year, replace
	foreach a in p_author1 p_author2 p_author3 {
		replace `a' = proper(`a')
	}
	generate str paper_name = p_author1 +", " +p_pub_year if p_author_nr ==1
	replace paper_name = p_author1 +" and "+p_author2+", " +p_pub_year   if p_author_nr ==2
	replace paper_name = p_author1 + " et al. " +p_pub_year   if p_author_nr >2
	drop p_author*

	* Label shown on the right axis: number of estimates
	tostring n_est, replace
	generate str rcol = n_est

	* Attach the labels to the rank values of each axis
	labmask rank, val(paper_name)
	levelsof rank, local(list_papers)

	capture generate rank2 = rank
	labmask rank2, val(rcol)
	levelsof rank2, local(rcol)

	twoway ///
		(rcap l95 u95  rank , horizontal  mcolor(navy) fcolor(gs10) lcolor(navy) lwidth(vthin)) ///
		(scatter  rank e_beta ,  yaxis(1) mcolor(navy) msymbol(pipe) msize(small)) ///
		(scatter  rank2 e_beta  ,yaxis(2) yscale(axis(2))   mcolor(navy) msymbol(pipe) msize(small)) ///
		, legend(order(2 "Mean estimate" 1 "95% confidence interval" ) region(lcolor(white)) ///
			pos(6) ring(1) cols(3) size(*.5) symysize(*.3) symxsize(*.3)) ///
		xline(0, lcolor(gs6) lpattern(dot)) ///
		xtitle("" , size(vsmall)) ytitle("", axis(1) ) ytitle("Number of estimates", size(tiny) axis(2)) ///
		xlabel(-7(1)7,  valuelabel nogrid angle(horizontal) labsize(tiny) ) ///
		graphregion(fcolor(white) lcolor(white)) plotregion(fcolor(white) lcolor(white)) ysize(12) xsize(10) ///
		ylabel(`list_papers', valuelabel angle(horizontal) labsize(*.3) nogrid noticks axis(1)) ///
		ylabel(`rcol', valuelabel angle(horizontal) labsize(*.3) nogrid noticks axis(2) )

	graph export "${output}figure3.pdf", as(pdf) replace
	
		




	*--------------------------------------------------------
	* Figure 4
	* Sampling Error: z-stat graph and I2 (As in Longhi et al. 2005 and Disdier & Head 2008)
	*--------------------------------------------------------
	usedata_all
	
	* Inverse-variance weighted mean of the estimates.
	* The weight is only defined where the estimate itself is present, so
	* that numerator and denominator are summed over the same observations.
	gen var_i = e_se*e_se 
	gen ci = e_beta/var_i 
	g inv_vi = 1/var_i if !missing(e_beta)
	egen sum_ci  = sum(ci)
 	egen sum_inv_vi = sum(inv_vi)
 	g beta_mean = sum_ci / sum_inv_vi  /* weighted mean */ 

	preserve 
		* I2 statistic, computed from the Q statistic
		* https://www.medcalc.org/manual/meta-analysis-introduction.php

		* Q = sum of squared deviations from the weighted mean, each
		* divided by the variance of the estimate
		gen q_i = ((e_beta - beta_mean)^2 )/var_i

		* k = number of estimates that actually enter Q (non-missing q_i);
		* the degrees of freedom are k - 1
		collapse (sum) Q = q_i (count) k = q_i
		g df = k-1

		* I2 is a percentage and is truncated at zero when Q < df
		gen I2 = max(0, 100*((Q-df)/Q))
		replace I2=round(I2, .01) 
		dis Q
		dis I2 
		qui su I2 
		local I2_value = string( `r(mean)',  "%9.3f")
	restore 

	* z: deviation from the weighted mean in standard errors; t: t-statistic
 	g z = (e_beta - beta_mean)/e_se 
	g t = e_beta/e_se

	su e_beta
	local  n_obs = `r(N)'

	distinct p_id
	local  n_paper = `r(ndistinct)'

	* restrict the set of values to be plotted to [-5, 5]
	su z, d
	replace z =.  if z>5
	replace z = .  if z<-5
	su t, d
	replace t =.  if t>5
	replace t = .  if t<-5
	
	* plot the density of the t-statistics against the standard normal;
	* the normal curve is drawn between the 1st and 99th percentile of t
	sum t, det
	twoway (kdensity t , lpattern(dash) lcolor(navy) ) ///
	(function y=normalden(x, 0, 1) , range(`r(p1)' `r(p99)') ) , ///
	xtitle("") ytitle("Density") ///
	note("I{sup:2} value: `I2_value'") ///
	graphregion (fcolor(white) lcolor(white)) ///
	plotregion(fcolor(white) lcolor(white)) ysize(10) xsize(16) ///
	legend (order(1 "Observed t-statistics density"  2 "Normal distribution" ) region(lcolor(white)) pos(6) ring(1) cols(2)) 
	graph export "${output}figure4.pdf", as(pdf) replace




	
