/*--------------------------------------------------------
----------------------------------------------------------
Section 5 - Extensions
----------------------------------------------------------
--------------------------------------------------------*/

	*--------------------------------------------------------	
	* Figure 5: t-stat and sample size	-- Card and Krueger (1995)
	*--------------------------------------------------------
	* publication_bias1
	*   Regresses the log t-statistic on the log of the square root of the
	*   sample size, then exports a scatter plot with the fitted line.
	*   Reads from the data in memory: e_tstat, e_sample_size, e_cell_size, e_cell.
	*   Reads the global macros output (path prefix) and idea (panel suffix)
	*   to build the name of the exported PDF.
	*   Side effects: adds ln_tstat, size, size_sqr and ln_size_sqr to the data
	*   in memory, so the data must be reloaded before calling it a second time.
	capture program drop publication_bias1
	program define publication_bias1
		* The program takes no arguments
		syntax

		* Implicit t-stat in logs. ln() returns missing for values <= 0, so such
		* estimates drop out of both the regression and the plot.
		* NOTE(review): presumably e_tstat is already in absolute value - confirm.
		generate ln_tstat = ln(e_tstat)

		* Degrees of freedom / sample size.
		* force turns non-numeric entries of e_sample_size into missing.
		destring e_sample_size, force replace
		generate size = e_sample_size
		* Estimates flagged e_cell == 1 use the cell size instead
		replace size = e_cell_size if e_cell == 1
		generate size_sqr = size^(1/2)
		generate ln_size_sqr = ln(size_sqr)

		* Estimation with robust standard errors (results are displayed only)
		regress ln_tstat ln_size_sqr, robust

		* Graph.
		* NOTE(review): the x variable is the log of the square root of the sample
		* size, while the axis title reads "(log) Sample size" - confirm the label.
		twoway (scatter ln_tstat ln_size_sqr, color(navy)) ///
			(lfit ln_tstat ln_size_sqr) ///
			, legend(off) ///
			xtitle("(log) Sample size") ytitle("(log) t-statistics") ///
			graphregion(fcolor(white) lcolor(white)) plotregion(fcolor(white) lcolor(white)) ysize(5) xsize(7)
		graph export "${output}figure5$idea.pdf", as(pdf) replace
	end


	* Keep one observation per paper to generate the graphs
		
		* Panel a: keep the first estimate of each paper (e_id == 1)
		usedata_all
		global idea "a"
		keep if e_id==1 
		* report the number of distinct papers left in the sample
		distinct p_id 
		publication_bias1

		* Panel b: keep the estimate closest to the median estimate within a paper 
		usedata_all
		global idea "b"
		bysort p_id : egen med_beta = median(e_beta)
		generate deviation = abs(med_beta - e_beta) 
		bysort p_id : egen min_dev = min(deviation)
		keep if deviation == min_dev 
		* Several estimates of a paper can be equally close to the median (for
		* example the two middle estimates when a paper has an even number of
		* them). Break ties within paper by keeping the lowest e_id, so that
		* exactly one observation per paper remains. De-duplicating on e_beta
		* alone would leave such ties in place and would also drop estimates of
		* different papers that happen to share the same coefficient.
		bysort p_id (e_id) : keep if _n == 1
		publication_bias1
		
	
	*--------------------------------------------------------
	* Figure 6: Publication Bias - Distribution of the z-stat (absolute value - one side)
			*	Brodeur et al (2020) 
			*	Card and Krueger (1995)
	*--------------------------------------------------------
	
		* baseline sample 
		usedata_all
		* absolute t-statistic implied by the coefficient and its standard error
		gen atstat=abs(e_beta/e_se)
		* trim the right tail; missing values (for example e_se == 0) are dropped too
		keep if atstat<10
		* Histogram with kernel density overlay. The reference lines are styled
		* directly through the xline() suboptions.
		* NOTE(review): 1.64 and 1.96 are the two-sided 10 and 5 percent normal
		* critical values, whereas 2.32 is close to the one-sided 1 percent value
		* (the two-sided 1 percent value is 2.58) - confirm the intended threshold.
		histogram atstat, density kdensity kdenopts(lc(navy)) color(gs14) lcolor(gs12) width(0.1) ///
			xline(1.64 1.96 2.32, lcolor(maroon) lpattern(dash) lwidth(thin)) ///
			ytitle("Density") xtitle("|t-statistics|") ///
			note(" ") ///
			graphregion(fcolor(white) lcolor(white)) plotregion(fcolor(white) lcolor(white)) ysize(2) xsize(3)
		graph export "${output}figure6a.pdf", as(pdf) replace


		* top journals only 
		usedata_all
		keep if p_lead_j==1
		* absolute t-statistic implied by the coefficient and its standard error
		gen atstat=abs(e_beta/e_se)
		* trim the right tail; missing values (for example e_se == 0) are dropped too
		keep if atstat<10
		* Histogram with kernel density overlay. The reference lines are styled
		* directly through the xline() suboptions.
		* NOTE(review): 1.64 and 1.96 are the two-sided 10 and 5 percent normal
		* critical values, whereas 2.32 is close to the one-sided 1 percent value
		* (the two-sided 1 percent value is 2.58) - confirm the intended threshold.
		histogram atstat, density kdensity kdenopts(lc(navy)) color(gs14) lcolor(gs12) width(0.1) ///
			xline(1.64 1.96 2.32, lcolor(maroon) lpattern(dash) lwidth(thin)) ///
			ytitle("Density") xtitle("|t-statistics|") ///
			note(" ") ///
			graphregion(fcolor(white) lcolor(white)) plotregion(fcolor(white) lcolor(white)) ysize(2) xsize(3)
		graph export "${output}figure6b.pdf", as(pdf) replace

	
	*--------------------------------------------------------
	* Table 4 - Publication bias - Additional controls
	*--------------------------------------------------------
	eststo clear
	global tabname "table4"

		* Columns 1 to 3 add moderators step by step to the common specification:
		*   (1) number of estimates
		*   (2) + e_large_sample, e_cell and e_indiv
		*   (3) + number of authors of the paper
		* The data are reloaded before every regression.
		foreach extra in "ln_nr_e" ///
				"ln_nr_e e_large_sample e_cell e_indiv" ///
				"ln_nr_e e_large_sample e_cell e_indiv p_author_nr" {
			usedata_all
			eststo: meta regress ${var_paper} `extra' ${var_structH} ${var_methodH} i.p_pub_year, fixed
			addloc
		}

		* Column 4: common specification on the baseline estimates only
		usedata_all
		keep if e_baseline == 1
		eststo: meta regress ${var_paper} ${var_structH} ${var_methodH} i.p_pub_year, fixed
		addloc

	* write out the stored estimates under the name held in the global tabname
	gentable
	
	
	*--------------------------------------------------------
	* Table 5 - Publication bias - Sub-Samples
	*--------------------------------------------------------
	eststo clear
	global tabname "table5"

		* Column 1: estimates from papers flagged as leading journals
		usedata_all
		keep if p_lead_j == 1
		eststo: meta regress ${var_paper} ${var_structH} ${var_methodH} ///
			i.p_pub_year, fixed
		addloc

		* Column 2: everything except the leading journals
		usedata_all
		keep if p_lead_j != 1
		eststo: meta regress ${var_paper} ${var_structH} ${var_methodH} ///
			i.p_pub_year, fixed
		addloc

		* Column 3: working papers only
		usedata_all
		keep if p_pub_type == "working paper"
		eststo: meta regress ${var_paper} ${var_structH} ${var_methodH} ///
			i.p_pub_year, fixed
		addloc

	* write out the stored estimates under the name held in the global tabname
	gentable
	
	
	*--------------------------------------------------------
	* Table 6 - Exogeneity (Shift-Share Instrumental Variables + Natural experiment)
	*--------------------------------------------------------
	eststo clear
	global tabname "table6"

		* Column 1: common specification plus the shift-share IV indicator
		usedata_all
		eststo: meta regress ${var_paper} ${var_structH} ${var_methodH} ///
			e_shiftshare_esti i.p_pub_year, fixed
		addloc

		* Column 2: shift-share IV indicator plus the shift-share discussion indicator
		usedata_all
		eststo: meta regress ${var_paper} ${var_structH} ${var_methodH} ///
			e_shiftshare_esti e_shiftsharedis_esti i.p_pub_year, fixed
		addloc

		* Column 3: natural experiments; the tabulation shows the distribution
		* of p_nat_exp before it enters the regression
		usedata_all
		tabulate p_nat_exp
		eststo: meta regress ${var_paper} p_nat_exp ${var_structH} ${var_methodH} ///
			i.p_pub_year, fixed
		addloc

	* write out the stored estimates under the name held in the global tabname
	gentable
	
	
	
