You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Of all the variables we want just two: (1) Life Ladder: self-reported average happiness by country and year, on a 0-10 scale (the Cantril ladder); and (2) Log GDP per capita: the natural log of GDP per person, in constant 2011 dollars at PPP (purchasing-power parity).
# Download the World Happiness Report 2019 online data (Chapter 2) and read
# its "Table2.1" sheet into raw.dat.
# The workbook is read back from exactly the location it was downloaded to;
# previously it was saved under `path` but read from the working directory.
# `path` is defined elsewhere and is used as a plain string prefix, so it is
# presumably expected to end with a path separator -- TODO confirm.
whr.file <- paste0(path, "WHR_2019_data.xls")
download.file(url = "https://s3.amazonaws.com/happiness-report/2019/Chapter2OnlineData.xls",
              destfile = whr.file, mode = "wb", method = "auto") # "wb": .xls is binary
raw.dat <- read_excel(path = whr.file, sheet = "Table2.1")
# Happiness and GDP by country and year.
# Builds `dat` from the raw WHR table: keeps the two variables of interest,
# attaches ISO2 country codes and adult population, and adds GDP per person
# in levels alongside the logged value.
dat <- raw.dat %>%
  select(name = `Country name`, year = Year, happy = `Life Ladder`, gdp.pc = `Log GDP per capita`) %>%
  # Shorten two long-form names before looking up country codes
  mutate(name = ifelse(name == "Taiwan Province of China", "Taiwan", name)) %>%
  mutate(name = ifelse(name == "Hong Kong S.A.R. of China", "Hong Kong", name)) %>%
  # Add in ISO2c code
  mutate(iso2c = countrycode(name, origin = "country.name", destination = "iso2c")) %>%
  # Merge with population data (`pop` is defined elsewhere in the file)
  left_join(., select(pop, iso2c, year, pop.adult), by = c("iso2c", "year")) %>%
  # Un-log GDP (take exp)
  mutate(gdp.pc2 = exp(gdp.pc)) %>%
  # Keep the logged value as log.gdp.pc and the un-logged value as gdp.pc
  select(iso2c, name, year, happy, log.gdp.pc = gdp.pc, gdp.pc = gdp.pc2, pop.adult) %>%
  arrange(iso2c, year) %>%
  # Remove rows whose name could not be matched to an ISO2 code (Kosovo)
  filter(!is.na(iso2c))
## Warning in countrycode(name, origin = "country.name", destination = "iso2c"): Some values were not matched unambiguously: Kosovo
# Show the first and last rows of `dat`, then the number of rows per year
# (transposed so the years run across the table).
dat %>% head() %>% kable()
dat %>% tail() %>% kable()
kable(t(count(dat, year)))
iso2c
name
year
happy
log.gdp.pc
gdp.pc
pop.adult
AE
United Arab Emirates
2006
6.734222
11.38607
88086.37
4365939
AE
United Arab Emirates
2009
6.866063
11.01485
60769.89
6600994
AE
United Arab Emirates
2010
7.097455
10.95488
57232.59
7164304
AE
United Arab Emirates
2011
7.118702
10.97445
58363.69
7497475
AE
United Arab Emirates
2012
7.217767
10.99237
59419.08
7690070
AE
United Arab Emirates
2013
6.620951
11.02985
61688.39
7779198
iso2c
name
year
happy
log.gdp.pc
gdp.pc
pop.adult
ZW
Zimbabwe
2013
4.690188
7.565154
1929.765
8805759
ZW
Zimbabwe
2014
4.184451
7.562753
1925.139
9015154
ZW
Zimbabwe
2015
3.703191
7.556052
1912.280
9234454
ZW
Zimbabwe
2016
3.735400
7.538829
1879.628
9467437
ZW
Zimbabwe
2017
3.638300
7.549491
1899.775
9718776
ZW
Zimbabwe
2018
3.616480
7.553395
1907.206
9976787
year
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
n
27
89
101
109
113
123
145
141
136
144
142
141
146
135
Chart data
Make the data for the scatter chart shown in the print version of the article. We take average happiness and GDP per person for two periods: 2005-08 and 2016-18. We then match these two sets of data together, arriving at 125 country pairs of GDP and happiness, approximately a decade apart.
# Filter plot data -- for adult population >5m.
# Reduces number of countries from 125 to 85 pairs.
# The selection is made on the year == 2018 rows of scat.dat and keeps only
# the iso2c column, so it can be used as a country whitelist.
# NOTE(review): the filter uses a column called `pop`; confirm scat.dat
# (built elsewhere) carries the adult population under that name.
iso2c.select <- scat.dat %>%
  filter(year == 2018) %>%
  filter(pop > 5000000) %>%
  select(iso2c)
nrow(iso2c.select)
## [1] 85
# And so our population selection looks like:
# number of selected countries in each pop.break bucket, using 2018 rows only.
scat.dat %>%
  filter(year == 2018, iso2c %in% iso2c.select$iso2c) %>%
  count(pop.break) %>%
  kable()
pop.break
n
<25m
54
25m-100m
21
100m-500m
8
500m+
2
# Make scatter chart with ggplot.
# Points are the year == 2018 rows for the selected countries; geom_path then
# draws one line per country (group = name) through all of that country's
# rows in scat.dat. Colour encodes `paradox`, point size encodes `pop.levels`.
scat.plot <- scat.dat %>%
  filter(., year == 2018) %>%
  filter(iso2c %in% iso2c.select$iso2c) %>%
  ggplot(., aes(x = gdp.pc / 10^3, y = happy, group = name, color = paradox)) +
  geom_point(mapping = aes(size = pop.levels), alpha = 0.5) +
  scale_colour_manual(values = c("blue", "red"), labels = c("same direction", "opposite directions"),
                      guide = guide_legend(title = "Happiness and GDP per person:", title.position = "top")) +
  # Size legend labels in millions; third bucket is 100-500 (was mistyped as 100-5000)
  scale_radius(labels = c('5-25', '25-100', '100-500', '500+'),
               guide = guide_legend(title = "Population, m", title.position = "top")) +
  # x is in $'000, so breaks and minor breaks are rescaled by 10^3 to match
  scale_x_log10(breaks = c(1e2, 1e3, 1e4, 1e5) / 10^3,
                minor_breaks = c(seq(1e2, 1e3, 1e2) / 10^3, seq(1e3, 1e4, 1e3) / 10^3,
                                 seq(1e4, 1e5, 1e4) / 10^3, seq(1e5, 1e6, 1e5) / 10^3)) +
  #geom_text(data=filter(scat.dat, year==2018, iso2c %in% iso2c.select$iso2c), mapping=aes(label=name), hjust=0) +
  geom_path(data = filter(scat.dat, iso2c %in% iso2c.select$iso2c)) +
  theme_minimal() + theme(legend.position = 'top') + theme(aspect.ratio = 0.5) +
  ggtitle("GDP per person v self-reported happiness", subtitle = "85 countries with adult population over 5m") +
  ylab("Happiness, 0-10 scale") + xlab("GDP per person, $'000, at purchasing-power parity, log scale") +
  ylim(3, 8) + geom_blank()