Data146

Pre-Exercise Warm-Up:

#Read file
import pandas as pd
# Location of the tab-separated Gapminder table (relative path, so it is
# resolved against the current working directory).
path_to_data = 'gapminder.tsv'
# The file is tab-delimited rather than comma-delimited, so the delimiter
# has to be given explicitly.
data = pd.read_csv(path_to_data, delimiter='\t')

# Largest GDP per capita: the value itself, the index label of the row
# holding it, and finally the full record at that label.
gdp_column = data['gdpPercap']
max_gdp = gdp_column.max()
max_gdp
max_gdp_idx = gdp_column.idxmax()
max_gdp_idx
data.loc[max_gdp_idx]

# Boolean mask that is True for every row whose continent is Asia.
idx_asia = data['continent'].eq('Asia')
# Filtering keeps the original index labels of the selected rows.
data_asia = data.loc[idx_asia]
# .loc selects by index label (the row labelled 11) ...
data_asia.loc[11]
# ... whereas .iloc selects by position (the first row of the subset).
data_asia.iloc[0]
data_asia
country continent year lifeExp pop gdpPercap
0 Afghanistan Asia 1952 28.801 8425333 779.445314
1 Afghanistan Asia 1957 30.332 9240934 820.853030
2 Afghanistan Asia 1962 31.997 10267083 853.100710
3 Afghanistan Asia 1967 34.020 11537966 836.197138
4 Afghanistan Asia 1972 36.088 13079460 739.981106
... ... ... ... ... ... ...
1675 Yemen, Rep. Asia 1987 52.922 11219340 1971.741538
1676 Yemen, Rep. Asia 1992 55.599 13367997 1879.496673
1677 Yemen, Rep. Asia 1997 58.020 15826497 2117.484526
1678 Yemen, Rep. Asia 2002 60.308 18701257 2234.820827
1679 Yemen, Rep. Asia 2007 62.698 22211743 2280.769906

396 rows × 6 columns

Exercises

# All the years in the data, no duplicates.
# A set removes the duplicates; sorting afterwards gives a stable,
# chronological listing instead of arbitrary set order.
years = sorted(set(data['year']))
years
[1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, 2002, 2007]
# How many unique values are there?
len(years)
12
# What are they?
years
[1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, 2002, 2007]
# What is the largest pop value?
population = data['pop']
pop_max = population.max()
pop_max
1318683096
# When and where did this occur?
# idxmax gives the index label of the row holding the maximum; that row is
# fetched once and then queried for both the year and the country.
idx_pop_max = population.idxmax()
largest_pop_record = data.loc[idx_pop_max]
largest_pop_record['year']
2007
largest_pop_record['country']
'China'
# Extract records for Europe (all years).
idx_europe = data['continent'] == 'Europe'
data_europe = data[idx_europe]
# In 1952, which country had the smallest population?
# Narrow the Europe mask down to the single year of interest.
idx_europe_1952 = idx_europe & (data['year'] == 1952)
data_europe_1952 = data[idx_europe_1952]
pop_min = data_europe_1952['pop'].min()
pop_min
# Search the 1952 subset, not the all-years Europe subset, so the row that
# is found is guaranteed to be a 1952 record. Filtered frames keep the
# original index labels, so the label can be used directly on `data`.
idx_pop_min = data_europe_1952['pop'].idxmin()
data.loc[idx_pop_min, 'country']
'Iceland'
# What was the population in 2007?
# Build one mask per condition, then combine them element-wise.
is_2007 = data['year'].eq(2007)
is_iceland = data['country'].eq('Iceland')
idx_iceland_2007 = is_2007 & is_iceland
data_iceland_2007 = data.loc[idx_iceland_2007]
data_iceland_2007['pop']
695    301931
Name: pop, dtype: int64
# In one step: filter the rows and pick the 'pop' column in a single
# .loc call.
iceland_pop_2007 = data.loc[
    (data['country'] == 'Iceland') & (data['year'] == 2007), 'pop'
]
iceland_pop_2007
695    301931
Name: pop, dtype: int64