Pre-Exercise Warm-Up:
#Load the tab-separated data file into a DataFrame
import pandas as pd
path_to_data = 'gapminder.tsv'
data = pd.read_csv(path_to_data, sep='\t')
#Largest GDP per capita: the value, the row label that holds it, and the full row
gdp_per_capita = data['gdpPercap']
max_gdp = gdp_per_capita.max()
max_gdp
max_gdp_idx = gdp_per_capita.idxmax()
max_gdp_idx
data.loc[max_gdp_idx]
#Boolean mask for the Asian records and the frame filtered by it
idx_asia = data['continent'].eq('Asia')
data_asia = data.loc[idx_asia]
#.loc looks rows up by label (11 is a label kept from the full frame),
#.iloc looks them up by position (0 is the first row of the subset)
data_asia.loc[11]
data_asia.iloc[0]
data_asia
 | country | continent | year | lifeExp | pop | gdpPercap |
---|---|---|---|---|---|---|
0 | Afghanistan | Asia | 1952 | 28.801 | 8425333 | 779.445314 |
1 | Afghanistan | Asia | 1957 | 30.332 | 9240934 | 820.853030 |
2 | Afghanistan | Asia | 1962 | 31.997 | 10267083 | 853.100710 |
3 | Afghanistan | Asia | 1967 | 34.020 | 11537966 | 836.197138 |
4 | Afghanistan | Asia | 1972 | 36.088 | 13079460 | 739.981106 |
... | ... | ... | ... | ... | ... | ... |
1675 | Yemen, Rep. | Asia | 1987 | 52.922 | 11219340 | 1971.741538 |
1676 | Yemen, Rep. | Asia | 1992 | 55.599 | 13367997 | 1879.496673 |
1677 | Yemen, Rep. | Asia | 1997 | 58.020 | 15826497 | 2117.484526 |
1678 | Yemen, Rep. | Asia | 2002 | 60.308 | 18701257 | 2234.820827 |
1679 | Yemen, Rep. | Asia | 2007 | 62.698 | 22211743 | 2280.769906 |
396 rows × 6 columns
Exercises
#Distinct years present in the data (a set drops the duplicates)
years = list(set(data['year']))
years
[1952, 1987, 1957, 1992, 1962, 1997, 1967, 2002, 1972, 2007, 1977, 1982]
#How many unique year values are there?
len(years)
12
#What are the unique year values? (built from a set, so not in sorted order)
years
[1952, 1987, 1957, 1992, 1962, 1997, 1967, 2002, 1972, 2007, 1977, 1982]
#Largest value in the pop column across the whole data set
pop_max = data.loc[:, 'pop'].max()
pop_max
1318683096
#When and where did this occur? Find the row label of the maximum, then read its year
idx_pop_max = data.loc[:, 'pop'].idxmax()
data.loc[idx_pop_max]['year']
2007
#Country of the record with the largest population
data.loc[idx_pop_max, 'country']
'China'
#Keep only the European records
idx_europe = data['continent'].eq('Europe')
data_europe = data.loc[idx_europe]
#In 1952, which European country had the smallest population?
#Restrict to rows that are both European and from 1952
idx_europe_1952 = (data['continent']=='Europe') & (data['year']==1952)
data_europe_1952 = data[idx_europe_1952]
pop_min = data_europe_1952['pop'].min()
pop_min
#Take idxmin from the 1952 subset (not from all European years) so the row
#label refers to the same record as pop_min
idx_pop_min = data_europe_1952['pop'].idxmin()
#The subset keeps the labels of the full frame, so the label can be looked up in data
data.loc[idx_pop_min, 'country']
'Iceland'
#What was Iceland's population in 2007?
idx_iceland_2007 = (data['country']=='Iceland') & (data['year']==2007)
data_iceland_2007 = data.loc[idx_iceland_2007]
data_iceland_2007['pop']
695 301931
Name: pop, dtype: int64
#In one step: select the rows by mask and the pop column in a single .loc call
iceland_pop_2007 = data.loc[(data['year']==2007) & (data['country']=='Iceland'), 'pop']
iceland_pop_2007
695 301931
Name: pop, dtype: int64