import pandas as pd
import plotnine as p9
from plotnine import *
# Build one DataFrame from the per-year name files (yob1980.txt .. yob2014.txt).
# The files are read without a header row, so column names are supplied here.
# The per-year frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied every previously loaded row on each iteration.
yearparts = []
for i in range(1980, 2015):
    yearpart = pd.read_csv(
        "~/Downloads/Names/yob" + str(i) + ".txt",
        header=None,
        names=['Name', 'Gender', 'Count'],
    )
    yearpart['Year'] = i  # tag every row with the year taken from the file name
    yearparts.append(yearpart)
# No ignore_index: each year's rows keep their own 0-based index, as before.
df = pd.concat(yearparts)
# Bare expression: the (rows, columns) tuple is only echoed in an interactive session.
df.shape
# Rows for the single name "Ishmael", across all loaded years and genders.
my_name = df.loc[df["Name"] == "Ishmael"]
# Line plot of Count against Year with the y-axis fixed to the range 0-200.
# Left as a bare expression so an interactive session renders it.
(
    ggplot(my_name, aes(x="Year", y="Count"))
    + geom_line()
    + ylim(0, 200)
)
# Keep only the rows whose Gender is "F".
girl_names = df.loc[df["Gender"] == "F"]
# Number of non-null Name entries per Year. After reset_index the frame has
# the columns "Year" and "Name", where "Name" holds the count.
year_count = (
    girl_names.groupby('Year')['Name']
    .count()
    .to_frame()
    .reset_index()
)
# Line plot of that per-year count; a bare expression, rendered only interactively.
(
    ggplot(year_count, aes(x="Year", y="Name"))
    + geom_line()
)
def toptennames(gender, year, data=None, n=10):
    """Return the highest-count names for one gender in one year.

    Args:
        gender: Value compared against the ``Gender`` column
            (the script uses ``"F"`` and ``"M"``).
        year: Value compared against the ``Year`` column.
        data: DataFrame with ``Gender``, ``Year`` and ``Count`` columns.
            Defaults to the module-level ``df`` so existing two-argument
            calls keep working.
        n: Maximum number of rows to return. Defaults to 10.

    Returns:
        The matching rows sorted by ``Count`` in descending order, limited
        to the first ``n`` rows. Empty if nothing matches.
    """
    if data is None:
        # Fall back to the frame loaded at module level.
        data = df
    ttn = data[(data.Year == year) & (data.Gender == gender)]
    ttn = ttn.sort_values("Count", ascending=False)
    return ttn.head(n)
# Ten highest-count female names of 1990. Bare expression: the result is
# discarded unless the code runs in an interactive session.
toptennames("F", 1990)

# --- Names used most evenly across genders in 2014 ---
# Take the 2014 rows and drop the now-constant Year column. The non-inplace
# drop() returns a new frame rather than mutating a filtered slice of df,
# which is the pattern that raises pandas' SettingWithCopyWarning.
names_2014 = df[df.Year == 2014].drop(columns=['Year'])
names_2014_male = names_2014[names_2014.Gender == "M"]
names_2014_female = names_2014[names_2014.Gender == "F"]
# Outer join on Name keeps names that appear for only one gender; the
# overlapping Gender/Count columns get the _male / _female suffixes.
names_2014 = names_2014_male.merge(
    names_2014_female,
    on='Name',
    how='outer',
    suffixes=('_male', '_female'),
)
# Missing counts become 0. Note this fills every column, so the Gender_*
# column of a single-gender name is also set to 0.
names_2014 = names_2014.fillna(0)
# Despite its name, 'total' is the male-minus-female difference;
# 'abstotal' is its magnitude, so small values mean an even split.
names_2014['total'] = names_2014.Count_male - names_2014.Count_female
names_2014['abstotal'] = names_2014.total.abs()
names_2014 = names_2014.sort_values('abstotal', ascending=True)
# Keep only names whose combined count exceeds 1000.
names_2014 = names_2014[(names_2014.Count_male + names_2014.Count_female) > 1000]
# Bare expression: the ten most evenly split names, shown only interactively.
names_2014.head(10)