import pandas as pd
import numpy as np
# Column labels applied to the CSV, in file order. They are passed via
# ``names=`` so pandas labels the columns from this list instead of
# consuming the first row of the file as a header.
features = [
    "Age",
    "Workclass",
    "fnlwgt",
    "Education",
    "Education-Num",
    "Martial Status",
    "Occupation",
    "Relationship",
    "Race",
    "Sex",
    "Capital Gain",
    "Capital Loss",
    "Hours per week",
    "Country",
    "Target",
]
df = pd.read_csv("adult.data", names=features)
# Bare expression: renders the frame in a notebook/REPL, no effect in a script.
df
# Number of rows per distinct value of the "Sex" column.
gender_count = df["Sex"].value_counts()
print(gender_count)
# Mean age per sex, keeping only the first group of the sorted result.
# Bare expression: only displayed in a notebook/REPL.
df.groupby("Sex")[["Age"]].mean().iloc[:1]
# Proportion of rows whose "Country" is Germany. The comparison value keeps
# its leading space to match how the values appear in the loaded frame.
num_German = len(df.loc[df["Country"] == " Germany"])
all_citizens = df.shape[0]
print('German citizens is: ', num_German / all_citizens)
# Ages of the rows labelled ' >50K', then their mean and standard deviation
# rounded to two decimals.
more_than50 = df.loc[df["Target"] == " >50K", "Age"]
print(
    'Mean of those whose salary is more than 50K: ',
    round(np.mean(more_than50), 2),
)
print(
    'Standard Deviation of those whose salary is more than 50K: ',
    round(np.std(more_than50), 2),
)

# Same statistics for the rows labelled ' <=50K'.
less_than50 = df.loc[df["Target"] == " <=50K", "Age"]
print(
    '\nMean of those whose salary is less than 50K: ',
    round(np.mean(less_than50), 2),
)
print(
    'Standard Deviation of those whose salary is less than 50K: ',
    round(np.std(less_than50), 2),
)
# Check whether every row labelled '>50K' has one of these education levels.
high_income_education = ['Bachelors', 'Prof-school', 'Assoc-acdm', 'Assoc-voc', 'Masters', 'Doctorate']
# The string columns in this frame carry a leading space (the other filters in
# this script match ' >50K', ' <=50K' and ' Germany'). Strip both columns
# before comparing: without that, the '>50K' mask selects no rows and .all()
# on the empty selection is vacuously True, whatever the data contains.
high_earner_education = df.loc[df['Target'].str.strip() == '>50K', 'Education'].str.strip()
high_income_education_check = high_earner_education.isin(high_income_education).all()
print(high_income_education_check)