import pandas as pd
import numpy as np


# Column labels handed to read_csv as `names`, so the first line of the file
# is read as data rather than as a header row.
features = [
    "Age",
    "Workclass",
    "fnlwgt",
    "Education",
    "Education-Num",
    "Martial Status",
    "Occupation",
    "Relationship",
    "Race",
    "Sex",
    "Capital Gain",
    "Capital Loss",
    "Hours per week",
    "Country",
    "Target",
]

# Load the data file into a DataFrame labelled with the column names above.
df = pd.read_csv("adult.data", names=features)
df  # bare expression: only displayed when run in an interactive session


# Count how many rows carry each distinct value of the "Sex" column.
sex_column = df["Sex"]
gender_count = sex_column.value_counts()
print(gender_count)


# Mean "Age" per "Sex" group, keeping only the first row of the grouped result.
# The value is neither assigned nor printed, so it is only visible when this
# line is evaluated interactively.
df.groupby("Sex")[["Age"]].mean().head(1)


# Fraction of rows whose "Country" equals ' Germany' (matched including the
# leading space in the literal).
german_mask = df["Country"] == ' Germany'
num_German = int(german_mask.sum())  # number of True entries == matching rows
all_citizens = df.shape[0]  # total number of rows
print('German citizens is: ', num_German / all_citizens)


# Ages of the rows labelled ' >50K': report mean and standard deviation,
# each rounded to two decimals. ddof=0 matches the default used by np.std.
more_than50 = df.loc[df["Target"] == ' >50K', "Age"]
print('Mean of those whose salary is more than 50K: ',
      round(more_than50.mean(), 2))
print('Standard Deviation of those whose salary is more than 50K: ',
      round(more_than50.std(ddof=0), 2))

# Same two statistics for the rows labelled ' <=50K'.
less_than50 = df.loc[df["Target"] == ' <=50K', "Age"]
print('\nMean of those whose salary is less than 50K: ',
      round(less_than50.mean(), 2))
print('Standard Deviation of those whose salary is less than 50K: ',
      round(less_than50.std(ddof=0), 2))


# Check whether every row labelled '>50K' has one of the listed education levels.
high_income_education = ['Bachelors', 'Prof-school', 'Assoc-acdm', 'Assoc-voc', 'Masters', 'Doctorate']

# The other comparisons in this file match values with a leading space
# (' >50K', ' Germany'). Comparing the raw columns against unpadded strings
# would therefore select no rows, and .all() on an empty selection is
# vacuously True. Strip surrounding whitespace from both columns so the
# comparison works whether or not the loaded values are padded.
target_labels = df['Target'].str.strip()
education_levels = df['Education'].str.strip()

high_earner_education = education_levels[target_labels == '>50K']
high_income_education_check = high_earner_education.isin(high_income_education).all()
print(high_income_education_check)