import numpy as np
import pandas as pd
# Read the survey CSV from disk; `d` is the frame returned by pandas and
# `df` is the DataFrame the rest of the notebook works with.
csv_path = "C:\\Users\\Anirudh\\Desktop\\onlinefoods.csv"
d = pd.read_csv(csv_path)
df = pd.DataFrame(d)
print(d)
Age Gender Marital Status Occupation Monthly Income \ 0 20 Female Single Student No Income 1 24 Female Single Student Below Rs.10000 2 22 Male Single Student Below Rs.10000 3 22 Female Single Student No Income 4 22 Male Single Student Below Rs.10000 .. ... ... ... ... ... 383 23 Female Single Student No Income 384 23 Female Single Student No Income 385 22 Female Single Student No Income 386 23 Male Single Student Below Rs.10000 387 23 Male Single Student No Income Educational Qualifications Family size latitude longitude Pin code \ 0 Post Graduate 4 12.9766 77.5993 560001 1 Graduate 3 12.9770 77.5773 560009 2 Post Graduate 3 12.9551 77.6593 560017 3 Graduate 6 12.9473 77.5616 560019 4 Post Graduate 4 12.9850 77.5533 560010 .. ... ... ... ... ... 383 Post Graduate 2 12.9766 77.5993 560001 384 Post Graduate 4 12.9854 77.7081 560048 385 Post Graduate 5 12.9850 77.5533 560010 386 Post Graduate 2 12.9770 77.5773 560009 387 Post Graduate 5 12.8988 77.5764 560078 Output Feedback Unnamed: 12 0 Yes Positive Yes 1 Yes Positive Yes 2 Yes Negative Yes 3 Yes Positive Yes 4 Yes Positive Yes .. ... ... ... 383 Yes Positive Yes 384 Yes Positive Yes 385 Yes Positive Yes 386 Yes Positive Yes 387 Yes Positive Yes [388 rows x 13 columns]
# Confirm which class `df` is an instance of.
type(df)
pandas.core.frame.DataFrame
# Inspect the storage dtype of the Gender column.
df['Gender'].dtype
dtype('O')
# Preview the first rows of the frame (pandas shows 5 by default).
df.head()
Age | Gender | Marital Status | Occupation | Monthly Income | Educational Qualifications | Family size | latitude | longitude | Pin code | Output | Feedback | Unnamed: 12 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 20 | Female | Single | Student | No Income | Post Graduate | 4 | 12.9766 | 77.5993 | 560001 | Yes | Positive | Yes |
1 | 24 | Female | Single | Student | Below Rs.10000 | Graduate | 3 | 12.9770 | 77.5773 | 560009 | Yes | Positive | Yes |
2 | 22 | Male | Single | Student | Below Rs.10000 | Post Graduate | 3 | 12.9551 | 77.6593 | 560017 | Yes | Negative | Yes |
3 | 22 | Female | Single | Student | No Income | Graduate | 6 | 12.9473 | 77.5616 | 560019 | Yes | Positive | Yes |
4 | 22 | Male | Single | Student | Below Rs.10000 | Post Graduate | 4 | 12.9850 | 77.5533 | 560010 | Yes | Positive | Yes |
# Preview the last rows of the frame (pandas shows 5 by default).
df.tail()
Age | Gender | Marital Status | Occupation | Monthly Income | Educational Qualifications | Family size | latitude | longitude | Pin code | Output | Feedback | Unnamed: 12 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
383 | 23 | Female | Single | Student | No Income | Post Graduate | 2 | 12.9766 | 77.5993 | 560001 | Yes | Positive | Yes |
384 | 23 | Female | Single | Student | No Income | Post Graduate | 4 | 12.9854 | 77.7081 | 560048 | Yes | Positive | Yes |
385 | 22 | Female | Single | Student | No Income | Post Graduate | 5 | 12.9850 | 77.5533 | 560010 | Yes | Positive | Yes |
386 | 23 | Male | Single | Student | Below Rs.10000 | Post Graduate | 2 | 12.9770 | 77.5773 | 560009 | Yes | Positive | Yes |
387 | 23 | Male | Single | Student | No Income | Post Graduate | 5 | 12.8988 | 77.5764 | 560078 | Yes | Positive | Yes |
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
print(df.describe())
Age Family size latitude longitude Pin code count 388.000000 388.000000 388.000000 388.000000 388.000000 mean 24.628866 3.280928 12.972058 77.600160 560040.113402 std 2.975593 1.351025 0.044489 0.051354 31.399609 min 18.000000 1.000000 12.865200 77.484200 560001.000000 25% 23.000000 2.000000 12.936900 77.565275 560010.750000 50% 24.000000 3.000000 12.977000 77.592100 560033.500000 75% 26.000000 4.000000 12.997025 77.630900 560068.000000 max 33.000000 6.000000 13.102000 77.758200 560109.000000
# Smallest value in the Age column.
df["Age"].min()
18
# Largest value in the Age column.
df["Age"].max()
33
# Apply an identity function to each column; the result has the same
# contents as `df`.
df.apply(lambda column: column)
Age | Gender | Marital Status | Occupation | Monthly Income | Educational Qualifications | Family size | latitude | longitude | Pin code | Output | Feedback | Unnamed: 12 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 20 | Female | Single | Student | No Income | Post Graduate | 4 | 12.9766 | 77.5993 | 560001 | Yes | Positive | Yes |
1 | 24 | Female | Single | Student | Below Rs.10000 | Graduate | 3 | 12.9770 | 77.5773 | 560009 | Yes | Positive | Yes |
2 | 22 | Male | Single | Student | Below Rs.10000 | Post Graduate | 3 | 12.9551 | 77.6593 | 560017 | Yes | Negative | Yes |
3 | 22 | Female | Single | Student | No Income | Graduate | 6 | 12.9473 | 77.5616 | 560019 | Yes | Positive | Yes |
4 | 22 | Male | Single | Student | Below Rs.10000 | Post Graduate | 4 | 12.9850 | 77.5533 | 560010 | Yes | Positive | Yes |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
383 | 23 | Female | Single | Student | No Income | Post Graduate | 2 | 12.9766 | 77.5993 | 560001 | Yes | Positive | Yes |
384 | 23 | Female | Single | Student | No Income | Post Graduate | 4 | 12.9854 | 77.7081 | 560048 | Yes | Positive | Yes |
385 | 22 | Female | Single | Student | No Income | Post Graduate | 5 | 12.9850 | 77.5533 | 560010 | Yes | Positive | Yes |
386 | 23 | Male | Single | Student | Below Rs.10000 | Post Graduate | 2 | 12.9770 | 77.5773 | 560009 | Yes | Positive | Yes |
387 | 23 | Male | Single | Student | No Income | Post Graduate | 5 | 12.8988 | 77.5764 | 560078 | Yes | Positive | Yes |
388 rows × 13 columns
# Take the first value of every column. `.iloc[0]` is explicitly positional,
# whereas plain `column[0]` is a label lookup that only succeeds when the
# index happens to contain the integer label 0.
df.apply(lambda column: column.iloc[0])
Age 20 Gender Female Marital Status Single Occupation Student Monthly Income No Income Educational Qualifications Post Graduate Family size 4 latitude 12.9766 longitude 77.5993 Pin code 560001 Output Yes Feedback Positive Unnamed: 12 Yes dtype: object
# Sort rows by the "Monthly Income" column in ascending order.
# NOTE(review): the column holds text bands, so this is a lexical string sort
# ("10001 to 25000" comes first, "No Income" last) rather than an ordering by
# income level — confirm whether an ordered categorical is wanted instead.
df.sort_values("Monthly Income",ascending=True)
Age | Gender | Marital Status | Occupation | Monthly Income | Educational Qualifications | Family size | latitude | longitude | Pin code | Output | Feedback | Unnamed: 12 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
253 | 22 | Male | Single | Employee | 10001 to 25000 | Graduate | 2 | 12.9299 | 77.6848 | 560103 | Yes | Positive | Yes |
35 | 25 | Male | Single | Student | 10001 to 25000 | Post Graduate | 3 | 12.9306 | 77.5434 | 560085 | Yes | Positive | Yes |
273 | 24 | Female | Married | Employee | 10001 to 25000 | Post Graduate | 2 | 12.9561 | 77.5921 | 560027 | Yes | Positive | Yes |
187 | 25 | Male | Single | Employee | 10001 to 25000 | Graduate | 2 | 12.9757 | 77.5586 | 560023 | Yes | Positive | Yes |
279 | 23 | Female | Prefer not to say | Employee | 10001 to 25000 | Graduate | 4 | 12.9048 | 77.6821 | 560036 | Yes | Positive | Yes |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
157 | 32 | Female | Married | House wife | No Income | Graduate | 3 | 12.9850 | 77.5533 | 560010 | Yes | Positive | Yes |
159 | 20 | Female | Single | Student | No Income | Graduate | 2 | 12.9337 | 77.5900 | 560011 | Yes | Positive | Yes |
160 | 21 | Male | Single | Student | No Income | Graduate | 2 | 12.9337 | 77.5900 | 560011 | Yes | Positive | Yes |
133 | 19 | Male | Single | Student | No Income | Graduate | 2 | 13.0019 | 77.5713 | 560003 | No | Negative | No |
387 | 23 | Male | Single | Student | No Income | Post Graduate | 5 | 12.8988 | 77.5764 | 560078 | Yes | Positive | Yes |
388 rows × 13 columns
# Sort rows alphabetically by the "Marital Status" text column.
df.sort_values(by="Marital Status")
Age | Gender | Marital Status | Occupation | Monthly Income | Educational Qualifications | Family size | latitude | longitude | Pin code | Output | Feedback | Unnamed: 12 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
290 | 27 | Female | Married | Self Employeed | More than 50000 | Graduate | 6 | 13.0734 | 77.5464 | 560014 | No | Positive | No |
278 | 30 | Male | Married | Employee | More than 50000 | Graduate | 5 | 12.9719 | 77.5128 | 560072 | No | Negative | No |
130 | 28 | Male | Married | Employee | More than 50000 | Post Graduate | 3 | 13.0019 | 77.5713 | 560003 | Yes | Positive | Yes |
131 | 32 | Female | Married | Employee | More than 50000 | Graduate | 1 | 13.0019 | 77.5713 | 560003 | No | Positive | No |
276 | 24 | Male | Married | Employee | More than 50000 | Post Graduate | 3 | 12.9515 | 77.4921 | 560056 | Yes | Positive | Yes |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
129 | 23 | Female | Single | Student | No Income | Post Graduate | 4 | 13.0487 | 77.5923 | 560024 | Yes | Positive | Yes |
128 | 23 | Male | Single | Student | No Income | Post Graduate | 3 | 12.9770 | 77.5773 | 560009 | Yes | Positive | Yes |
127 | 25 | Male | Single | Student | No Income | Post Graduate | 4 | 12.9770 | 77.5773 | 560009 | Yes | Positive | Yes |
122 | 26 | Female | Single | Self Employeed | 25001 to 50000 | Post Graduate | 3 | 12.9635 | 77.5821 | 560002 | Yes | Positive | Yes |
387 | 23 | Male | Single | Student | No Income | Post Graduate | 5 | 12.8988 | 77.5764 | 560078 | Yes | Positive | Yes |
388 rows × 13 columns
# Number of rows for each distinct "Marital Status" value, most frequent first.
df.value_counts("Marital Status")
Marital Status Single 268 Married 108 Prefer not to say 12 dtype: int64
# NOTE(review): sort_values returns a new frame; the result here is not
# assigned and no output is recorded for it, so this call appears to have no
# lasting effect — confirm whether it can be dropped.
df.sort_values(by="Occupation")
# Number of rows for each distinct "Occupation" value, most frequent first.
df.value_counts("Occupation")
Occupation Student 207 Employee 118 Self Employeed 54 House wife 9 dtype: int64
# NOTE(review): sort_values returns a new frame; the result here is not
# assigned and no output is recorded for it, so this call appears to have no
# lasting effect — confirm whether it can be dropped.
df.sort_values(by="Feedback")
# Number of rows for each distinct "Feedback" value, most frequent first.
df.value_counts("Feedback")
Feedback Positive 317 Negative 71 dtype: int64
# Row count per "Output" value, ordered by group label.
df.groupby('Output').size()
Output No 87 Yes 301 dtype: int64
import seaborn as sns
# Bar chart of how many rows fall under each "Output" value.
# The data is passed as the `x` keyword: seaborn deprecated positional data
# arguments and from version 0.12 only `data` may be given positionally.
sns.countplot(x=df['Output'], label="Count")
C:\Users\Anirudh\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
<AxesSubplot:xlabel='Output', ylabel='count'>
# Bar chart of how many rows fall under each "Feedback" value.
# The data is passed as the `x` keyword: seaborn deprecated positional data
# arguments and from version 0.12 only `data` may be given positionally.
sns.countplot(x=df['Feedback'], label="Count")
C:\Users\Anirudh\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
<AxesSubplot:xlabel='Feedback', ylabel='count'>
# Row count per "Occupation" value, ordered by group label.
df.groupby('Occupation').size()
Occupation Employee 118 House wife 9 Self Employeed 54 Student 207 dtype: int64
# Row count per "Educational Qualifications" value, ordered by group label.
df.groupby('Educational Qualifications').size()
Educational Qualifications Graduate 177 Ph.D 23 Post Graduate 174 School 12 Uneducated 2 dtype: int64
from matplotlib import pyplot
%matplotlib inline
# Histogram of each numeric column.
df.hist(figsize=(12, 10))
pyplot.show()

# Density curve per column, one subplot each on a 3x3 grid with
# independent x axes.
df.plot.density(subplots=True, layout=(3, 3), sharex=False, figsize=(12, 10))
pyplot.show()

# Box plot per column, one subplot each on a 3x3 grid with independent axes.
df.plot.box(
    subplots=True,
    layout=(3, 3),
    sharex=False,
    sharey=False,
    figsize=(12, 10),
)
pyplot.show()
import pandas
from pandas.plotting import scatter_matrix
# Pairwise correlation between the numeric columns. The numeric (and boolean)
# columns are selected explicitly because the frame also holds text columns:
# calling corr() on those relied on pandas silently discarding them, which
# pandas 2.0+ no longer does (it raises instead).
dataCorr = df.select_dtypes(include=["number", "bool"]).corr()
# NOTE(review): this draws a scatter matrix of the correlation coefficients
# themselves, not of the raw observations — confirm that this is intended.
pandas.plotting.scatter_matrix(dataCorr, figsize=(12, 10))
pyplot.show()
# Fraction of rows taken by each value of the "Unnamed: 12" column.
# NOTE(review): in the rows displayed above this column carries the same
# values as "Output" — presumably a duplicated, unlabelled column in the CSV;
# verify against the source file.
df['Unnamed: 12'].value_counts(normalize=True)
Yes 0.775773 No 0.224227 Name: Unnamed: 12, dtype: float64
# Bar chart of the row count for each value of the "Unnamed: 12" column.
df['Unnamed: 12'].value_counts().plot.bar()
<AxesSubplot:>
# Bar chart of the row count for each Gender value.
df['Gender'].value_counts().plot.bar()
<AxesSubplot:>
import matplotlib.pyplot as plt
# Bar chart of the fraction of rows for each Gender value.
df['Gender'].value_counts(normalize=True).plot.bar(title= 'Gender')
<AxesSubplot:title={'center':'Gender'}>
# Bar chart of the fraction of rows for each Marital Status value.
df['Marital Status'].value_counts(normalize=True).plot.bar(title= 'Marital Status')
<AxesSubplot:title={'center':'Marital Status'}>
# Bar chart of the fraction of rows for each Educational Qualifications value.
df['Educational Qualifications'].value_counts(normalize=True).plot.bar(title='Educational Qualifications')
<AxesSubplot:title={'center':'Educational Qualifications'}>
# Same proportions as a line chart; the points follow value_counts order
# (most frequent category first).
df['Educational Qualifications'].value_counts(normalize=True).plot.line(title='Educational Qualifications')
<AxesSubplot:title={'center':'Educational Qualifications'}>
# Line chart of the fraction of rows at each age.
# value_counts orders its result by frequency, so the index is sorted back
# into ascending age first; otherwise the line jumps back and forth along
# the x axis instead of tracing the distribution from youngest to oldest.
df['Age'].value_counts(normalize=True).sort_index().plot.line(title='Age')
<AxesSubplot:title={'center':'Age'}>
# Mean of every numeric column, broken down by Gender (pivot_table's default
# aggregation is the mean). The numeric columns are passed explicitly so the
# text columns are never handed to the mean aggregation; newer pandas
# releases raise on them rather than dropping them silently.
numeric_cols = df.select_dtypes(include="number").columns.tolist()
table = pd.pivot_table(data=df, index=['Gender'], values=numeric_cols)
table
Age | Family size | Pin code | latitude | longitude | |
---|---|---|---|---|---|
Gender | |||||
Female | 24.415663 | 3.409639 | 560038.819277 | 12.974443 | 77.603669 |
Male | 24.788288 | 3.184685 | 560041.081081 | 12.970274 | 77.597535 |