# Heart disease (UCI dataset)
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the UCI heart-disease CSV into a DataFrame.
# A raw string keeps the Windows backslashes literal. The previous literal
# relied on "\M" and "\h" being unrecognised escapes, which newer Python
# versions report as invalid escape sequences. The resulting path is the same.
df = pd.read_csv(r"C:\Users\MNLVBPV\heart_disease_uci.csv")
df
id | age | sex | dataset | cp | trestbps | chol | fbs | restecg | thalch | exang | oldpeak | slope | ca | thal | num | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 63 | Male | Cleveland | typical angina | 145.0 | 233.0 | True | lv hypertrophy | 150.0 | False | 2.3 | downsloping | 0.0 | fixed defect | 0 |
1 | 2 | 67 | Male | Cleveland | asymptomatic | 160.0 | 286.0 | False | lv hypertrophy | 108.0 | True | 1.5 | flat | 3.0 | normal | 2 |
2 | 3 | 67 | Male | Cleveland | asymptomatic | 120.0 | 229.0 | False | lv hypertrophy | 129.0 | True | 2.6 | flat | 2.0 | reversable defect | 1 |
3 | 4 | 37 | Male | Cleveland | non-anginal | 130.0 | 250.0 | False | normal | 187.0 | False | 3.5 | downsloping | 0.0 | normal | 0 |
4 | 5 | 41 | Female | Cleveland | atypical angina | 130.0 | 204.0 | False | lv hypertrophy | 172.0 | False | 1.4 | upsloping | 0.0 | normal | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
915 | 916 | 54 | Female | VA Long Beach | asymptomatic | 127.0 | 333.0 | True | st-t abnormality | 154.0 | False | 0.0 | NaN | NaN | NaN | 1 |
916 | 917 | 62 | Male | VA Long Beach | typical angina | NaN | 139.0 | False | st-t abnormality | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
917 | 918 | 55 | Male | VA Long Beach | asymptomatic | 122.0 | 223.0 | True | st-t abnormality | 100.0 | False | 0.0 | NaN | NaN | fixed defect | 2 |
918 | 919 | 58 | Male | VA Long Beach | asymptomatic | NaN | 385.0 | True | lv hypertrophy | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
919 | 920 | 62 | Male | VA Long Beach | atypical angina | 120.0 | 254.0 | False | lv hypertrophy | 93.0 | True | 0.0 | NaN | NaN | NaN | 1 |
920 rows × 16 columns
# Preview the first rows of the raw data (head() returns 5 rows by default).
df.head()
id | age | sex | dataset | cp | trestbps | chol | fbs | restecg | thalch | exang | oldpeak | slope | ca | thal | num | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 63 | Male | Cleveland | typical angina | 145.0 | 233.0 | True | lv hypertrophy | 150.0 | False | 2.3 | downsloping | 0.0 | fixed defect | 0 |
1 | 2 | 67 | Male | Cleveland | asymptomatic | 160.0 | 286.0 | False | lv hypertrophy | 108.0 | True | 1.5 | flat | 3.0 | normal | 2 |
2 | 3 | 67 | Male | Cleveland | asymptomatic | 120.0 | 229.0 | False | lv hypertrophy | 129.0 | True | 2.6 | flat | 2.0 | reversable defect | 1 |
3 | 4 | 37 | Male | Cleveland | non-anginal | 130.0 | 250.0 | False | normal | 187.0 | False | 3.5 | downsloping | 0.0 | normal | 0 |
4 | 5 | 41 | Female | Cleveland | atypical angina | 130.0 | 204.0 | False | lv hypertrophy | 172.0 | False | 1.4 | upsloping | 0.0 | normal | 0 |
# Print per-column non-null counts and dtypes to spot missing data.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 920 entries, 0 to 919 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 920 non-null int64 1 age 920 non-null int64 2 sex 920 non-null object 3 dataset 920 non-null object 4 cp 920 non-null object 5 trestbps 861 non-null float64 6 chol 890 non-null float64 7 fbs 830 non-null object 8 restecg 918 non-null object 9 thalch 865 non-null float64 10 exang 865 non-null object 11 oldpeak 858 non-null float64 12 slope 611 non-null object 13 ca 309 non-null float64 14 thal 434 non-null object 15 num 920 non-null int64 dtypes: float64(5), int64(3), object(8) memory usage: 115.1+ KB
# (rows, columns) of the raw frame.
df.shape
(920, 16)
# Column labels as loaded from the CSV (they are renamed further below).
df.columns
Index(['id', 'age', 'sex', 'dataset', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalch', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num'], dtype='object')
# Frequency of each category in the `cp` (chest pain type) column.
df['cp'].value_counts()
asymptomatic 496 non-anginal 204 atypical angina 174 typical angina 46 Name: cp, dtype: int64
# Number of missing values in each column of the raw frame.
df.isna().sum()
id 0 age 0 sex 0 dataset 0 cp 0 trestbps 59 chol 30 fbs 90 restecg 2 thalch 55 exang 55 oldpeak 62 slope 309 ca 611 thal 486 num 0 dtype: int64
# Replace the terse UCI column codes with descriptive names.
# An explicit old -> new mapping (instead of assigning a positional list)
# keeps each label attached to the right column even if the CSV column order
# changes. Keys that are not present are ignored, so re-running this cell
# after the rename is harmless.
# NOTE: 'cholestrol' (sic) and the space in 'exercise_induced angina' are kept
# exactly as before so any existing references to those names keep working.
df.rename(
    columns={
        'dataset': 'study_location',
        'cp': 'chest_pain_type',
        'trestbps': 'resting_bloodpressure',
        'chol': 'cholestrol',
        'fbs': 'fasting_bloodsugar',
        'restecg': 'resting_ecg',
        'thalch': 'maximum_heartrate_achieved',
        'exang': 'exercise_induced angina',
        'oldpeak': 'st_depression',
        'ca': 'major_vessel_colored',
        'num': 'target',
    },
    inplace=True,
)
df.head(5)
id | age | sex | study_location | chest_pain_type | resting_bloodpressure | cholestrol | fasting_bloodsugar | resting_ecg | maximum_heartrate_achieved | exercise_induced angina | st_depression | slope | major_vessel_colored | thal | target | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 63 | Male | Cleveland | typical angina | 145.0 | 233.0 | True | lv hypertrophy | 150.0 | False | 2.3 | downsloping | 0.0 | fixed defect | 0 |
1 | 2 | 67 | Male | Cleveland | asymptomatic | 160.0 | 286.0 | False | lv hypertrophy | 108.0 | True | 1.5 | flat | 3.0 | normal | 2 |
2 | 3 | 67 | Male | Cleveland | asymptomatic | 120.0 | 229.0 | False | lv hypertrophy | 129.0 | True | 2.6 | flat | 2.0 | reversable defect | 1 |
3 | 4 | 37 | Male | Cleveland | non-anginal | 130.0 | 250.0 | False | normal | 187.0 | False | 3.5 | downsloping | 0.0 | normal | 0 |
4 | 5 | 41 | Female | Cleveland | atypical angina | 130.0 | 204.0 | False | lv hypertrophy | 172.0 | False | 1.4 | upsloping | 0.0 | normal | 0 |
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
df.describe()
id | age | resting_bloodpressure | cholestrol | maximum_heartrate_achieved | st_depression | major_vessel_colored | target | |
---|---|---|---|---|---|---|---|---|
count | 920.000000 | 920.000000 | 861.000000 | 890.000000 | 865.000000 | 858.000000 | 309.000000 | 920.000000 |
mean | 460.500000 | 53.510870 | 132.132404 | 199.130337 | 137.545665 | 0.878788 | 0.676375 | 0.995652 |
std | 265.725422 | 9.424685 | 19.066070 | 110.780810 | 25.926276 | 1.091226 | 0.935653 | 1.142693 |
min | 1.000000 | 28.000000 | 0.000000 | 0.000000 | 60.000000 | -2.600000 | 0.000000 | 0.000000 |
25% | 230.750000 | 47.000000 | 120.000000 | 175.000000 | 120.000000 | 0.000000 | 0.000000 | 0.000000 |
50% | 460.500000 | 54.000000 | 130.000000 | 223.000000 | 140.000000 | 0.500000 | 0.000000 | 1.000000 |
75% | 690.250000 | 60.000000 | 140.000000 | 268.000000 | 157.000000 | 1.500000 | 1.000000 | 2.000000 |
max | 920.000000 | 77.000000 | 200.000000 | 603.000000 | 202.000000 | 6.200000 | 3.000000 | 4.000000 |
# Summary (count / unique / top / freq) of the object-dtype columns.
# The builtin `object` replaces the `np.object` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24, where accessing it raises AttributeError.
df.describe(include=[object])
sex | study_location | chest_pain_type | fasting_bloodsugar | resting_ecg | exercise_induced angina | slope | thal | |
---|---|---|---|---|---|---|---|---|
count | 920 | 920 | 920 | 830 | 918 | 865 | 611 | 434 |
unique | 2 | 4 | 4 | 2 | 3 | 2 | 3 | 3 |
top | Male | Cleveland | asymptomatic | False | normal | False | flat | normal |
freq | 726 | 304 | 496 | 692 | 551 | 528 | 345 | 196 |
# Discard, in place, every row that has at least one missing value, then
# confirm that no missing values remain in any column.
df.dropna(axis=0, how='any', inplace=True)
df.isna().sum()
id 0 age 0 sex 0 study_location 0 chest_pain_type 0 resting_bloodpressure 0 cholestrol 0 fasting_bloodsugar 0 resting_ecg 0 maximum_heartrate_achieved 0 exercise_induced angina 0 st_depression 0 slope 0 major_vessel_colored 0 thal 0 target 0 dtype: int64
# (rows, columns) after the rows with missing values were dropped.
df.shape
(299, 16)
# Class distribution of the `target` column in the cleaned data.
df['target'].value_counts()
0 160 1 56 2 35 3 35 4 13 Name: target, dtype: int64
# Overview figure: gender split (pie chart) next to the age distribution.
plt.figure(figsize=(18, 12))

# --- Gender pie chart ---
plt.subplot(221)
sex_counts = df['sex'].value_counts()
# Wedge labels come from the counted categories themselves. The previous
# hard-coded ['Male', 'Female'] list assumed the order value_counts() returns
# (most frequent first) and would mislabel the wedges if that order differed.
sex_counts.plot.pie(
    autopct='%1.0f%%',
    colors=sns.color_palette('prism', 5),
    startangle=900,
    labels=sex_counts.index,
    wedgeprops={'linewidth': 2, 'edgecolor': 'w'},
    explode=[.1] * len(sex_counts),
    shadow=True,
)
plt.title('Distribution of Gender')

# --- Age distribution ---
plt.subplot(222)
# sns.distplot is deprecated; histplot (density histogram + KDE curve) with a
# rugplot on the same axes is the documented replacement for
# distplot(..., rug=True).
ax = sns.histplot(df['age'], kde=True, stat='density')
sns.rugplot(df['age'], ax=ax)
plt.title('Age wise distribution')

plt.show()
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours classifier with scikit-learn's default settings.
knn_clf = KNeighborsClassifier()

# The label to predict is `target`. The previous code used `id`, which is
# only a row identifier, and left the real label `target` among the features.
# Both `id` and `target` are excluded from the features.
y = df['target']
X = df.drop(['id', 'target'], axis=1)

# NOTE(review): X still contains object-dtype columns (e.g. sex,
# chest_pain_type, thal); they presumably need to be encoded numerically
# before knn_clf.fit(X_train, y_train) can be called -- confirm downstream.

# Hold out 30% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=30)
# Display the cleaned frame (rows with missing values already dropped).
df
id | age | sex | study_location | chest_pain_type | resting_bloodpressure | cholestrol | fasting_bloodsugar | resting_ecg | maximum_heartrate_achieved | exercise_induced angina | st_depression | slope | major_vessel_colored | thal | target | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 63 | Male | Cleveland | typical angina | 145.0 | 233.0 | True | lv hypertrophy | 150.0 | False | 2.3 | downsloping | 0.0 | fixed defect | 0 |
1 | 2 | 67 | Male | Cleveland | asymptomatic | 160.0 | 286.0 | False | lv hypertrophy | 108.0 | True | 1.5 | flat | 3.0 | normal | 2 |
2 | 3 | 67 | Male | Cleveland | asymptomatic | 120.0 | 229.0 | False | lv hypertrophy | 129.0 | True | 2.6 | flat | 2.0 | reversable defect | 1 |
3 | 4 | 37 | Male | Cleveland | non-anginal | 130.0 | 250.0 | False | normal | 187.0 | False | 3.5 | downsloping | 0.0 | normal | 0 |
4 | 5 | 41 | Female | Cleveland | atypical angina | 130.0 | 204.0 | False | lv hypertrophy | 172.0 | False | 1.4 | upsloping | 0.0 | normal | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
299 | 300 | 68 | Male | Cleveland | asymptomatic | 144.0 | 193.0 | True | normal | 141.0 | False | 3.4 | flat | 2.0 | reversable defect | 2 |
300 | 301 | 57 | Male | Cleveland | asymptomatic | 130.0 | 131.0 | False | normal | 115.0 | True | 1.2 | flat | 1.0 | reversable defect | 3 |
301 | 302 | 57 | Female | Cleveland | atypical angina | 130.0 | 236.0 | False | lv hypertrophy | 174.0 | False | 0.0 | flat | 1.0 | normal | 1 |
508 | 509 | 47 | Male | Hungary | asymptomatic | 150.0 | 226.0 | False | normal | 98.0 | True | 1.5 | flat | 0.0 | reversable defect | 1 |
748 | 749 | 56 | Male | VA Long Beach | asymptomatic | 120.0 | 100.0 | False | normal | 120.0 | True | 1.5 | flat | 0.0 | reversable defect | 1 |
299 rows × 16 columns