# import libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
#getting dataset
data = pd.read_csv('c:/Users/STS/Desktop/heartdata.csv')
data.head()
ff | age | sex | dataset | cp | trestbps | chol | fbs | restecg | thalch | exang | oldpeak | slope | ca | thal | num | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 63 | Male | Cleveland | typical angina | 145.0 | 233.0 | True | lv hypertrophy | 150.0 | False | 2.3 | downsloping | 0.0 | fixed defect | 0 |
1 | 2 | 67 | Male | Cleveland | asymptomatic | 160.0 | 286.0 | False | lv hypertrophy | 108.0 | True | 1.5 | flat | 3.0 | normal | 2 |
2 | 3 | 67 | Male | Cleveland | asymptomatic | 120.0 | 229.0 | False | lv hypertrophy | 129.0 | True | 2.6 | flat | 2.0 | reversable defect | 1 |
3 | 4 | 37 | Male | Cleveland | non-anginal | 130.0 | 250.0 | False | normal | 187.0 | False | 3.5 | downsloping | 0.0 | normal | 0 |
4 | 5 | 41 | Female | Cleveland | atypical angina | 130.0 | 204.0 | False | lv hypertrophy | 172.0 | False | 1.4 | upsloping | 0.0 | normal | 0 |
data.shape
(920, 16)
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 920 entries, 0 to 919 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ff 920 non-null int64 1 age 920 non-null int64 2 sex 920 non-null object 3 dataset 920 non-null object 4 cp 920 non-null object 5 trestbps 861 non-null float64 6 chol 890 non-null float64 7 fbs 830 non-null object 8 restecg 918 non-null object 9 thalch 865 non-null float64 10 exang 865 non-null object 11 oldpeak 858 non-null float64 12 slope 611 non-null object 13 ca 309 non-null float64 14 thal 434 non-null object 15 num 920 non-null int64 dtypes: float64(5), int64(3), object(8) memory usage: 115.1+ KB
data['dataset'].value_counts()
Cleveland 304 Hungary 293 VA Long Beach 200 Switzerland 123 Name: dataset, dtype: int64
#data transformation
col=['age','fbs','exang']
for col in data:
le= LabelEncoder()
data[col]=le.fit_transform(data[col])
data.tail()
ff | age | sex | dataset | cp | trestbps | chol | fbs | restecg | thalch | exang | oldpeak | slope | ca | thal | num | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
915 | 915 | 26 | 0 | 3 | 0 | 28 | 179 | 1 | 2 | 80 | 0 | 10 | 3 | 4 | 3 | 1 |
916 | 916 | 34 | 1 | 3 | 3 | 61 | 8 | 0 | 2 | 119 | 2 | 53 | 3 | 4 | 3 | 0 |
917 | 917 | 27 | 1 | 3 | 0 | 23 | 76 | 1 | 2 | 27 | 0 | 10 | 3 | 4 | 0 | 2 |
918 | 918 | 30 | 1 | 3 | 0 | 61 | 199 | 1 | 0 | 119 | 2 | 53 | 3 | 4 | 3 | 0 |
919 | 919 | 34 | 1 | 3 | 1 | 22 | 107 | 0 | 0 | 20 | 1 | 10 | 3 | 4 | 3 | 1 |
#filling not a number with statistical values
data['oldpeak'].fillna(data['oldpeak'].mean(),inplace=True)
data.tail()
ff | age | sex | dataset | cp | trestbps | chol | fbs | restecg | thalch | exang | oldpeak | slope | ca | thal | num | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
915 | 915 | 26 | 0 | 3 | 0 | 28 | 179 | 1 | 2 | 80 | 0 | 10 | 3 | 4 | 3 | 1 |
916 | 916 | 34 | 1 | 3 | 3 | 61 | 8 | 0 | 2 | 119 | 2 | 53 | 3 | 4 | 3 | 0 |
917 | 917 | 27 | 1 | 3 | 0 | 23 | 76 | 1 | 2 | 27 | 0 | 10 | 3 | 4 | 0 | 2 |
918 | 918 | 30 | 1 | 3 | 0 | 61 | 199 | 1 | 0 | 119 | 2 | 53 | 3 | 4 | 3 | 0 |
919 | 919 | 34 | 1 | 3 | 1 | 22 | 107 | 0 | 0 | 20 | 1 | 10 | 3 | 4 | 3 | 1 |
#standardisation of the data
ss=StandardScaler()
col=['restecg','thalch','chol','trestbps','oldpeak']
data[col]=ss.fit_transform(data[col])
data.tail()
ff | age | sex | dataset | cp | trestbps | chol | fbs | restecg | thalch | exang | oldpeak | slope | ca | thal | num | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
915 | 915 | 26 | 0 | 3 | 0 | -0.321438 | 1.553066 | 1 | 1.574882 | 0.465705 | 0 | -0.837596 | 3 | 4 | 3 | 1 |
916 | 916 | 34 | 1 | 3 | 3 | 2.041479 | -1.216343 | 0 | 1.574882 | 1.866099 | 2 | 2.434380 | 3 | 4 | 3 | 0 |
917 | 917 | 27 | 1 | 3 | 0 | -0.679455 | -0.115058 | 1 | 1.574882 | -1.437394 | 0 | -0.837596 | 3 | 4 | 0 | 2 |
918 | 918 | 30 | 1 | 3 | 0 | 2.041479 | 1.876974 | 1 | -1.557856 | 1.866099 | 2 | 2.434380 | 3 | 4 | 3 | 0 |
919 | 919 | 34 | 1 | 3 | 1 | -0.751059 | 0.386999 | 0 | -1.557856 | -1.688747 | 1 | -0.837596 | 3 | 4 | 3 | 1 |
data['trestbps'].value_counts()
-0.751059 131 -0.106627 115 0.322994 102 -1.323887 59 2.041479 59 ... -1.753508 1 -0.607852 1 1.540254 1 -0.178231 1 -0.321438 1 Name: trestbps, Length: 62, dtype: int64
x=data.drop(columns='num',axis=1)
y=data['num']
#training machine
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,stratify=y,random_state=2)
print(x.shape,x_train.shape,x_test.shape)
(920, 15) (736, 15) (184, 15)
#predicting accuracy of the model
from sklearn.linear_model import LogisticRegression
model=LogisticRegression()
model.fit(x_train,y_train)
C:\Users\STS\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1): STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. Increase the number of iterations (max_iter) or scale the data as shown in: https://scikit-learn.org/stable/modules/preprocessing.html Please also refer to the documentation for alternative solver options: https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression n_iter_i = _check_optimize_result(
LogisticRegression()
from sklearn.metrics import accuracy_score
x_train_predict=model.predict(x_train)
traidata_accuracy=accuracy_score(x_train_predict,y_train)
print(traidata_accuracy)
0.5625
x_test_predict=model.predict(x_test)
testdata_accuracy=accuracy_score(x_test_predict,y_test)
print(testdata_accuracy)
0.5760869565217391
input_da=([917,57,2,3,1,-0.659455,0.125058,1,1.574882,-1.437394,0,-0.837596,6,5,0])
new=np.asarray(input_da)
new_resha=new.reshape(1,-1)
prediction=model.predict(new_resha)
print(prediction)
[0]
C:\Users\STS\anaconda3\lib\site-packages\sklearn\base.py:450: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names warnings.warn(
if(prediction[0]==0):
print('no heart dieseas')
elif(prediction[0]==1):
print('1st storke')
elif(prediction[0]==2):
print('2nd stroke')
elif(prediction[0]==3):
print('3rd storke')
no heart dieseas