# import libraries


import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


#getting dataset


data = pd.read_csv('c:/Users/STS/Desktop/heartdata.csv')
data.head()


data.shape

(920, 16)


data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 920 entries, 0 to 919
Data columns (total 16 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   ff        920 non-null    int64  
 1   age       920 non-null    int64  
 2   sex       920 non-null    object 
 3   dataset   920 non-null    object 
 4   cp        920 non-null    object 
 5   trestbps  861 non-null    float64
 6   chol      890 non-null    float64
 7   fbs       830 non-null    object 
 8   restecg   918 non-null    object 
 9   thalch    865 non-null    float64
 10  exang     865 non-null    object 
 11  oldpeak   858 non-null    float64
 12  slope     611 non-null    object 
 13  ca        309 non-null    float64
 14  thal      434 non-null    object 
 15  num       920 non-null    int64  
dtypes: float64(5), int64(3), object(8)
memory usage: 115.1+ KB


data['dataset'].value_counts()

Cleveland        304
Hungary          293
VA Long Beach    200
Switzerland      123
Name: dataset, dtype: int64


#data transformation


col=['age','fbs','exang']
for col in data:
    le= LabelEncoder()
    data[col]=le.fit_transform(data[col])


data.tail()


#filling not a number with statistical values


data['oldpeak'].fillna(data['oldpeak'].mean(),inplace=True)
data.tail()


#standardisation of the data


ss=StandardScaler()
col=['restecg','thalch','chol','trestbps','oldpeak']
data[col]=ss.fit_transform(data[col])
data.tail()


data['trestbps'].value_counts()

-0.751059    131
-0.106627    115
 0.322994    102
-1.323887     59
 2.041479     59
            ... 
-1.753508      1
-0.607852      1
 1.540254      1
-0.178231      1
-0.321438      1
Name: trestbps, Length: 62, dtype: int64


x=data.drop(columns='num',axis=1)
y=data['num']


#training machine


x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,stratify=y,random_state=2)
print(x.shape,x_train.shape,x_test.shape)

(920, 15) (736, 15) (184, 15)


#predicting accuracy of the model


from sklearn.linear_model import LogisticRegression
model=LogisticRegression()
model.fit(x_train,y_train)

C:\Users\STS\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(

LogisticRegression()


from sklearn.metrics import accuracy_score
x_train_predict=model.predict(x_train)
traidata_accuracy=accuracy_score(x_train_predict,y_train)
print(traidata_accuracy)

0.5625


x_test_predict=model.predict(x_test)
testdata_accuracy=accuracy_score(x_test_predict,y_test)
print(testdata_accuracy)

0.5760869565217391


input_da=([917,57,2,3,1,-0.659455,0.125058,1,1.574882,-1.437394,0,-0.837596,6,5,0])
new=np.asarray(input_da)
new_resha=new.reshape(1,-1)
prediction=model.predict(new_resha)
print(prediction)

[0]

C:\Users\STS\anaconda3\lib\site-packages\sklearn\base.py:450: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names
  warnings.warn(


if(prediction[0]==0):
    print('no heart dieseas')
elif(prediction[0]==1):
    print('1st storke')
elif(prediction[0]==2):
    print('2nd stroke')
elif(prediction[0]==3):
    print('3rd storke')

no heart dieseas

	ff	age	sex	dataset	cp	trestbps	chol	fbs	restecg	thalch	exang	oldpeak	slope	ca	thal	num
0	1	63	Male	Cleveland	typical angina	145.0	233.0	True	lv hypertrophy	150.0	False	2.3	downsloping	0.0	fixed defect	0
1	2	67	Male	Cleveland	asymptomatic	160.0	286.0	False	lv hypertrophy	108.0	True	1.5	flat	3.0	normal	2
2	3	67	Male	Cleveland	asymptomatic	120.0	229.0	False	lv hypertrophy	129.0	True	2.6	flat	2.0	reversable defect	1
3	4	37	Male	Cleveland	non-anginal	130.0	250.0	False	normal	187.0	False	3.5	downsloping	0.0	normal	0
4	5	41	Female	Cleveland	atypical angina	130.0	204.0	False	lv hypertrophy	172.0	False	1.4	upsloping	0.0	normal	0

	ff	age	sex	dataset	cp	trestbps	chol	fbs	restecg	thalch	exang	oldpeak	slope	ca	thal	num
915	915	26	0	3	0	28	179	1	2	80	0	10	3	4	3	1
916	916	34	1	3	3	61	8	0	2	119	2	53	3	4	3	0
917	917	27	1	3	0	23	76	1	2	27	0	10	3	4	0	2
918	918	30	1	3	0	61	199	1	0	119	2	53	3	4	3	0
919	919	34	1	3	1	22	107	0	0	20	1	10	3	4	3	1

	ff	age	sex	dataset	cp	trestbps	chol	fbs	restecg	thalch	exang	oldpeak	slope	ca	thal	num
915	915	26	0	3	0	28	179	1	2	80	0	10	3	4	3	1
916	916	34	1	3	3	61	8	0	2	119	2	53	3	4	3	0
917	917	27	1	3	0	23	76	1	2	27	0	10	3	4	0	2
918	918	30	1	3	0	61	199	1	0	119	2	53	3	4	3	0
919	919	34	1	3	1	22	107	0	0	20	1	10	3	4	3	1

	ff	age	sex	dataset	cp	trestbps	chol	fbs	restecg	thalch	exang	oldpeak	slope	ca	thal	num
915	915	26	0	3	0	-0.321438	1.553066	1	1.574882	0.465705	0	-0.837596	3	4	3	1
916	916	34	1	3	3	2.041479	-1.216343	0	1.574882	1.866099	2	2.434380	3	4	3	0
917	917	27	1	3	0	-0.679455	-0.115058	1	1.574882	-1.437394	0	-0.837596	3	4	0	2
918	918	30	1	3	0	2.041479	1.876974	1	-1.557856	1.866099	2	2.434380	3	4	3	0
919	919	34	1	3	1	-0.751059	0.386999	0	-1.557856	-1.688747	1	-0.837596	3	4	3	1

	ff	age	sex	dataset	cp	trestbps	chol	fbs	restecg	thalch	exang	oldpeak	slope	ca	thal	num
915	915	26	0	3	0	28	179	1	2	80	0	10	3	4	3	1
916	916	34	1	3	3	61	8	0	2	119	2	53	3	4	3	0
917	917	27	1	3	0	23	76	1	2	27	0	10	3	4	0	2
918	918	30	1	3	0	61	199	1	0	119	2	53	3	4	3	0
919	919	34	1	3	1	22	107	0	0	20	1	10	3	4	3	1

	ff	age	sex	dataset	cp	trestbps	chol	fbs	restecg	thalch	exang	oldpeak	slope	ca	thal	num
915	915	26	0	3	0	28	179	1	2	80	0	10	3	4	3	1
916	916	34	1	3	3	61	8	0	2	119	2	53	3	4	3	0
917	917	27	1	3	0	23	76	1	2	27	0	10	3	4	0	2
918	918	30	1	3	0	61	199	1	0	119	2	53	3	4	3	0
919	919	34	1	3	1	22	107	0	0	20	1	10	3	4	3	1

	ff	age	sex	dataset	cp	trestbps	chol	fbs	restecg	thalch	exang	oldpeak	slope	ca	thal	num
915	915	26	0	3	0	28	179	1	2	80	0	10	3	4	3	1
916	916	34	1	3	3	61	8	0	2	119	2	53	3	4	3	0
917	917	27	1	3	0	23	76	1	2	27	0	10	3	4	0	2
918	918	30	1	3	0	61	199	1	0	119	2	53	3	4	3	0
919	919	34	1	3	1	22	107	0	0	20	1	10	3	4	3	1

	ff	age	sex	dataset	cp	trestbps	chol	fbs	restecg	thalch	exang	oldpeak	slope	ca	thal	num
915	915	26	0	3	0	28	179	1	2	80	0	10	3	4	3	1
916	916	34	1	3	3	61	8	0	2	119	2	53	3	4	3	0
917	917	27	1	3	0	23	76	1	2	27	0	10	3	4	0	2
918	918	30	1	3	0	61	199	1	0	119	2	53	3	4	3	0
919	919	34	1	3	1	22	107	0	0	20	1	10	3	4	3	1