"""Train and compare two classifiers on the heart disease data set.

The script loads ``heart_disease_uci.csv``, prints and plots a quick overview,
splits the rows into train/test sets, and then fits and evaluates a
standard-scaled logistic regression and a standard-scaled random forest.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# One seed shared by the split and the random forest so that repeated runs
# report the same metrics.
RANDOM_STATE = 42


def _make_pipeline(classifier):
    """Return a pipeline that standard-scales the features, then applies *classifier*."""
    return Pipeline([
        ('scaler', StandardScaler()),
        ('classifier', classifier),
    ])


def _fit_and_report(model, X_train, X_test, y_train, y_test):
    """Fit *model* on the training split and print its test-set metrics.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        The predictions of the fitted model for ``X_test``.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, predictions))
    # The matrix and the report span several lines; starting them on their own
    # line keeps their rows and column headers aligned.
    print("Confusion Matrix:", confusion_matrix(y_test, predictions), sep="\n")
    print("Classification Report:", classification_report(y_test, predictions), sep="\n")
    return predictions


# Load the heart disease data
df = pd.read_csv("heart_disease_uci.csv")

# Get an overview of the data
print(df.head())
print(df.describe())

# Visualize the distribution of the target variable
sns.countplot(x='target', data=df)
plt.show()

# Visualize the correlation between the features.
# Only numeric/bool columns are correlated: pandas >= 2.0 raises on
# non-numeric columns instead of silently dropping them.
# NOTE(review): the models below still assume every feature column is
# numeric and free of missing values - confirm against the CSV.
sns.heatmap(df.select_dtypes(include=["number", "bool"]).corr(), annot=True)
plt.show()

# Split the data into features and target
X = df.drop("target", axis=1)
y = df["target"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Baseline: logistic regression on standardized features
pipeline = _make_pipeline(LogisticRegression())
y_pred = _fit_and_report(pipeline, X_train, X_test, y_train, y_test)

# Experiment with a different classifier (Random Forest); seeded so the
# comparison with the baseline is repeatable.
rf_pipeline = _make_pipeline(RandomForestClassifier(random_state=RANDOM_STATE))
y_pred_rf = _fit_and_report(rf_pipeline, X_train, X_test, y_train, y_test)