"""Tune a regression and a classification pipeline on the housing data.

The script loads the housing CSV, holds out 20% of the rows for testing,
grid-searches a scaled ``LinearRegression`` on the raw ``price`` column and a
scaled ``SVC`` on price bands derived from ``price``, prints the best
hyper-parameters of each search and predicts on the held-out rows.
"""

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Number of quantile-based price bands used as class labels for the SVC.
# NOTE(review): three bands is an arbitrary choice -- confirm the intended
# classes with whoever owns this analysis.
N_PRICE_BANDS = 3

# Load the housing data
df = pd.read_csv("RealEstateAU_1000_Samples.csv")

# Split the data into features and target
# NOTE(review): StandardScaler and both estimators need numeric, NaN-free
# features; if the CSV has text columns or missing values they must be
# encoded/imputed before fitting -- confirm against the actual file.
X = df.drop("price", axis=1)
y = df["price"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# SVC is a classifier and needs discrete labels, whereas ``price`` is a
# continuous amount.  Derive the band edges from the training prices only (so
# nothing about the test set leaks into the labels), then open the outer edges
# so test prices outside the training range still land in a band.
_, band_edges = pd.qcut(
    y_train, q=N_PRICE_BANDS, retbins=True, duplicates="drop"
)
band_edges = np.array(band_edges, dtype=float)  # private, writable copy
band_edges[0], band_edges[-1] = -np.inf, np.inf
y_train_band = pd.cut(y_train, bins=band_edges, labels=False)
y_test_band = pd.cut(y_test, bins=band_edges, labels=False)

# Define the Regression pipeline
reg_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', LinearRegression()),
])

# Define the Classification pipeline
clf_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', SVC()),
])

# Define the hyperparameters for the Regression pipeline.
# ``normalize`` is deliberately absent: LinearRegression no longer accepts it
# (removed in scikit-learn 1.2) and the pipeline's StandardScaler already
# standardises the features.
reg_parameters = {
    'regressor__fit_intercept': [True, False],
}

# Define the hyperparameters for the Classification pipeline
clf_parameters = {
    'classifier__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'classifier__C': [0.1, 1, 10, 100, 1000],
    'classifier__gamma': ['scale', 'auto'],
}

# Use GridSearchCV to perform hyperparameter tuning on the Regression pipeline
reg_grid = GridSearchCV(
    reg_pipeline,
    reg_parameters,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=1,
)
reg_grid.fit(X_train, y_train)

# Use GridSearchCV to perform hyperparameter tuning on the Classification
# pipeline (fitted on the price bands, not on the raw prices)
clf_grid = GridSearchCV(
    clf_pipeline,
    clf_parameters,
    cv=5,
    scoring='accuracy',
    verbose=1,
)
clf_grid.fit(X_train, y_train_band)

# Print the best hyperparameters for the Regression pipeline
print("Best hyperparameters for Regression pipeline:", reg_grid.best_params_)

# Print the best hyperparameters for the Classification pipeline
print("Best hyperparameters for Classification pipeline:", clf_grid.best_params_)

# Predict the housing prices using the optimized Regression pipeline
y_pred_reg = reg_grid.predict(X_test)

# Predict the price band (integer code, 0 = cheapest) using the optimized
# Classification pipeline; compare against ``y_test_band``.
# NOTE(review): the source text was cut off in the middle of this statement
# ("y_pred_clf = clf_"); the call is reconstructed by analogy with the
# regression prediction above -- confirm against the full file.
y_pred_clf = clf_grid.predict(X_test)