# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV

# Step 1: Data Acquisition
# Load the raw dataset from a CSV file in the working directory.
df = pd.read_csv('RealEstateAU_1000_Samples.csv')

# Step 2: Data Preprocessing
# Encoding Categorical Variables (if applicable)
# Assuming 'location' is a categorical variable. One-hot encoding is applied to
# the full frame so the train and test sets end up with identical dummy columns.
df = pd.get_dummies(df, columns=['location'])

# Feature Selection (if applicable)
# Select the relevant features based on domain knowledge or feature importance analysis

# Splitting the Data
# The split is done BEFORE imputation and scaling: those steps learn statistics
# (column mean, mean/std) and must learn them from the training rows only.
# Fitting them on the full dataset would leak test-set information into training
# and make the test metrics optimistic.
X = df.drop('target_variable', axis=1)
y = df['target_variable']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies so the column assignments below modify only the
# split frames. NOTE: `df` and `X` therefore keep the raw, untransformed values.
X_train = X_train.copy()
X_test = X_test.copy()

# Handle Missing Values
# Mean imputation: the mean is estimated on the training split and reused as-is
# to fill gaps in the test split.
imputer = SimpleImputer(strategy='mean')
X_train['feature1'] = imputer.fit_transform(X_train[['feature1']])
X_test['feature1'] = imputer.transform(X_test[['feature1']])

# Feature Scaling
# Standardize using the training split's mean and standard deviation only.
scaler = StandardScaler()
scaled_columns = ['feature2', 'feature3']
X_train[scaled_columns] = scaler.fit_transform(X_train[scaled_columns])
X_test[scaled_columns] = scaler.transform(X_test[scaled_columns])

# Step 3: Regression and Classification Techniques
# Candidate regressors keyed by display name; every estimator is instantiated
# with its default hyperparameters.
regression_models = {
    label: estimator_cls()
    for label, estimator_cls in [
        ('Linear Regression', LinearRegression),
        ('Decision Tree Regression', DecisionTreeRegressor),
        ('Random Forest Regression', RandomForestRegressor),
        ('Gradient Boosting Regression', GradientBoostingRegressor),
    ]
}

# Candidate classifiers, built the same way as the regressors above.
classification_models = {
    label: estimator_cls()
    for label, estimator_cls in [
        ('Logistic Regression', LogisticRegression),
        ('Decision Tree Classification', DecisionTreeClassifier),
        ('Random Forest Classification', RandomForestClassifier),
        ('Gradient Boosting Classification', GradientBoostingClassifier),
    ]
}

# Step 4: Metrics for Regression and Classification
def evaluate_regression_model(model, X, y):
    """Score a fitted regressor against known targets.

    Args:
        model: Fitted estimator exposing ``predict``.
        X: Features passed to ``model.predict``.
        y: True target values the predictions are compared with.

    Returns:
        A tuple ``(mse, rmse, r2)``: mean squared error, its square root,
        and the R^2 score.
    """
    predictions = model.predict(X)
    squared_error = mean_squared_error(y, predictions)
    return squared_error, squared_error ** 0.5, r2_score(y, predictions)

def evaluate_classification_model(model, X, y, average='binary'):
    """Score a fitted classifier against known labels.

    Args:
        model: Fitted estimator exposing ``predict``.
        X: Features passed to ``model.predict``.
        y: True class labels the predictions are compared with.
        average: Averaging strategy forwarded to ``precision_score``,
            ``recall_score`` and ``f1_score``. The default ``'binary'``
            keeps the previous behaviour; pass e.g. ``'macro'``,
            ``'micro'`` or ``'weighted'`` for multiclass targets, which
            the binary setting rejects.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)``.
    """
    y_pred = model.predict(X)
    accuracy = accuracy_score(y, y_pred)
    # Accuracy has no averaging mode; the other three metrics share one setting
    # so they stay comparable with each other.
    precision = precision_score(y, y_pred, average=average)
    recall = recall_score(y, y_pred, average=average)
    f1 = f1_score(y, y_pred, average=average)
    return accuracy, precision, recall, f1

# Step 5: Pipeline/Hyperparameter Tuning Techniques
# Example using GridSearchCV for hyperparameter tuning
# Candidate values for the random forest: tree depth x number of trees.
param_grid = dict(
    max_depth=[3, 5, 7],
    n_estimators=[50, 100, 200],
)

# Exhaustive search over the grid with 5-fold cross-validation on the training split.
grid_search = GridSearchCV(RandomForestRegressor(), param_grid, cv=5).fit(X_train, y_train)
best_model = grid_search.best_estimator_

# Evaluation
# Report the tuned model's error and fit quality on the held-out test split.
mse, rmse, r2 = evaluate_regression_model(best_model, X_test, y_test)
print(f"Best Model - MSE: {mse}, RMSE: {rmse}, R2: {r2}")

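# Repeat the tuning and evaluation steps above for the other models in
# regression_models and classification_models.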
