"""Exploratory models and summaries over an internet-session log.

Four independent analyses are run against the same CSV file:

1. a random-forest classifier predicting ``most_active_time`` from the hour,
2. a support-vector regression over per-user gaps between sessions,
3. a random-forest classifier for a derived ``device_change`` flag,
4. average ``usage`` per hour, day of month and month.

NOTE(review): analyses 1 and 2 read ``timestamp`` as a clock string
("HH:MM...") while analyses 3 and 4 parse it as a full datetime.  The real
schema of the file is not visible from this module -- confirm which form the
column actually has.
"""

import numpy as np
import pandas as pd

# scikit-learn is imported inside the functions that train a model, so the
# pandas-only dataset builders and the usage summary stay importable (and
# testable) on machines where scikit-learn is not installed.

# Single source of truth for the input file; the pasted scripts disagreed
# on the spelling ("internet_session.csv" vs "nternet_session.csv").
SESSION_CSV = "internet_session.csv"


def _hour_from_clock_string(timestamps: pd.Series) -> pd.Series:
    """Return the integer that precedes the first ':' of each timestamp string."""
    return timestamps.str.split(":", n=1).str[0].astype(int)


def _random_forest_accuracy(X: pd.DataFrame, y: pd.Series, *, seed: int) -> float:
    """Fit a 100-tree random forest on an 80/20 split and return test accuracy.

    ``seed`` drives both the train/test split and the forest itself.
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed
    )
    model = RandomForestClassifier(n_estimators=100, random_state=seed)
    model.fit(X_train, y_train)
    return accuracy_score(y_test, model.predict(X_test))


# ---------------------------------------------------------------------------
# 1. Most-active-time classifier
# ---------------------------------------------------------------------------
def build_active_time_dataset(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.Series]:
    """Build (features, target) for the most-active-time classifier.

    Reads the ``timestamp`` and ``most_active_time`` columns.  The only
    feature is ``hour``, taken from the text before the first ':' of
    ``timestamp``.  The input frame is not modified.
    """
    features = pd.DataFrame({"hour": _hour_from_clock_string(df["timestamp"])})
    return features, df["most_active_time"]


def run_active_time_classifier(path: str = SESSION_CSV) -> float:
    """Train the most-active-time classifier on ``path`` and print its accuracy."""
    X, y = build_active_time_dataset(pd.read_csv(path))
    acc = _random_forest_accuracy(X, y, seed=0)
    print("Accuracy:", acc)
    return acc


# ---------------------------------------------------------------------------
# 2. Per-user session-gap regression
# ---------------------------------------------------------------------------
def build_ip_change_dataset(df: pd.DataFrame) -> tuple[pd.DataFrame, pd.Series]:
    """Build (features, target) for the per-user regression.

    Reads the ``user_id`` and ``timestamp`` columns.  For every user the gaps
    between the hours of consecutive rows are computed (in file order); the
    feature is the mean gap and the target is the number of gaps.

    Users with a single row have no gap at all: their mean would be NaN,
    which SVR cannot fit, so they are left out.  ``user_id`` itself is not
    returned as a feature -- it is an identifier, not a measurement.

    NOTE(review): the column names speak of "IP changes", but no IP column is
    consulted here; every consecutive pair of rows counts as one change.
    """
    hours = _hour_from_clock_string(df["timestamp"])
    # diff() within each user leaves NaN on that user's first row; both
    # "mean" and "count" below ignore NaN.
    gaps = hours.groupby(df["user_id"]).diff()
    per_user = gaps.groupby(df["user_id"]).agg(["mean", "count"])
    per_user = per_user[per_user["count"] > 0]

    features = (
        per_user[["mean"]]
        .rename(columns={"mean": "mean_time_between_ip_changes"})
        .reset_index(drop=True)
    )
    target = per_user["count"].rename("frequency_of_ip_changes").reset_index(drop=True)
    return features, target


def run_ip_change_regressor(path: str = SESSION_CSV) -> float:
    """Train the SVR model on ``path`` and print its mean squared error."""
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split
    from sklearn.svm import SVR

    X, y = build_ip_change_dataset(pd.read_csv(path))
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0
    )
    reg = SVR()
    reg.fit(X_train, y_train)
    mse = mean_squared_error(y_test, reg.predict(X_test))
    print("Mean Squared Error:", mse)
    return mse


# ---------------------------------------------------------------------------
# 3. Device-change classifier
# ---------------------------------------------------------------------------
def build_device_change_dataset(data: pd.DataFrame) -> tuple[pd.DataFrame, pd.Series]:
    """Build (features, target) for the device-change classifier.

    Reads the ``device_id`` and ``timestamp`` columns.  ``timestamp`` is
    parsed with ``pd.to_datetime`` first: ``read_csv`` leaves it as text, and
    text can neither be subtracted nor queried through ``.dt``.

    Features:
        device_id               -- passed through unchanged
                                   (assumes it is numeric -- TODO confirm)
        timestamp               -- the parsed timestamp as an int64 epoch
                                   value (estimators need numeric input)
        days_since_last_change  -- whole days since the previous row with the
                                   same ``device_id``; -1 when there is none

    Target ``device_change`` is 1 where that gap is at least one whole day,
    otherwise 0 (rows with no previous observation are 0).

    NOTE(review): the target is a direct function of the
    ``days_since_last_change`` feature, so the reported accuracy says little
    about predictive power.
    """
    timestamps = pd.to_datetime(data["timestamp"])
    previous = timestamps.groupby(data["device_id"]).shift(1)
    days = (timestamps - previous).dt.days  # NaN on each device's first row

    target = pd.Series(
        np.where(days > 0, 1, 0), index=data.index, name="device_change"
    )
    features = pd.DataFrame(
        {
            "device_id": data["device_id"],
            "timestamp": timestamps.astype("int64"),
            "days_since_last_change": days.fillna(-1),
        }
    )
    return features, target


def run_device_change_classifier(path: str = SESSION_CSV) -> float:
    """Train the device-change classifier on ``path`` and print its accuracy."""
    X, y = build_device_change_dataset(pd.read_csv(path))
    accuracy = _random_forest_accuracy(X, y, seed=42)
    print("Accuracy:", accuracy)
    return accuracy


# ---------------------------------------------------------------------------
# 4. Usage summary
# ---------------------------------------------------------------------------
def summarize_usage(df: pd.DataFrame) -> dict[str, pd.Series]:
    """Return the mean of ``usage`` grouped by hour, day of month and month.

    Reads the ``timestamp`` (parsed with ``pd.to_datetime``) and ``usage``
    columns.  The result maps ``"hour"``, ``"day"`` and ``"month"`` to a
    Series indexed by that calendar field.  The input frame is not modified.
    """
    timestamps = pd.to_datetime(df["timestamp"])
    usage = df["usage"]
    calendar_fields = {
        "hour": timestamps.dt.hour,
        "day": timestamps.dt.day,
        "month": timestamps.dt.month,
    }
    return {
        name: usage.groupby(values.rename(name)).mean()
        for name, values in calendar_fields.items()
    }


def report_usage(path: str = SESSION_CSV) -> dict[str, pd.Series]:
    """Print the average usage per hour, day and month for the file at ``path``."""
    averages = summarize_usage(pd.read_csv(path))
    print("Average usage per hour:")
    print(averages["hour"])
    print("Average usage per day:")
    print(averages["day"])
    print("Average usage per month:")
    print(averages["month"])
    return averages


def main() -> None:
    """Run the four analyses in their original order."""
    run_active_time_classifier()
    run_ip_change_regressor()
    run_device_change_classifier()
    report_usage()


if __name__ == "__main__":
    main()