import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load internet_session.csv into a DataFrame; the 'start_time' column is
# parsed into datetimes while reading.
data1 = pd.read_csv('C:/My Files/Python Practice/internet_session.csv', parse_dates=['start_time'])
# Notebook display: (row count, column count) of the loaded frame.
data1.shape
(4712, 9)
# Notebook display: number of missing values in each column.
data1.isna().sum()
name 0 start_time 0 usage_time 0 IP 0 MAC 0 upload 0 download 0 total_transfer 0 seession_break_reason 9 dtype: int64
# Drop the rows that contain a missing value and rebind data1 to a copy of
# the result.
data1 = data1.dropna().copy()
# Notebook display: re-count missing values per column to confirm the drop.
data1.isna().sum()
name 0 start_time 0 usage_time 0 IP 0 MAC 0 upload 0 download 0 total_transfer 0 seession_break_reason 0 dtype: int64
# Notebook display: number of rows flagged as duplicates of another row.
data1.duplicated().sum()
0
# NOTE(review): this repeats the dropna already performed earlier in the
# file; with no missing values left it should change nothing — confirm
# whether this cell is still needed.
data1 = data1.dropna().copy()
# Notebook display: missing values per column.
data1.isna().sum()
name 0 start_time 0 usage_time 0 IP 0 MAC 0 upload 0 download 0 total_transfer 0 seession_break_reason 0 dtype: int64
# --- usage_time -------------------------------------------------------------
# Remove only the first literal '00:' from each value, then parse the rest.
# NOTE(review): usage_time looks like a duration, yet pd.to_datetime turns it
# into a full timestamp (the maximum printed later in this file carries a
# calendar date). Consider pd.to_timedelta once the raw format is confirmed.
data1['usage_time'] = data1['usage_time'].str.replace('00:', '', n=1, regex=False)
data1['usage_time'] = pd.to_datetime(data1['usage_time'])

# --- upload / download ------------------------------------------------------
# Keep the first run of digits from each value and store it as a float.
# Both columns are written back to data1 itself, so the conversion actually
# lands on the frame used by the rest of the analysis.
# NOTE(review): anything after the first digit run (decimal part, unit suffix)
# is discarded — confirm that every value uses the same unit.
for transfer_column in ('upload', 'download'):
    digits = data1[transfer_column].str.extract(r'(\d+)', expand=False)
    data1[transfer_column] = digits.astype(float)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) ~\AppData\Local\Temp\ipykernel_34384\3553692667.py in <module> 3 4 data1['upload'] = data1['upload'].str.extract('(\d+)', expand=False) ----> 5 data1_usage.upload = data1.upload.astype(float) 6 7 data1['download'] = data1['download'].str.extract('(\d+)', expand=False) NameError: name 'data1_usage' is not defined
# Label every row with a device name built from runs of identical MAC values:
# consecutive rows sharing a MAC get the same label, and each time the MAC
# differs from the previous row the device number goes up by one.

# Prefix for the generated labels.
# NOTE(review): `basename` is not assigned anywhere else in this file;
# 'device' is assumed because later code compares against 'device1' — confirm.
basename = 'device'

if 'MAC' in data1.columns:
    mac = data1['MAC']
    # True wherever the MAC differs from the row before it. The first row has
    # no predecessor, so it always compares unequal and opens run number 1.
    # This is positional, so it does not depend on index label 0 still
    # existing after rows were dropped, and it copes with an empty frame.
    starts_new_run = mac != mac.shift()
    device_number = starts_new_run.cumsum()
    # Create a new column in the dataframe called 'device'
    data1['device'] = basename + device_number.astype(str)
else:
    print("No 'MAC' column found in the dataframe.")
# Notebook display: dtype of every column after the conversions above.
data1.dtypes
name object start_time datetime64[ns] usage_time datetime64[ns] IP object MAC object upload object download object total_transfer float64 seession_break_reason object device object dtype: object
# Hour of day (0-23) at which each session started.
data1['hour'] = pd.to_datetime(data1['start_time']).dt.hour

# Number of sessions per start hour, ordered by hour rather than by count.
frequent_activity_time_of_day = data1['hour'].value_counts().sort_index()

plt.figure(figsize=(18, 9))
sns.lineplot(data=frequent_activity_time_of_day)
# One tick per whole hour from 0 through 24.
plt.xticks(np.arange(25))
plt.show()
# Close the figure rather than clearing it: calling plt.clf() after show()
# opens a fresh, empty figure, which the notebook then renders as a stray
# "<Figure ... with 0 Axes>" output.
plt.close()
<Figure size 640x480 with 0 Axes>
# Report the largest value in the 'usage_time' column, heading first and the
# value on the following line.
longest_usage = data1['usage_time'].max()
print('The maximum usage time is:', longest_usage, sep='\n')
The maximum usage time is: 2023-01-28 22:00:07
# Count how many times the IP differs from the IP on the previous row.
ip_column = data1['IP']
# Compare every row with its predecessor. The first row has nothing before it
# and therefore cannot be a change, so it is excluded from the sum. Comparing
# against the real previous row (instead of a hard-coded seed value) ensures
# the row 0 -> row 1 transition is judged on the data itself.
ip_changed = ip_column != ip_column.shift()
ip_count = int(ip_changed.iloc[1:].sum())
print('The IP Adress changed ' + str(ip_count) + ' times')
The IP Adress changed 2303 times
# Count how many times the device label differs from the label on the
# previous row.
device_column = data1['device']
# Compare every row with its predecessor; the first row has nothing before it
# and is excluded. Using the actual previous row means the count does not
# depend on the first label being the literal 'device1'.
device_changed = device_column != device_column.shift()
device_count = int(device_changed.iloc[1:].sum())
print('The device changed ' + str(device_count) + ' times')
The device changed 1223 times
# Renumber the rows in place; the previous index is kept as a column.
data1.reset_index(inplace=True)

# Calendar parts of each session's start time.
session_start = data1['start_time']
data1['day'] = session_start.dt.day
data1['month'] = session_start.dt.month

# Mean total_transfer of the rows in each 'hour' group, shown to 2 decimals.
hourly_average = data1.groupby('hour')['total_transfer'].mean()
print('The Average usage per hour is:\n ', hourly_average.round(2), sep='')
The Average usage per hour is: hour 0 464530.44 1 530880.86 2 431576.11 3 345303.34 4 359809.44 5 275960.91 6 468959.59 7 292886.83 8 366681.92 9 377480.64 10 393259.12 11 309492.45 12 310137.98 13 335270.58 14 472403.71 15 517005.11 16 403919.40 17 525423.69 18 666590.76 19 389841.79 20 355862.80 21 474038.34 22 449600.50 23 407785.08 Name: total_transfer, dtype: float64
# Mean total_transfer of the rows in each 'day' group, shown to 2 decimals.
daily_average = data1.groupby('day')['total_transfer'].mean()
print('The Average usage per day is:\n ', daily_average.round(2), sep='')
The Average usage per day is: day 1 396705.04 2 494496.48 3 445865.63 4 676332.03 5 652195.66 6 396261.75 7 402259.89 8 301859.57 9 393521.97 10 350665.02 11 729857.65 12 346695.95 13 501906.70 14 352701.10 15 521520.51 16 426719.39 17 475795.71 18 337490.93 19 301941.32 20 365130.12 21 462211.69 22 486595.37 23 383153.93 24 320598.94 25 443689.47 26 463432.02 27 324318.12 28 494576.34 29 363645.61 30 361418.88 31 369118.01 Name: total_transfer, dtype: float64
# Mean total_transfer of the rows in each 'month' group, shown to 2 decimals.
monthly_average = data1.groupby('month')['total_transfer'].mean()
print('The Average usage per month is:\n ', monthly_average.round(2), sep='')
The Average usage per month is: month 5 311177.16 6 338418.08 7 418583.99 8 479042.44 9 482955.52 10 549467.63 11 399804.11 Name: total_transfer, dtype: float64