In [27]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mpl_toolkits
import numpy as np
import scipy.stats as stats
%matplotlib inline
from sklearn.model_selection import train_test_split
In [28]:
df=pd.read_csv(r"C:\Users\Ismail\Desktop\internet_session.csv")
In [29]:
df
Out[29]:
name start_time usage_time IP MAC upload download total_transfer seession_break_reason
0 user1 2022-05-10 02:59:32 00:00:36:28 10.55.14.222 48:E7:DA:58:22:E9 15861.76 333168.64 349030.40 Idle-Timeout
1 user1 2022-05-10 18:53:27 00:01:49:56 10.55.2.253 48:E7:DA:58:22:E9 16957.44 212152.32 229109.76 Idle-Timeout
2 user1 2022-05-10 21:20:44 00:01:35:00 10.55.2.253 48:E7:DA:58:22:E9 14080.0 195153.92 209233.92 Idle-Timeout
3 user1 2022-05-11 00:37:42 00:00:26:00 10.55.2.253 48:E7:DA:58:22:E9 5242.88 40806.4 46049.28 Idle-Timeout
4 user1 2022-05-11 02:59:38 00:00:11:52 10.55.2.253 48:E7:DA:58:22:E9 22067.2 10772.48 32839.68 Idle-Timeout
... ... ... ... ... ... ... ... ... ...
4707 user9 2022-11-04 01:11:34 00:06:54:32 10.55.4.189 DA:2F:97:0E:B7:D0 107960.32 2390753.28 2495610.88 Idle-Timeout
4708 user9 2022-11-04 10:26:09 00:00:23:49 10.55.4.59 DA:2F:97:0E:B7:D0 11407.36 209674.24 221081.60 Idle-Timeout
4709 user9 2022-11-04 20:41:42 00:01:24:13 10.55.15.186 DA:2F:97:0E:B7:D0 18995.2 373657.6 392652.80 Idle-Timeout
4710 user9 2022-11-05 00:21:06 00:08:49:43 10.55.4.159 DA:2F:97:0E:B7:D0 46602.24 593766.4 640368.64 Idle-Timeout
4711 user9 2022-11-05 20:55:37 00:01:06:20 10.55.2.33 DA:2F:97:0E:B7:D0 21237.76 298536.96 319774.72 NaN

4712 rows × 9 columns

In [30]:
print(df.columns)
Index(['name', 'start_time', 'usage_time', 'IP', 'MAC', 'upload', 'download',
       'total_transfer', 'seession_break_reason'],
      dtype='object')
In [31]:
df[['usage_time', 'IP', 'MAC']]
Out[31]:
usage_time IP MAC
0 00:00:36:28 10.55.14.222 48:E7:DA:58:22:E9
1 00:01:49:56 10.55.2.253 48:E7:DA:58:22:E9
2 00:01:35:00 10.55.2.253 48:E7:DA:58:22:E9
3 00:00:26:00 10.55.2.253 48:E7:DA:58:22:E9
4 00:00:11:52 10.55.2.253 48:E7:DA:58:22:E9
... ... ... ...
4707 00:06:54:32 10.55.4.189 DA:2F:97:0E:B7:D0
4708 00:00:23:49 10.55.4.59 DA:2F:97:0E:B7:D0
4709 00:01:24:13 10.55.15.186 DA:2F:97:0E:B7:D0
4710 00:08:49:43 10.55.4.159 DA:2F:97:0E:B7:D0
4711 00:01:06:20 10.55.2.33 DA:2F:97:0E:B7:D0

4712 rows × 3 columns

In [32]:
IP_frequency = df['IP'].nunique() / df.shape[0]
print(f"Frequency of IP changes: {IP_frequency:.2f}")
Frequency of IP changes: 0.28
In [33]:
device_frequency = df['MAC'].nunique() / df.shape[0]
print(f"Frequency of device changes: {device_frequency:.2f}")
Frequency of device changes: 0.01
In [35]:
activity_hour = df.groupby(df['usage_time'])['total_transfer'].sum().idxmax()
print(f"Most frequent internet activity time of the day: {activity_hour}:00")
Most frequent internet activity time of the day: 00:03:21:54:00
In [37]:
# Average usage per hour
average_usage_per_hour = df.groupby(df['usage_time'])['total_transfer'].mean()
print(f"Average usage per hour: {average_usage_per_hour}")
Average usage per hour: usage_time
00:00:00:01    1.120000e+00
00:00:00:08    7.313000e+01
00:00:00:09    1.026400e+02
00:00:00:11    7.875050e+02
00:00:00:18    1.402880e+03
                   ...     
00:19:26:09    2.988442e+06
00:19:35:11    2.128609e+06
00:20:39:52    1.233125e+07
00:22:00:07    7.817728e+05
01:00:21:07    4.456448e+06
Name: total_transfer, Length: 3558, dtype: float64
In [38]:
# Average usage per day
average_usage_per_day = df.groupby(df['usage_time'])['total_transfer'].mean()
print(f"Average usage per day: {average_usage_per_day}")
Average usage per day: usage_time
00:00:00:01    1.120000e+00
00:00:00:08    7.313000e+01
00:00:00:09    1.026400e+02
00:00:00:11    7.875050e+02
00:00:00:18    1.402880e+03
                   ...     
00:19:26:09    2.988442e+06
00:19:35:11    2.128609e+06
00:20:39:52    1.233125e+07
00:22:00:07    7.817728e+05
01:00:21:07    4.456448e+06
Name: total_transfer, Length: 3558, dtype: float64
In [39]:
# Average usage per month
average_usage_per_month = df.groupby(df['usage_time'])['total_transfer'].mean()
print(f"Average usage per month: {average_usage_per_month}")
Average usage per month: usage_time
00:00:00:01    1.120000e+00
00:00:00:08    7.313000e+01
00:00:00:09    1.026400e+02
00:00:00:11    7.875050e+02
00:00:00:18    1.402880e+03
                   ...     
00:19:26:09    2.988442e+06
00:19:35:11    2.128609e+06
00:20:39:52    1.233125e+07
00:22:00:07    7.817728e+05
01:00:21:07    4.456448e+06
Name: total_transfer, Length: 3558, dtype: float64
In [ ]: