#Assignment
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Load the session log; 'start_time' is parsed into datetimes while reading.
# A raw string keeps every backslash of the Windows path literal, avoiding the
# invalid '\i' escape sequence that a normal string literal would contain.
df_internet = pd.read_csv(
    r'C:\Users\MNLVBPV\internet_session.csv',
    parse_dates=['start_time'],
)
# Notebook cell: a bare name as the last expression displays the frame.
df_internet
name | start_time | usage_time | IP | MAC | upload | download | total_transfer | seession_break_reason | |
---|---|---|---|---|---|---|---|---|---|
0 | user1 | 2022-05-10 02:59:32 | 00:00:36:28 | 10.55.14.222 | 48:E7:DA:58:22:E9 | 15861.76 | 333168.64 | 349030.40 | Idle-Timeout |
1 | user1 | 2022-05-10 18:53:27 | 00:01:49:56 | 10.55.2.253 | 48:E7:DA:58:22:E9 | 16957.44 | 212152.32 | 229109.76 | Idle-Timeout |
2 | user1 | 2022-05-10 21:20:44 | 00:01:35:00 | 10.55.2.253 | 48:E7:DA:58:22:E9 | 14080.0 | 195153.92 | 209233.92 | Idle-Timeout |
3 | user1 | 2022-05-11 00:37:42 | 00:00:26:00 | 10.55.2.253 | 48:E7:DA:58:22:E9 | 5242.88 | 40806.4 | 46049.28 | Idle-Timeout |
4 | user1 | 2022-05-11 02:59:38 | 00:00:11:52 | 10.55.2.253 | 48:E7:DA:58:22:E9 | 22067.2 | 10772.48 | 32839.68 | Idle-Timeout |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4707 | user9 | 2022-11-04 01:11:34 | 00:06:54:32 | 10.55.4.189 | DA:2F:97:0E:B7:D0 | 107960.32 | 2390753.28 | 2495610.88 | Idle-Timeout |
4708 | user9 | 2022-11-04 10:26:09 | 00:00:23:49 | 10.55.4.59 | DA:2F:97:0E:B7:D0 | 11407.36 | 209674.24 | 221081.60 | Idle-Timeout |
4709 | user9 | 2022-11-04 20:41:42 | 00:01:24:13 | 10.55.15.186 | DA:2F:97:0E:B7:D0 | 18995.2 | 373657.6 | 392652.80 | Idle-Timeout |
4710 | user9 | 2022-11-05 00:21:06 | 00:08:49:43 | 10.55.4.159 | DA:2F:97:0E:B7:D0 | 46602.24 | 593766.4 | 640368.64 | Idle-Timeout |
4711 | user9 | 2022-11-05 20:55:37 | 00:01:06:20 | 10.55.2.33 | DA:2F:97:0E:B7:D0 | 21237.76 | 298536.96 | 319774.72 | NaN |
4712 rows × 9 columns
# Notebook cell: display the frame's dimensions as (rows, columns).
df_internet.shape
(4712, 9)
# What is the most frequent internet activity time of the day?
# Bucket every session by the hour (0-23) at which it started.
df_internet['hour'] = pd.to_datetime(df_internet['start_time']).dt.hour
# Number of sessions per start hour, ordered by hour so the line plot reads
# left to right across the day.
frequent_activity = df_internet['hour'].value_counts().sort_index()
# State the answer explicitly as well as plotting it; idxmax() raises on an
# empty Series, so skip the summary when there are no sessions.
if not frequent_activity.empty:
    print('Most frequent session start hour: ' + str(frequent_activity.idxmax()))
plt.figure(figsize=(18, 9))
sns.lineplot(data=frequent_activity)
# One tick per hour boundary, 0 through 24.
plt.xticks(np.arange(0, 25))
plt.show()
# Release the figure. plt.clf() is avoided here because, once show() has
# closed the figure, clf() creates (and a notebook then displays) a new empty
# one.
plt.close()
<Figure size 432x288 with 0 Axes>
# How often does the IP address change?
# Count the sessions (rows, in file order) whose IP differs from the IP of the
# session immediately before it.
# NOTE(review): adjacent rows belonging to different users are compared too;
# group by 'name' first if per-user changes are wanted -- confirm intent.
ip_series = df_internet['IP']
# shift() lines each row up with its predecessor. The first row has no
# predecessor, so it is dropped rather than counted as a change.
ip_count = int(ip_series.ne(ip_series.shift()).iloc[1:].sum())
print("the ip address changed " + str(ip_count) + ' times')
the ip address changed 1 times
# How often does the device change?
# A device is identified by its MAC address, so count the sessions (rows, in
# file order) whose 'MAC' differs from that of the session immediately before.
# NOTE(review): adjacent rows belonging to different users are compared too;
# group by 'name' first if per-user changes are wanted -- confirm intent.
mac_series = df_internet['MAC']
# shift() lines each row up with its predecessor. The first row has no
# predecessor, so it is dropped rather than counted as a change.
device_count = int(mac_series.ne(mac_series.shift()).iloc[1:].sum())
print('The device changed ' + str(device_count) + ' times')
The device changed 4710 times
# Average usage per hour, per day and per month.
# NOTE(review): reset_index(inplace=True) moves the current index into a new
# column every time this cell runs -- confirm that it is actually needed.
df_internet.reset_index(inplace=True)
# Calendar parts of each session's start time (day of month, month number).
start_times = df_internet['start_time']
df_internet['day'] = start_times.dt.day
df_internet['month'] = start_times.dt.month
# Mean 'total_transfer' of the sessions that started in each hour of the day.
hourly_average = df_internet.groupby('hour')['total_transfer'].mean()
hourly_report = str(hourly_average.round(2))
print(f'The Average usage per hour is:\n {hourly_report}')
The Average usage per hour is: hour 0 464530.44 1 530880.86 2 431576.11 3 345303.34 4 359809.44 5 275960.91 6 468959.59 7 292886.83 8 366681.92 9 377480.64 10 393259.12 11 309492.45 12 310137.98 13 335270.58 14 472403.71 15 517005.11 16 403919.40 17 525423.69 18 665414.45 19 390839.43 20 355740.06 21 471461.40 22 449600.50 23 407785.08 Name: total_transfer, dtype: float64