#importing libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder as lb
# Load the dataset from the CSV file into a DataFrame, then preview its
# first five rows (five is also what head() returns by default).
d = pd.read_csv(filepath_or_buffer='c:/Users/STS/Desktop/ip.csv')
d.head(n=5)
| | name | start_time | usage_time | IP | MAC | upload | download | total_transfer | seession_break_reason |
---|---|---|---|---|---|---|---|---|---|
0 | user1 | 5/10/2022 2:59 | 00:00:36:28 | 10.55.14.222 | 48:E7:DA:58:22:E9 | 15861.76 | 333168.64 | 349030.40 | Idle-Timeout |
1 | user1 | 5/10/2022 18:53 | 00:01:49:56 | 10.55.2.253 | 48:E7:DA:58:22:E9 | 16957.44 | 212152.32 | 229109.76 | Idle-Timeout |
2 | user1 | 5/10/2022 21:20 | 00:01:35:00 | 10.55.2.253 | 48:E7:DA:58:22:E9 | 14080 | 195153.92 | 209233.92 | Idle-Timeout |
3 | user1 | 5/11/2022 0:37 | 00:00:26:00 | 10.55.2.253 | 48:E7:DA:58:22:E9 | 5242.88 | 40806.4 | 46049.28 | Idle-Timeout |
4 | user1 | 5/11/2022 2:59 | 00:00:11:52 | 10.55.2.253 | 48:E7:DA:58:22:E9 | 22067.2 | 10772.48 | 32839.68 | Idle-Timeout |
# Number of rows recorded for each device (MAC address), most frequent first.
# The number of distinct entries shows how many different devices appear.
d.loc[:, 'MAC'].value_counts()
48:E7:DA:58:22:E9 1236 DA:2F:97:0E:B7:D0 561 92:02:4F:EE:EB:3F 481 C8:3D:DC:CF:16:C6 465 F8:89:D2:D4:AE:8B 362 D8:9C:67:BA:DC:B9 242 C2:BB:83:2B:FF:5A 237 A8:93:4A:7E:34:6F 231 2A:7F:97:6A:10:51 189 C6:2C:1D:5E:86:70 147 FA:B5:D7:B1:A4:6B 128 0C:F3:46:71:E2:27 104 36:D2:09:C8:FA:0D 73 68:14:01:09:51:71 70 C0:E4:34:D5:88:0F 54 9C:29:76:F1:E3:0C 36 E0:D0:45:5E:60:F5 23 B0:68:E6:9E:AB:77 12 E8:6F:38:A4:F8:2F 12 CA:48:87:B2:A5:12 11 32:25:C6:7D:5B:54 7 EC:2E:98:CB:B7:8D 7 94:17:00:37:AF:A8 5 90:32:4B:3B:10:DB 4 AA:E1:02:47:2B:0A 3 86:12:FC:78:C9:94 2 72:47:85:EE:2F:8C 2 B6:99:3E:1D:FB:19 2 7C:10:C9:AD:6E:E6 2 90:E8:68:F2:9C:21 1 E2:42:6C:64:6E:48 1 EC:2E:98:93:B5:D3 1 80:F3:EF:36:7D:AD 1 Name: MAC, dtype: int64
# Number of rows recorded for each IP address, most frequent first.
# The number of distinct entries shows how many different addresses appear.
d.loc[:, 'IP'].value_counts()
10.55.0.89 80 10.55.14.148 64 10.55.15.221 55 10.55.1.50 48 10.55.10.46 44 .. 10.55.14.67 1 10.55.7.44 1 10.55.12.225 1 10.55.12.190 1 10.55.2.33 1 Name: IP, Length: 1302, dtype: int64
# Most frequent time of day (hour, 0-23) at which usage starts.
# Taking the mode of the raw `start_time` strings compares whole
# date+minute stamps, so it only reports the individual timestamps that
# happen to repeat. Parsing the column and reducing it to the hour of day
# gives the time of day that occurs most often across all rows.
# NOTE(review): assumes `start_time` is "month/day/year hour:minute", as in
# the rows previewed by d.head() -- confirm this holds for the whole file.
(
    pd.to_datetime(d['start_time'], format='%m/%d/%Y %H:%M')
    .dt.hour
    .mode()
)
0 10/2/2022 14:12 1 11/3/2022 19:36 2 11/3/2022 21:36 3 5/19/2022 0:56 4 5/31/2022 8:59 5 6/12/2022 10:37 6 6/12/2022 22:55 7 6/15/2022 18:13 8 6/23/2022 22:28 9 6/28/2022 1:03 10 6/8/2022 13:46 11 7/6/2022 19:53 12 8/14/2022 9:51 13 8/20/2022 0:20 14 8/25/2022 13:22 15 8/28/2022 11:25 16 9/1/2022 22:50 17 9/4/2022 9:47 Name: start_time, dtype: object
# Size of the dataset as a (number of rows, number of columns) tuple
(len(d.index), len(d.columns))
(4712, 9)
# Average usage per hour, per day and per month, expressed in minutes.
#
# `usage_time` is stored as text, so value_counts() on it only counts how
# often each duration string occurs -- it never measures usage. The column is
# therefore converted to a number of minutes first. Rows are then bucketed by
# the clock hour / calendar day / calendar month in which they started, the
# usage inside each bucket is summed, and the bucket totals are averaged.
# Buckets that contain no rows are not part of the average, and a session is
# attributed entirely to the bucket in which it started.
#
# NOTE(review): assumes `usage_time` is "DD:HH:MM:SS" and `start_time` is
# "month/day/year hour:minute", as the rows previewed by d.head() suggest --
# confirm against the full file.
fields = d['usage_time'].str.split(':', expand=True).astype(float)
usage_minutes = (
    fields[0] * 24 * 60  # days    -> minutes
    + fields[1] * 60     # hours   -> minutes
    + fields[2]          # minutes
    + fields[3] / 60     # seconds -> minutes
)
started = pd.to_datetime(d['start_time'], format='%m/%d/%Y %H:%M')

per_hour = usage_minutes.groupby(started.dt.strftime('%Y-%m-%d %H')).sum().mean()
per_day = usage_minutes.groupby(started.dt.strftime('%Y-%m-%d')).sum().mean()
per_month = usage_minutes.groupby(started.dt.strftime('%Y-%m')).sum().mean()

print('average usage per hour (minutes):', per_hour)
print('average usage per day (minutes):', per_day)
print('average usage per month (minutes):', per_month)
00:00:25:14 0.116667 00:00:26:00 0.083333 00:00:18:37 0.083333 00:00:22:44 0.083333 00:00:00:38 0.083333 ... 00:05:02:11 0.016667 00:01:00:19 0.016667 00:05:42:28 0.016667 00:00:24:21 0.016667 00:01:06:20 0.016667 Name: usage_time, Length: 3558, dtype: float64 00:00:25:14 17.5 00:00:26:00 12.5 00:00:18:37 12.5 00:00:22:44 12.5 00:00:00:38 12.5 ... 00:05:02:11 2.5 00:01:00:19 2.5 00:05:42:28 2.5 00:00:24:21 2.5 00:01:06:20 2.5 Name: usage_time, Length: 3558, dtype: float64 00:00:25:14 0.000162 00:00:26:00 0.000116 00:00:18:37 0.000116 00:00:22:44 0.000116 00:00:00:38 0.000116 ... 00:05:02:11 0.000023 00:01:00:19 0.000023 00:05:42:28 0.000023 00:00:24:21 0.000023 00:01:06:20 0.000023 Name: usage_time, Length: 3558, dtype: float64