# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Walk the read-only Kaggle input directory and print the full path of every
# file found. The nested loop/print indentation is restored so this parses.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the plane-crash dataset from the Kaggle input directory.
df = pd.read_csv('/kaggle/input/worst-plane-crashes-in-history/plane_crash_info.csv')
# Drop the 'Unnamed: 0' column in place; the frame keeps its own RangeIndex.
df.drop(columns='Unnamed: 0', inplace=True)
# Preview the first rows (rendered as cell output when run in a notebook).
df.head()
| Fatal | Date | Location | Carrier | Flight | Type |
---|---|---|---|---|---|---|
0 | 2907* | 09/11/2001 | New York City, New York | American /United Airlines | 11/93 | B767 / B767 |
1 | 583 | 03/27/1977 | Tenerife, Canary Islands | Pan Am / KLM | 1736/4805 | B747 / B747 |
2 | 520 | 08/12/1985 | Mt. Osutaka, Japan | Japan Air Lines | 123 | B747 |
3 | 349 | 11/12/1996 | New Delhi, India | Saudi / Kazastan | 763/1907 | B747 / Il76 |
4 | 346 | 03/03/1974 | Bois d' Ermenonville, France | Turkish Airlines | 981 | DC10 |
# Print a summary of the frame: index range, columns, non-null counts, dtypes
# and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Fatal 104 non-null object
1 Date 104 non-null object
2 Location 104 non-null object
3 Carrier 104 non-null object
4 Flight 104 non-null object
5 Type 104 non-null object
dtypes: object(6)
memory usage: 5.0+ KB
# Count missing values per column.
df.isnull().sum()
Fatal 0
Date 0
Location 0
Carrier 0
Flight 0
Type 0
dtype: int64
# Summary statistics per column (count / unique / top / freq for the object
# columns shown in the output that follows).
df.describe()
| Fatal | Date | Location | Carrier | Flight | Type |
---|---|---|---|---|---|---|
count | 104 | 104 | 104 | 104 | 104 | 104 |
unique | 73 | 102 | 97 | 93 | 90 | 45 |
top | 154 | 09/11/2001 | Tenerife, Canary Islands | Aeroflot | - | B727 |
freq | 4 | 2 | 3 | 4 | 10 | 11 |
# Number of recorded incidents per location, most frequent first.
fatal_by_location = df.groupby('Location').size().sort_values(ascending=False)

# Keep only the ten most frequent locations for the chart.
top_locations = fatal_by_location.head(10)

# Bar chart of the top locations.
plt.figure(figsize=(14, 8))
top_locations.plot(kind='bar', color='skyblue', edgecolor='black')
plt.title('Top Locations with the Highest Number of Fatal Incidents', fontsize=20)
plt.xlabel('Location', fontsize=16)
plt.ylabel('Number of Fatal Incidents', fontsize=16)
# Rotate and right-align the tick labels so long location names stay legible.
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
# Figure background and axes background colours.
plt.gcf().patch.set_facecolor('grey')
plt.gca().set_facecolor('yellow')
plt.show()
# Parse the 'Date' strings into datetimes so the .dt accessors can be used.
df['Date'] = pd.to_datetime(df['Date'])

# Derive calendar month and hour-of-day columns from the parsed date.
# NOTE(review): the previewed rows show date-only strings (MM/DD/YYYY); if no
# time component is present, 'Hour' will be 0 for every row and the hourly
# plot carries no information. Confirm against the full dataset.
df['Month'] = df['Date'].dt.month
df['Hour'] = df['Date'].dt.hour

# Incident counts per month.
fatal_by_month = df.groupby('Month').size()

# Incident counts per hour.
fatal_by_hour = df.groupby('Hour').size()

# Two side-by-side line plots: by month (left) and by hour (right).
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
fatal_by_month.plot(kind='line', marker='o', color='green')
plt.title('Fatal Incidents by Month')
plt.xlabel('Month')
plt.ylabel('Number of Fatal Incidents')
plt.xticks(range(1, 13))
plt.grid(True)

plt.subplot(1, 2, 2)
fatal_by_hour.plot(kind='line', marker='o', color='black')
plt.title('Fatal Incidents by Hour')
plt.xlabel('Hour of the Day')
plt.ylabel('Number of Fatal Incidents')
plt.xticks(range(24))
plt.grid(True)

plt.tight_layout()
plt.show()
# The 20 carriers that appear most often in the dataset.
carrier_counts = df['Carrier'].value_counts().head(20)

# Bar chart of incident counts per carrier.
plt.figure(figsize=(10, 6))
carrier_counts.plot(kind='bar', color='skyblue', edgecolor='black')
plt.title('Number of Fatal Incidents by Carrier', fontsize=20)
plt.xlabel('Carrier', fontsize=16)
plt.ylabel('Number of Fatal Incidents', fontsize=16)
# Vertical tick labels so the carrier names do not overlap.
plt.xticks(rotation=90)
# Figure background and axes background colours.
plt.gcf().patch.set_facecolor('coral')
plt.gca().set_facecolor('gainsboro')

plt.show()
# The 20 aircraft types that appear most often in the dataset.
aircraft_counts = df['Type'].value_counts().head(20)

# Bar chart of incident counts per aircraft type.
plt.figure(figsize=(10, 6))
aircraft_counts.plot(kind='bar', color='salmon', edgecolor='black')
plt.title('Number of Fatal Incidents by Aircraft Type', fontsize=20)
plt.xlabel('Aircraft Type', fontsize=16)
plt.ylabel('Number of Fatal Incidents', fontsize=16)
plt.xticks(rotation=45)
# Figure background and axes background colours.
plt.gcf().patch.set_facecolor('gold')
plt.gca().set_facecolor('honeydew')

plt.show()
# The 20 locations that appear most often in the dataset.
locations_counts = df['Location'].value_counts().head(20)

# Vertical bar chart of incident counts per location.
plt.figure(figsize=(12, 8))
locations_counts.plot(kind='bar', color='lightgreen', edgecolor='black')
plt.title('Number of Fatal Incidents by Location', fontsize=20)
# With kind='bar' the categories (locations) run along the x-axis and the
# counts along the y-axis; the axis labels were previously swapped.
plt.xlabel('Location', fontsize=16)
plt.ylabel('Number of Fatal Incidents', fontsize=16)
# Vertical tick labels so the location names do not overlap.
plt.xticks(rotation=90)
# Figure background and axes background colours.
plt.gcf().patch.set_facecolor('khaki')
plt.gca().set_facecolor('lightcyan')
plt.show()