# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
# Load the crash records and discard the 'Unnamed: 0' column
# (presumably an index that was written into the CSV - confirm against the file).
df = pd.read_csv(
    '/kaggle/input/worst-plane-crashes-in-history/plane_crash_info.csv'
).drop(columns='Unnamed: 0')
# Preview the first rows (only rendered when run as a notebook cell).
df.head()
#    Fatal  Date        Location                      Carrier                    Flight     Type
# 0  2907*  09/11/2001  New York City, New York       American /United Airlines  11/93      B767 / B767
# 1  583    03/27/1977  Tenerife, Canary Islands      Pan Am / KLM               1736/4805  B747 / B747
# 2  520    08/12/1985  Mt. Osutaka, Japan            Japan Air Lines            123        B747
# 3  349    11/12/1996  New Delhi, India              Saudi / Kazastan           763/1907   B747 / Il76
# 4  346    03/03/1974  Bois d' Ermenonville, France  Turkish Airlines           981        DC10
# Print a summary of the frame: index range, column names, non-null counts, dtypes and memory usage.
df.info()
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 104 entries, 0 to 103
# Data columns (total 6 columns):
#  #   Column    Non-Null Count  Dtype
# ---  ------    --------------  -----
#  0   Fatal     104 non-null    object
#  1   Date      104 non-null    object
#  2   Location  104 non-null    object
#  3   Carrier   104 non-null    object
#  4   Flight    104 non-null    object
#  5   Type      104 non-null    object
# dtypes: object(6)
# memory usage: 5.0+ KB
# Count the missing (null) entries in each column.
df.isnull().sum()
# Fatal       0
# Date        0
# Location    0
# Carrier     0
# Flight      0
# Type        0
# dtype: int64
# Summarise every column. All six columns are object-typed at this point, so the
# summary reports count / unique / top / freq rather than numeric statistics.
df.describe()
#         Fatal  Date        Location                  Carrier   Flight  Type
# count   104    104         104                       104       104     104
# unique  73     102         97                        93        90      45
# top     154    09/11/2001  Tenerife, Canary Islands  Aeroflot  -       B727
# freq    4      2           3                         4         10      11
# Number of crash records per location, most frequent first.
fatal_by_location = (
    df.groupby('Location')
    .size()
    .sort_values(ascending=False)
)

# Keep only the ten locations with the most records.
top_locations = fatal_by_location.head(10)

# Draw the ranking as a vertical bar chart on a grey figure / yellow axes.
fig = plt.figure(figsize=(14, 8))
ax = top_locations.plot(kind='bar', color='skyblue', edgecolor='black')
ax.set_title('Top Locations with the Highest Number of Fatal Incidents', fontsize=20)
ax.set_xlabel('Location', fontsize=16)
ax.set_ylabel('Number of Fatal Incidents', fontsize=16)
# Slant the long location names and anchor them at their right edge.
plt.xticks(rotation=45, ha='right')
fig.patch.set_facecolor('grey')
ax.set_facecolor('yellow')
plt.tight_layout()
plt.show()

# Parse the 'Date' strings into datetimes so calendar fields can be derived.
df['Date'] = pd.to_datetime(df['Date'])

df['Month'] = df['Date'].dt.month
df['Hour'] = df['Date'].dt.hour

# Incidents per calendar month. Reindexing over 1..12 gives a month without
# any record an explicit 0; otherwise it would be absent from the series and
# the line would be drawn straight across the gap.
fatal_by_month = df.groupby('Month').size().reindex(range(1, 13), fill_value=0)

fatal_by_hour = df.groupby('Hour').size()

# When the source dates carry no clock time (e.g. '03/27/1977'), every parsed
# timestamp sits at midnight and 'Hour' is 0 for all rows, so an hourly curve
# would be a single misleading point. Detect that case before plotting.
parsed_dates = df['Date'].dropna()  # NaT != NaT would wrongly read as "has a time"
has_time_of_day = bool((parsed_dates != parsed_dates.dt.normalize()).any())

plt.figure(figsize=(12, 6))

# Left panel: incidents per month.
plt.subplot(1, 2, 1)
fatal_by_month.plot(kind='line', marker='o', color='green')
plt.title('Fatal Incidents by Month')
plt.xlabel('Month')
plt.ylabel('Number of Fatal Incidents')
plt.xticks(range(1, 13))
plt.grid(True)

# Right panel: incidents per hour, drawn only if the data has a time of day.
hour_ax = plt.subplot(1, 2, 2)
if has_time_of_day:
    fatal_by_hour.plot(kind='line', marker='o', color='black')
    plt.xticks(range(24))
    plt.grid(True)
else:
    # Say so explicitly instead of plotting a lone point at hour 0.
    hour_ax.text(
        0.5, 0.5,
        'No time-of-day information\nin the Date column',
        ha='center', va='center', transform=hour_ax.transAxes,
    )
    hour_ax.set_xticks([])
    hour_ax.set_yticks([])
plt.title('Fatal Incidents by Hour')
plt.xlabel('Hour of the Day')
plt.ylabel('Number of Fatal Incidents')

plt.tight_layout()
plt.show()

# The 20 carriers that occur most often in the crash records.
carrier_counts = df['Carrier'].value_counts().head(20)

# Vertical bar chart on a coral figure with a gainsboro plotting area.
fig = plt.figure(figsize=(10, 6))
fig.patch.set_facecolor('coral')
ax = carrier_counts.plot(kind='bar', color='skyblue', edgecolor='black')
ax.set_facecolor('gainsboro')
ax.set_title('Number of Fatal Incidents by Carrier', fontsize=20)
ax.set_xlabel('Carrier', fontsize=16)
ax.set_ylabel('Number of Fatal Incidents', fontsize=16)
# Carrier names are long, so turn the tick labels fully vertical.
plt.xticks(rotation=90)
plt.show()

# The 20 aircraft types that occur most often in the crash records.
aircraft_counts = df['Type'].value_counts().head(20)

# Vertical bar chart on a gold figure with a honeydew plotting area.
fig = plt.figure(figsize=(10, 6))
fig.patch.set_facecolor('gold')
ax = aircraft_counts.plot(kind='bar', color='salmon', edgecolor='black')
ax.set_facecolor('honeydew')
ax.set_title('Number of Fatal Incidents by Aircraft Type', fontsize=20)
ax.set_xlabel('Aircraft Type', fontsize=16)
ax.set_ylabel('Number of Fatal Incidents', fontsize=16)
plt.xticks(rotation=45)
plt.show()

# The 20 locations that occur most often in the crash records.
locations_counts = df['Location'].value_counts().head(20)
plt.figure(figsize=(12, 8))
# kind='bar' draws vertical bars: locations run along the x-axis and the
# per-location counts along the y-axis, so the axis labels must say the same.
locations_counts.plot(kind='bar', color='lightgreen', edgecolor='black')
plt.title('Number of Fatal Incidents by Location', fontsize=20)
plt.xlabel('Location', fontsize=16)
plt.ylabel('Number of Fatal Incidents', fontsize=16)
plt.xticks(rotation=90)
plt.gcf().patch.set_facecolor('khaki')
plt.gca().set_facecolor('lightcyan')
plt.show()