import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mpl_toolkits
import numpy as np
import scipy.stats as stats
%matplotlib inline
from sklearn.model_selection import train_test_split
df=pd.read_csv(r"C:\Users\Ismail\Desktop\RealEstateAU_1000_Samples.csv",skipinitialspace = True)
df
index | TID | breadcrumb | category_name | property_type | building_size | land_size | preferred_size | open_date | listing_agency | ... | state | zip_code | phone | latitude | longitude | product_depth | bedroom_count | bathroom_count | parking_count | RunDate | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 1350988 | Buy>NT>DARWIN CITY | Real Estate & Property for sale in DARWIN CITY... | House | NaN | NaN | NaN | Added 2 hours ago | Professionals - DARWIN CITY | ... | NT | 800 | 08 8941 8289 | NaN | NaN | premiere | 2.0 | 1.0 | 1.0 | 2022-05-27 15:54:05 |
1 | 1 | 1350989 | Buy>NT>DARWIN CITY | Real Estate & Property for sale in DARWIN CITY... | Apartment | 171m² | NaN | 171m² | Added 7 hours ago | Nick Mousellis Real Estate - Eview Group Member | ... | NT | 800 | 0411724000 | NaN | NaN | premiere | 3.0 | 2.0 | 2.0 | 2022-05-27 15:54:05 |
2 | 2 | 1350990 | Buy>NT>DARWIN CITY | Real Estate & Property for sale in DARWIN CITY... | Unit | NaN | NaN | NaN | Added 22 hours ago | Habitat Real Estate - THE GARDENS | ... | NT | 800 | 08 8981 0080 | NaN | NaN | premiere | 2.0 | 1.0 | 1.0 | 2022-05-27 15:54:05 |
3 | 3 | 1350991 | Buy>NT>DARWIN CITY | Real Estate & Property for sale in DARWIN CITY... | House | NaN | NaN | NaN | Added yesterday | Ray White - NIGHTCLIFF | ... | NT | 800 | 08 8982 2403 | NaN | NaN | premiere | 1.0 | 1.0 | 0.0 | 2022-05-27 15:54:05 |
4 | 4 | 1350992 | Buy>NT>DARWIN CITY | Real Estate & Property for sale in DARWIN CITY... | Unit | 201m² | NaN | 201m² | Added yesterday | Carol Need Real Estate - Fannie Bay | ... | NT | 800 | 0418885966 | NaN | NaN | premiere | 3.0 | 2.0 | 2.0 | 2022-05-27 15:54:05 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
995 | 995 | 1351983 | Buy>NT>DARWIN | Real Estate & Property for sale in DARWIN, NT ... | House | NaN | 9.17ha | 9.17ha | Under offer | United Realty NT - Parap | ... | NT | 834 | 08 8981 2666 | NaN | NaN | feature | 4.0 | 3.0 | 6.0 | 2022-05-27 15:54:05 |
996 | 996 | 1351984 | Buy>NT>DARWIN | Real Estate & Property for sale in DARWIN, NT ... | House | 203m² | 600m² | 600m² | NaN | Kassiou Constructions - HOWARD SPRINGS | ... | NT | 836 | 08 89834326 | NaN | NaN | standard | 4.0 | 2.0 | 2.0 | 2022-05-27 15:54:05 |
997 | 997 | 1351985 | Buy>NT>DARWIN | Real Estate & Property for sale in DARWIN, NT ... | House | 209.6m² | 800m² | 800m² | NaN | Kassiou Constructions - HOWARD SPRINGS | ... | NT | 836 | 08 89834326 | NaN | NaN | standard | 4.0 | 2.0 | 2.0 | 2022-05-27 15:54:05 |
998 | 998 | 1351986 | Buy>NT>DARWIN | Real Estate & Property for sale in DARWIN, NT ... | House | 180m² | 450m² | 450m² | NaN | Kassiou Constructions - HOWARD SPRINGS | ... | NT | 810 | 08 89834326 | NaN | NaN | standard | 4.0 | 2.0 | 3.0 | 2022-05-27 15:54:05 |
999 | 999 | 1351987 | Buy>NT>DARWIN | Real Estate & Property for sale in DARWIN, NT ... | Unit | 120m² | NaN | 120m² | NaN | Home Zone NT - DARWIN | ... | NT | 820 | 0418 895 345 | NaN | NaN | feature | 2.0 | 2.0 | 2.0 | 2022-05-27 15:54:05 |
1000 rows × 27 columns
df.head
<bound method NDFrame.head of index TID breadcrumb \ 0 0 1350988 Buy>NT>DARWIN CITY 1 1 1350989 Buy>NT>DARWIN CITY 2 2 1350990 Buy>NT>DARWIN CITY 3 3 1350991 Buy>NT>DARWIN CITY 4 4 1350992 Buy>NT>DARWIN CITY .. ... ... ... 995 995 1351983 Buy>NT>DARWIN 996 996 1351984 Buy>NT>DARWIN 997 997 1351985 Buy>NT>DARWIN 998 998 1351986 Buy>NT>DARWIN 999 999 1351987 Buy>NT>DARWIN category_name property_type \ 0 Real Estate & Property for sale in DARWIN CITY... House 1 Real Estate & Property for sale in DARWIN CITY... Apartment 2 Real Estate & Property for sale in DARWIN CITY... Unit 3 Real Estate & Property for sale in DARWIN CITY... House 4 Real Estate & Property for sale in DARWIN CITY... Unit .. ... ... 995 Real Estate & Property for sale in DARWIN, NT ... House 996 Real Estate & Property for sale in DARWIN, NT ... House 997 Real Estate & Property for sale in DARWIN, NT ... House 998 Real Estate & Property for sale in DARWIN, NT ... House 999 Real Estate & Property for sale in DARWIN, NT ... Unit building_size land_size preferred_size open_date \ 0 NaN NaN NaN Added 2 hours ago 1 171m² NaN 171m² Added 7 hours ago 2 NaN NaN NaN Added 22 hours ago 3 NaN NaN NaN Added yesterday 4 201m² NaN 201m² Added yesterday .. ... ... ... ... 995 NaN 9.17ha 9.17ha Under offer 996 203m² 600m² 600m² NaN 997 209.6m² 800m² 800m² NaN 998 180m² 450m² 450m² NaN 999 120m² NaN 120m² NaN listing_agency ... state zip_code \ 0 Professionals - DARWIN CITY ... NT 800 1 Nick Mousellis Real Estate - Eview Group Member ... NT 800 2 Habitat Real Estate - THE GARDENS ... NT 800 3 Ray White - NIGHTCLIFF ... NT 800 4 Carol Need Real Estate - Fannie Bay ... NT 800 .. ... ... ... ... 995 United Realty NT - Parap ... NT 834 996 Kassiou Constructions - HOWARD SPRINGS ... NT 836 997 Kassiou Constructions - HOWARD SPRINGS ... NT 836 998 Kassiou Constructions - HOWARD SPRINGS ... NT 810 999 Home Zone NT - DARWIN ... NT 820 phone latitude longitude product_depth bedroom_count \ 0 08 8941 8289 NaN NaN premiere 2.0 1 0411724000 NaN NaN premiere 3.0 2 08 8981 0080 NaN NaN premiere 2.0 3 08 8982 2403 NaN NaN premiere 1.0 4 0418885966 NaN NaN premiere 3.0 .. ... ... ... ... ... 995 08 8981 2666 NaN NaN feature 4.0 996 08 89834326 NaN NaN standard 4.0 997 08 89834326 NaN NaN standard 4.0 998 08 89834326 NaN NaN standard 4.0 999 0418 895 345 NaN NaN feature 2.0 bathroom_count parking_count RunDate 0 1.0 1.0 2022-05-27 15:54:05 1 2.0 2.0 2022-05-27 15:54:05 2 1.0 1.0 2022-05-27 15:54:05 3 1.0 0.0 2022-05-27 15:54:05 4 2.0 2.0 2022-05-27 15:54:05 .. ... ... ... 995 3.0 6.0 2022-05-27 15:54:05 996 2.0 2.0 2022-05-27 15:54:05 997 2.0 2.0 2022-05-27 15:54:05 998 2.0 3.0 2022-05-27 15:54:05 999 2.0 2.0 2022-05-27 15:54:05 [1000 rows x 27 columns]>
df.dropna
<bound method DataFrame.dropna of index TID breadcrumb \ 0 0 1350988 Buy>NT>DARWIN CITY 1 1 1350989 Buy>NT>DARWIN CITY 2 2 1350990 Buy>NT>DARWIN CITY 3 3 1350991 Buy>NT>DARWIN CITY 4 4 1350992 Buy>NT>DARWIN CITY .. ... ... ... 995 995 1351983 Buy>NT>DARWIN 996 996 1351984 Buy>NT>DARWIN 997 997 1351985 Buy>NT>DARWIN 998 998 1351986 Buy>NT>DARWIN 999 999 1351987 Buy>NT>DARWIN category_name property_type \ 0 Real Estate & Property for sale in DARWIN CITY... House 1 Real Estate & Property for sale in DARWIN CITY... Apartment 2 Real Estate & Property for sale in DARWIN CITY... Unit 3 Real Estate & Property for sale in DARWIN CITY... House 4 Real Estate & Property for sale in DARWIN CITY... Unit .. ... ... 995 Real Estate & Property for sale in DARWIN, NT ... House 996 Real Estate & Property for sale in DARWIN, NT ... House 997 Real Estate & Property for sale in DARWIN, NT ... House 998 Real Estate & Property for sale in DARWIN, NT ... House 999 Real Estate & Property for sale in DARWIN, NT ... Unit building_size land_size preferred_size open_date \ 0 NaN NaN NaN Added 2 hours ago 1 171m² NaN 171m² Added 7 hours ago 2 NaN NaN NaN Added 22 hours ago 3 NaN NaN NaN Added yesterday 4 201m² NaN 201m² Added yesterday .. ... ... ... ... 995 NaN 9.17ha 9.17ha Under offer 996 203m² 600m² 600m² NaN 997 209.6m² 800m² 800m² NaN 998 180m² 450m² 450m² NaN 999 120m² NaN 120m² NaN listing_agency ... state zip_code \ 0 Professionals - DARWIN CITY ... NT 800 1 Nick Mousellis Real Estate - Eview Group Member ... NT 800 2 Habitat Real Estate - THE GARDENS ... NT 800 3 Ray White - NIGHTCLIFF ... NT 800 4 Carol Need Real Estate - Fannie Bay ... NT 800 .. ... ... ... ... 995 United Realty NT - Parap ... NT 834 996 Kassiou Constructions - HOWARD SPRINGS ... NT 836 997 Kassiou Constructions - HOWARD SPRINGS ... NT 836 998 Kassiou Constructions - HOWARD SPRINGS ... NT 810 999 Home Zone NT - DARWIN ... NT 820 phone latitude longitude product_depth bedroom_count \ 0 08 8941 8289 NaN NaN premiere 2.0 1 0411724000 NaN NaN premiere 3.0 2 08 8981 0080 NaN NaN premiere 2.0 3 08 8982 2403 NaN NaN premiere 1.0 4 0418885966 NaN NaN premiere 3.0 .. ... ... ... ... ... 995 08 8981 2666 NaN NaN feature 4.0 996 08 89834326 NaN NaN standard 4.0 997 08 89834326 NaN NaN standard 4.0 998 08 89834326 NaN NaN standard 4.0 999 0418 895 345 NaN NaN feature 2.0 bathroom_count parking_count RunDate 0 1.0 1.0 2022-05-27 15:54:05 1 2.0 2.0 2022-05-27 15:54:05 2 1.0 1.0 2022-05-27 15:54:05 3 1.0 0.0 2022-05-27 15:54:05 4 2.0 2.0 2022-05-27 15:54:05 .. ... ... ... 995 3.0 6.0 2022-05-27 15:54:05 996 2.0 2.0 2022-05-27 15:54:05 997 2.0 2.0 2022-05-27 15:54:05 998 2.0 3.0 2022-05-27 15:54:05 999 2.0 2.0 2022-05-27 15:54:05 [1000 rows x 27 columns]>
print(df.columns)
Index(['index', 'TID', 'breadcrumb', 'category_name', 'property_type', 'building_size', 'land_size', 'preferred_size', 'open_date', 'listing_agency', 'price', 'location_number', 'location_type', 'location_name', 'address', 'address_1', 'city', 'state', 'zip_code', 'phone', 'latitude', 'longitude', 'product_depth', 'bedroom_count', 'bathroom_count', 'parking_count', 'RunDate'], dtype='object')
n_df = df[['building_size','bedroom_count','bathroom_count', 'parking_count','price']]
df_n = n_df.fillna(0)
df_n
building_size | bedroom_count | bathroom_count | parking_count | price | |
---|---|---|---|---|---|
0 | 0 | 2.0 | 1.0 | 1.0 | $435,000 |
1 | 171m² | 3.0 | 2.0 | 2.0 | Offers Over $320,000 |
2 | 0 | 2.0 | 1.0 | 1.0 | $310,000 |
3 | 0 | 1.0 | 1.0 | 0.0 | $259,000 |
4 | 201m² | 3.0 | 2.0 | 2.0 | $439,000 |
... | ... | ... | ... | ... | ... |
995 | 0 | 4.0 | 3.0 | 6.0 | 2 Residence |
996 | 203m² | 4.0 | 2.0 | 2.0 | $601,000 |
997 | 209.6m² | 4.0 | 2.0 | 2.0 | $655,000 |
998 | 180m² | 4.0 | 2.0 | 3.0 | $675,000 |
999 | 120m² | 2.0 | 2.0 | 2.0 | $399,000 |
1000 rows × 5 columns
df_n['bedroom_count'].value_counts().plot(kind='bar')
plt.title('number of Bedroom')
plt.xlabel('Bedrooms')
plt.ylabel('Count')
sns.despine
<function seaborn.utils.despine(fig=None, ax=None, top=True, right=True, left=False, bottom=False, offset=None, trim=False)>
df_n['parking_count'].value_counts().plot(kind='bar')
plt.title('parking_count')
plt.xlabel('Parking')
plt.ylabel('Count')
sns.despine
<function seaborn.utils.despine(fig=None, ax=None, top=True, right=True, left=False, bottom=False, offset=None, trim=False)>
plt.scatter(df.bedroom_count,df_n.price)
plt.title("Bedroom and Price ")
plt.xlabel("Bedrooms")
plt.ylabel("Price")
plt.show()
sns.despine
<function seaborn.utils.despine(fig=None, ax=None, top=True, right=True, left=False, bottom=False, offset=None, trim=False)>
df_p = df_n.price
df_p
0 $435,000 1 Offers Over $320,000 2 $310,000 3 $259,000 4 $439,000 ... 995 2 Residence 996 $601,000 997 $655,000 998 $675,000 999 $399,000 Name: price, Length: 1000, dtype: object
df_p.map(lambda x: x.lstrip('$'))
0 435,000 1 Offers Over $320,000 2 310,000 3 259,000 4 439,000 ... 995 2 Residence 996 601,000 997 655,000 998 675,000 999 399,000 Name: price, Length: 1000, dtype: object
import re
df_p = df_p.apply(lambda row: re.sub("[^0-9]"," ", row))
df_p
0 435 000 1 320 000 2 310 000 3 259 000 4 439 000 ... 995 2 996 601 000 997 655 000 998 675 000 999 399 000 Name: price, Length: 1000, dtype: object
from sklearn.linear_model import LinearRegression
X=df_n[['bedroom_count','bathroom_count', 'parking_count']]
y=df_p
X
bedroom_count | bathroom_count | parking_count | |
---|---|---|---|
0 | 2.0 | 1.0 | 1.0 |
1 | 3.0 | 2.0 | 2.0 |
2 | 2.0 | 1.0 | 1.0 |
3 | 1.0 | 1.0 | 0.0 |
4 | 3.0 | 2.0 | 2.0 |
... | ... | ... | ... |
995 | 4.0 | 3.0 | 6.0 |
996 | 4.0 | 2.0 | 2.0 |
997 | 4.0 | 2.0 | 2.0 |
998 | 4.0 | 2.0 | 3.0 |
999 | 2.0 | 2.0 | 2.0 |
1000 rows × 3 columns
y
0 435 000 1 320 000 2 310 000 3 259 000 4 439 000 ... 995 2 996 601 000 997 655 000 998 675 000 999 399 000 Name: price, Length: 1000, dtype: object
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)
X_train
bedroom_count | bathroom_count | parking_count | |
---|---|---|---|
175 | 1.0 | 1.0 | 2.0 |
818 | 3.0 | 2.0 | 2.0 |
677 | 4.0 | 2.0 | 2.0 |
952 | 1.0 | 1.0 | 1.0 |
200 | 3.0 | 1.0 | 3.0 |
... | ... | ... | ... |
534 | 6.0 | 2.0 | 6.0 |
584 | 3.0 | 1.0 | 2.0 |
493 | 3.0 | 1.0 | 1.0 |
527 | 4.0 | 2.0 | 2.0 |
168 | 3.0 | 2.0 | 2.0 |
800 rows × 3 columns
len(X_train)
800
X_test
bedroom_count | bathroom_count | parking_count | |
---|---|---|---|
37 | 1.0 | 1.0 | 1.0 |
726 | 4.0 | 2.0 | 2.0 |
846 | 4.0 | 2.0 | 4.0 |
295 | 3.0 | 1.0 | 4.0 |
924 | 2.0 | 2.0 | 2.0 |
... | ... | ... | ... |
839 | 1.0 | 1.0 | 0.0 |
810 | 3.0 | 1.0 | 2.0 |
930 | 3.0 | 2.0 | 2.0 |
616 | 4.0 | 2.0 | 2.0 |
809 | 3.0 | 1.0 | 2.0 |
200 rows × 3 columns
len(X_test)
200
y_test
37 465 000 726 750 000 846 295 629 000 924 ... 839 810 462 500 930 515 000 616 559 000 809 375 000 Name: price, Length: 200, dtype: object
y_train
175 399 000 818 865 000 677 639 000 952 235 000 200 419 000 ... 534 584 375 000 493 395 000 527 168 480 000 Name: price, Length: 800, dtype: object
clf = LinearRegression()
clf.fit(X_train,y_train)
clf.predict(X_test)
clf.score(X_test,y_test)