In [204]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mpl_toolkits
import numpy as np
import scipy.stats as stats
%matplotlib inline
from sklearn.model_selection import train_test_split
In [220]:
df=pd.read_csv(r"C:\Users\Ismail\Desktop\RealEstateAU_1000_Samples.csv",skipinitialspace = True)
In [227]:
df
Out[227]:
index TID breadcrumb category_name property_type building_size land_size preferred_size open_date listing_agency ... state zip_code phone latitude longitude product_depth bedroom_count bathroom_count parking_count RunDate
0 0 1350988 Buy>NT>DARWIN CITY Real Estate & Property for sale in DARWIN CITY... House NaN NaN NaN Added 2 hours ago Professionals - DARWIN CITY ... NT 800 08 8941 8289 NaN NaN premiere 2.0 1.0 1.0 2022-05-27 15:54:05
1 1 1350989 Buy>NT>DARWIN CITY Real Estate & Property for sale in DARWIN CITY... Apartment 171m² NaN 171m² Added 7 hours ago Nick Mousellis Real Estate - Eview Group Member ... NT 800 0411724000 NaN NaN premiere 3.0 2.0 2.0 2022-05-27 15:54:05
2 2 1350990 Buy>NT>DARWIN CITY Real Estate & Property for sale in DARWIN CITY... Unit NaN NaN NaN Added 22 hours ago Habitat Real Estate - THE GARDENS ... NT 800 08 8981 0080 NaN NaN premiere 2.0 1.0 1.0 2022-05-27 15:54:05
3 3 1350991 Buy>NT>DARWIN CITY Real Estate & Property for sale in DARWIN CITY... House NaN NaN NaN Added yesterday Ray White - NIGHTCLIFF ... NT 800 08 8982 2403 NaN NaN premiere 1.0 1.0 0.0 2022-05-27 15:54:05
4 4 1350992 Buy>NT>DARWIN CITY Real Estate & Property for sale in DARWIN CITY... Unit 201m² NaN 201m² Added yesterday Carol Need Real Estate - Fannie Bay ... NT 800 0418885966 NaN NaN premiere 3.0 2.0 2.0 2022-05-27 15:54:05
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
995 995 1351983 Buy>NT>DARWIN Real Estate & Property for sale in DARWIN, NT ... House NaN 9.17ha 9.17ha Under offer United Realty NT - Parap ... NT 834 08 8981 2666 NaN NaN feature 4.0 3.0 6.0 2022-05-27 15:54:05
996 996 1351984 Buy>NT>DARWIN Real Estate & Property for sale in DARWIN, NT ... House 203m² 600m² 600m² NaN Kassiou Constructions - HOWARD SPRINGS ... NT 836 08 89834326 NaN NaN standard 4.0 2.0 2.0 2022-05-27 15:54:05
997 997 1351985 Buy>NT>DARWIN Real Estate & Property for sale in DARWIN, NT ... House 209.6m² 800m² 800m² NaN Kassiou Constructions - HOWARD SPRINGS ... NT 836 08 89834326 NaN NaN standard 4.0 2.0 2.0 2022-05-27 15:54:05
998 998 1351986 Buy>NT>DARWIN Real Estate & Property for sale in DARWIN, NT ... House 180m² 450m² 450m² NaN Kassiou Constructions - HOWARD SPRINGS ... NT 810 08 89834326 NaN NaN standard 4.0 2.0 3.0 2022-05-27 15:54:05
999 999 1351987 Buy>NT>DARWIN Real Estate & Property for sale in DARWIN, NT ... Unit 120m² NaN 120m² NaN Home Zone NT - DARWIN ... NT 820 0418 895 345 NaN NaN feature 2.0 2.0 2.0 2022-05-27 15:54:05

1000 rows × 27 columns

In [228]:
df.head
Out[228]:
<bound method NDFrame.head of      index      TID          breadcrumb  \
0        0  1350988  Buy>NT>DARWIN CITY   
1        1  1350989  Buy>NT>DARWIN CITY   
2        2  1350990  Buy>NT>DARWIN CITY   
3        3  1350991  Buy>NT>DARWIN CITY   
4        4  1350992  Buy>NT>DARWIN CITY   
..     ...      ...                 ...   
995    995  1351983       Buy>NT>DARWIN   
996    996  1351984       Buy>NT>DARWIN   
997    997  1351985       Buy>NT>DARWIN   
998    998  1351986       Buy>NT>DARWIN   
999    999  1351987       Buy>NT>DARWIN   

                                         category_name property_type  \
0    Real Estate & Property for sale in DARWIN CITY...         House   
1    Real Estate & Property for sale in DARWIN CITY...     Apartment   
2    Real Estate & Property for sale in DARWIN CITY...          Unit   
3    Real Estate & Property for sale in DARWIN CITY...         House   
4    Real Estate & Property for sale in DARWIN CITY...          Unit   
..                                                 ...           ...   
995  Real Estate & Property for sale in DARWIN, NT ...         House   
996  Real Estate & Property for sale in DARWIN, NT ...         House   
997  Real Estate & Property for sale in DARWIN, NT ...         House   
998  Real Estate & Property for sale in DARWIN, NT ...         House   
999  Real Estate & Property for sale in DARWIN, NT ...          Unit   

    building_size land_size preferred_size           open_date  \
0             NaN       NaN            NaN   Added 2 hours ago   
1           171m²       NaN          171m²   Added 7 hours ago   
2             NaN       NaN            NaN  Added 22 hours ago   
3             NaN       NaN            NaN     Added yesterday   
4           201m²       NaN          201m²     Added yesterday   
..            ...       ...            ...                 ...   
995           NaN    9.17ha         9.17ha         Under offer   
996         203m²     600m²          600m²                 NaN   
997       209.6m²     800m²          800m²                 NaN   
998         180m²     450m²          450m²                 NaN   
999         120m²       NaN          120m²                 NaN   

                                      listing_agency  ... state  zip_code  \
0                        Professionals - DARWIN CITY  ...    NT       800   
1    Nick Mousellis Real Estate - Eview Group Member  ...    NT       800   
2                  Habitat Real Estate - THE GARDENS  ...    NT       800   
3                             Ray White - NIGHTCLIFF  ...    NT       800   
4                Carol Need Real Estate - Fannie Bay  ...    NT       800   
..                                               ...  ...   ...       ...   
995                         United Realty NT - Parap  ...    NT       834   
996           Kassiou Constructions - HOWARD SPRINGS  ...    NT       836   
997           Kassiou Constructions - HOWARD SPRINGS  ...    NT       836   
998           Kassiou Constructions - HOWARD SPRINGS  ...    NT       810   
999                            Home Zone NT - DARWIN  ...    NT       820   

            phone latitude longitude product_depth bedroom_count  \
0    08 8941 8289      NaN       NaN      premiere           2.0   
1      0411724000      NaN       NaN      premiere           3.0   
2    08 8981 0080      NaN       NaN      premiere           2.0   
3    08 8982 2403      NaN       NaN      premiere           1.0   
4      0418885966      NaN       NaN      premiere           3.0   
..            ...      ...       ...           ...           ...   
995  08 8981 2666      NaN       NaN       feature           4.0   
996   08 89834326      NaN       NaN      standard           4.0   
997   08 89834326      NaN       NaN      standard           4.0   
998   08 89834326      NaN       NaN      standard           4.0   
999  0418 895 345      NaN       NaN       feature           2.0   

    bathroom_count  parking_count              RunDate  
0              1.0            1.0  2022-05-27 15:54:05  
1              2.0            2.0  2022-05-27 15:54:05  
2              1.0            1.0  2022-05-27 15:54:05  
3              1.0            0.0  2022-05-27 15:54:05  
4              2.0            2.0  2022-05-27 15:54:05  
..             ...            ...                  ...  
995            3.0            6.0  2022-05-27 15:54:05  
996            2.0            2.0  2022-05-27 15:54:05  
997            2.0            2.0  2022-05-27 15:54:05  
998            2.0            3.0  2022-05-27 15:54:05  
999            2.0            2.0  2022-05-27 15:54:05  

[1000 rows x 27 columns]>
In [229]:
df.dropna
Out[229]:
<bound method DataFrame.dropna of      index      TID          breadcrumb  \
0        0  1350988  Buy>NT>DARWIN CITY   
1        1  1350989  Buy>NT>DARWIN CITY   
2        2  1350990  Buy>NT>DARWIN CITY   
3        3  1350991  Buy>NT>DARWIN CITY   
4        4  1350992  Buy>NT>DARWIN CITY   
..     ...      ...                 ...   
995    995  1351983       Buy>NT>DARWIN   
996    996  1351984       Buy>NT>DARWIN   
997    997  1351985       Buy>NT>DARWIN   
998    998  1351986       Buy>NT>DARWIN   
999    999  1351987       Buy>NT>DARWIN   

                                         category_name property_type  \
0    Real Estate & Property for sale in DARWIN CITY...         House   
1    Real Estate & Property for sale in DARWIN CITY...     Apartment   
2    Real Estate & Property for sale in DARWIN CITY...          Unit   
3    Real Estate & Property for sale in DARWIN CITY...         House   
4    Real Estate & Property for sale in DARWIN CITY...          Unit   
..                                                 ...           ...   
995  Real Estate & Property for sale in DARWIN, NT ...         House   
996  Real Estate & Property for sale in DARWIN, NT ...         House   
997  Real Estate & Property for sale in DARWIN, NT ...         House   
998  Real Estate & Property for sale in DARWIN, NT ...         House   
999  Real Estate & Property for sale in DARWIN, NT ...          Unit   

    building_size land_size preferred_size           open_date  \
0             NaN       NaN            NaN   Added 2 hours ago   
1           171m²       NaN          171m²   Added 7 hours ago   
2             NaN       NaN            NaN  Added 22 hours ago   
3             NaN       NaN            NaN     Added yesterday   
4           201m²       NaN          201m²     Added yesterday   
..            ...       ...            ...                 ...   
995           NaN    9.17ha         9.17ha         Under offer   
996         203m²     600m²          600m²                 NaN   
997       209.6m²     800m²          800m²                 NaN   
998         180m²     450m²          450m²                 NaN   
999         120m²       NaN          120m²                 NaN   

                                      listing_agency  ... state  zip_code  \
0                        Professionals - DARWIN CITY  ...    NT       800   
1    Nick Mousellis Real Estate - Eview Group Member  ...    NT       800   
2                  Habitat Real Estate - THE GARDENS  ...    NT       800   
3                             Ray White - NIGHTCLIFF  ...    NT       800   
4                Carol Need Real Estate - Fannie Bay  ...    NT       800   
..                                               ...  ...   ...       ...   
995                         United Realty NT - Parap  ...    NT       834   
996           Kassiou Constructions - HOWARD SPRINGS  ...    NT       836   
997           Kassiou Constructions - HOWARD SPRINGS  ...    NT       836   
998           Kassiou Constructions - HOWARD SPRINGS  ...    NT       810   
999                            Home Zone NT - DARWIN  ...    NT       820   

            phone latitude longitude product_depth bedroom_count  \
0    08 8941 8289      NaN       NaN      premiere           2.0   
1      0411724000      NaN       NaN      premiere           3.0   
2    08 8981 0080      NaN       NaN      premiere           2.0   
3    08 8982 2403      NaN       NaN      premiere           1.0   
4      0418885966      NaN       NaN      premiere           3.0   
..            ...      ...       ...           ...           ...   
995  08 8981 2666      NaN       NaN       feature           4.0   
996   08 89834326      NaN       NaN      standard           4.0   
997   08 89834326      NaN       NaN      standard           4.0   
998   08 89834326      NaN       NaN      standard           4.0   
999  0418 895 345      NaN       NaN       feature           2.0   

    bathroom_count  parking_count              RunDate  
0              1.0            1.0  2022-05-27 15:54:05  
1              2.0            2.0  2022-05-27 15:54:05  
2              1.0            1.0  2022-05-27 15:54:05  
3              1.0            0.0  2022-05-27 15:54:05  
4              2.0            2.0  2022-05-27 15:54:05  
..             ...            ...                  ...  
995            3.0            6.0  2022-05-27 15:54:05  
996            2.0            2.0  2022-05-27 15:54:05  
997            2.0            2.0  2022-05-27 15:54:05  
998            2.0            3.0  2022-05-27 15:54:05  
999            2.0            2.0  2022-05-27 15:54:05  

[1000 rows x 27 columns]>
In [230]:
print(df.columns)
Index(['index', 'TID', 'breadcrumb', 'category_name', 'property_type',
       'building_size', 'land_size', 'preferred_size', 'open_date',
       'listing_agency', 'price', 'location_number', 'location_type',
       'location_name', 'address', 'address_1', 'city', 'state', 'zip_code',
       'phone', 'latitude', 'longitude', 'product_depth', 'bedroom_count',
       'bathroom_count', 'parking_count', 'RunDate'],
      dtype='object')
In [231]:
n_df = df[['building_size','bedroom_count','bathroom_count', 'parking_count','price']]
In [232]:
df_n = n_df.fillna(0)
In [233]:
df_n
Out[233]:
building_size bedroom_count bathroom_count parking_count price
0 0 2.0 1.0 1.0 $435,000
1 171m² 3.0 2.0 2.0 Offers Over $320,000
2 0 2.0 1.0 1.0 $310,000
3 0 1.0 1.0 0.0 $259,000
4 201m² 3.0 2.0 2.0 $439,000
... ... ... ... ... ...
995 0 4.0 3.0 6.0 2 Residence
996 203m² 4.0 2.0 2.0 $601,000
997 209.6m² 4.0 2.0 2.0 $655,000
998 180m² 4.0 2.0 3.0 $675,000
999 120m² 2.0 2.0 2.0 $399,000

1000 rows × 5 columns

In [234]:
df_n['bedroom_count'].value_counts().plot(kind='bar')
plt.title('number of Bedroom')
plt.xlabel('Bedrooms')
plt.ylabel('Count')
sns.despine
Out[234]:
<function seaborn.utils.despine(fig=None, ax=None, top=True, right=True, left=False, bottom=False, offset=None, trim=False)>
In [235]:
df_n['parking_count'].value_counts().plot(kind='bar')
plt.title('parking_count')
plt.xlabel('Parking')
plt.ylabel('Count')
sns.despine
Out[235]:
<function seaborn.utils.despine(fig=None, ax=None, top=True, right=True, left=False, bottom=False, offset=None, trim=False)>
In [236]:
plt.scatter(df.bedroom_count,df_n.price)
plt.title("Bedroom and Price ")
plt.xlabel("Bedrooms")
plt.ylabel("Price")
plt.show()
sns.despine
Out[236]:
<function seaborn.utils.despine(fig=None, ax=None, top=True, right=True, left=False, bottom=False, offset=None, trim=False)>
In [237]:
df_p = df_n.price
df_p
Out[237]:
0                  $435,000
1      Offers Over $320,000
2                  $310,000
3                  $259,000
4                  $439,000
               ...         
995             2 Residence
996                $601,000
997                $655,000
998                $675,000
999                $399,000
Name: price, Length: 1000, dtype: object
In [238]:
df_p.map(lambda x: x.lstrip('$'))
Out[238]:
0                   435,000
1      Offers Over $320,000
2                   310,000
3                   259,000
4                   439,000
               ...         
995             2 Residence
996                 601,000
997                 655,000
998                 675,000
999                 399,000
Name: price, Length: 1000, dtype: object
In [239]:
import re
df_p = df_p.apply(lambda row: re.sub("[^0-9]"," ", row))
In [240]:
df_p
Out[240]:
0                   435 000
1                   320 000
2                   310 000
3                   259 000
4                   439 000
               ...         
995             2          
996                 601 000
997                 655 000
998                 675 000
999                 399 000
Name: price, Length: 1000, dtype: object
In [241]:
from sklearn.linear_model import LinearRegression
In [242]:
X=df_n[['bedroom_count','bathroom_count', 'parking_count']]
y=df_p
In [243]:
X
Out[243]:
bedroom_count bathroom_count parking_count
0 2.0 1.0 1.0
1 3.0 2.0 2.0
2 2.0 1.0 1.0
3 1.0 1.0 0.0
4 3.0 2.0 2.0
... ... ... ...
995 4.0 3.0 6.0
996 4.0 2.0 2.0
997 4.0 2.0 2.0
998 4.0 2.0 3.0
999 2.0 2.0 2.0

1000 rows × 3 columns

In [244]:
y
Out[244]:
0                   435 000
1                   320 000
2                   310 000
3                   259 000
4                   439 000
               ...         
995             2          
996                 601 000
997                 655 000
998                 675 000
999                 399 000
Name: price, Length: 1000, dtype: object
In [245]:
from sklearn.model_selection import train_test_split
In [246]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)
In [247]:
X_train
Out[247]:
bedroom_count bathroom_count parking_count
175 1.0 1.0 2.0
818 3.0 2.0 2.0
677 4.0 2.0 2.0
952 1.0 1.0 1.0
200 3.0 1.0 3.0
... ... ... ...
534 6.0 2.0 6.0
584 3.0 1.0 2.0
493 3.0 1.0 1.0
527 4.0 2.0 2.0
168 3.0 2.0 2.0

800 rows × 3 columns

In [248]:
len(X_train)
Out[248]:
800
In [249]:
X_test
Out[249]:
bedroom_count bathroom_count parking_count
37 1.0 1.0 1.0
726 4.0 2.0 2.0
846 4.0 2.0 4.0
295 3.0 1.0 4.0
924 2.0 2.0 2.0
... ... ... ...
839 1.0 1.0 0.0
810 3.0 1.0 2.0
930 3.0 2.0 2.0
616 4.0 2.0 2.0
809 3.0 1.0 2.0

200 rows × 3 columns

In [250]:
len(X_test)
Out[250]:
200
In [251]:
y_test
Out[251]:
37                  465 000
726                 750 000
846                        
295                 629 000
924                        
               ...         
839                        
810                 462 500
930                 515 000
616                 559 000
809                 375 000
Name: price, Length: 200, dtype: object
In [252]:
y_train
Out[252]:
175                 399 000
818                 865 000
677                 639 000
952      235 000           
200                 419 000
               ...         
534                        
584                 375 000
493                 395 000
527                        
168                 480 000
Name: price, Length: 800, dtype: object
In [253]:
clf = LinearRegression()
In [ ]:
clf.fit(X_train,y_train)
In [ ]:
clf.predict(X_test)
In [ ]:
clf.score(X_test,y_test)