In [ ]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
In [ ]:
# Function to extract Product Title
def get_title(soup):

    try:
        # Outer Tag Object
        title = soup.find("span", attrs={"id":'productTitle'})
        
        # Inner NavigatableString Object
        title_value = title.text

        # Title as a string value
        title_string = title_value.strip()

    except AttributeError:
        title_string = ""

    return title_string

# Function to extract Product Price
def get_price(soup):

    try:
        price = soup.find("span", attrs={'id':'priceblock_ourprice'}).string.strip()

    except AttributeError:

        try:
            # If there is some deal price
            price = soup.find("span", attrs={'id':'priceblock_dealprice'}).string.strip()

        except:
            price = ""

    return price

# Function to extract Product Rating
def get_rating(soup):

    try:
        rating = soup.find("i", attrs={'class':'a-icon a-icon-star a-star-4-5'}).string.strip()
    
    except AttributeError:
        try:
            rating = soup.find("span", attrs={'class':'a-icon-alt'}).string.strip()
        except:
            rating = ""	

    return rating

# Function to extract Number of User Reviews
def get_review_count(soup):
    try:
        review_count = soup.find("span", attrs={'id':'acrCustomerReviewText'}).string.strip()

    except AttributeError:
        review_count = ""	

    return review_count

# Function to extract Availability Status
def get_availability(soup):
    try:
        available = soup.find("div", attrs={'id':'availability'})
        available = available.find("span").string.strip()

    except AttributeError:
        available = "Not Available"	

    return available
In [ ]:
if __name__ == '__main__':

    # add your user agent 
    HEADERS = ({'User-Agent':'', 'Accept-Language': 'en-US, en;q=0.5'})

    # The webpage URL
    URL = "https://www.amazon.com/s?k=playstation+4&ref=nb_sb_noss_2"

    # HTTP Request
    webpage = requests.get(URL, headers=HEADERS)

    # Soup Object containing all data
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Fetch links as List of Tag Objects
    links = soup.find_all("a", attrs={'class':'a-link-normal s-no-outline'})

    # Store the links
    links_list = []

    # Loop for extracting links from Tag Objects
    for link in links:
            links_list.append(link.get('href'))

    d = {"title":[], "price":[], "rating":[], "reviews":[],"availability":[]}
    
    # Loop for extracting product details from each link 
    for link in links_list:
        new_webpage = requests.get("https://www.amazon.com" + link, headers=HEADERS)

        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        # Function calls to display all necessary product information
        d['title'].append(get_title(new_soup))
        d['price'].append(get_price(new_soup))
        d['rating'].append(get_rating(new_soup))
        d['reviews'].append(get_review_count(new_soup))
        d['availability'].append(get_availability(new_soup))

    
    amazon_df = pd.DataFrame.from_dict(d)
    amazon_df['title'].replace('', np.nan, inplace=True)
    amazon_df = amazon_df.dropna(subset=['title'])
    amazon_df.to_csv("amazon_data.csv", header=True, index=False)
C:\Users\DELL\AppData\Local\Temp\ipykernel_11088\4254666078.py:42: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  amazon_df['title'].replace('', np.nan, inplace=True)
In [ ]:
amazon_df
Out[ ]:
title price rating reviews availability
0 Sony Playstation PS4 1TB Black Console 4.6 out of 5 stars 1,399 ratings In Stock
1 Flagship Newest Play Station 4 1TB HDD Only on... 4.5 out of 5 stars 199 ratings Not Available
2 Newest Sony Playstation 4 Slim 1TB SSD Console... 4.3 out of 5 stars 312 ratings Not Available
3 PlayStation®5 Digital Edition (slim) 4.7 out of 5 stars 3,478 ratings Only 1 left in stock - order soon.
4 Charger Dock Station for PS4, 1.8 Hrs Fast Cha... 4.6 out of 5 stars 14,246 ratings In Stock
5 Wireless Controller Dual Vibration Game Joysti... 4.1 out of 5 stars 1,258 ratings In Stock
6 Wireless Controller for PS4 with 2 Thumb Grips... 4.1 out of 5 stars 1,602 ratings In Stock
7 VidPPluing Wireless Controller for PS4/Pro/Sli... 4.3 out of 5 stars 400 ratings In Stock
8 PlayStation 4 Slim 1TB Console - Marvel's Spid... 4.7 out of 5 stars 2,814 ratings Not Available
9 OIVO PS4 Stand Cooling Fan Station for Playsta... 4.5 out of 5 stars 45,109 ratings In Stock
10 PlayStation 4 Slim 500GB Console - Uncharted 4... 4.8 out of 5 stars 6,453 ratings Only 1 left in stock - order soon.
11 Turtle Beach Stealth 700 Gen 2 MAX Wireless Am... 4.3 out of 5 stars 1,621 ratings In Stock
12 OWC 2.0 TB External Hard Drive Upgrade for Son... 4.8 out of 5 stars 60 ratings Only 19 left in stock - order soon.
13 Minecraft Starter Collection (PS4) 4.6 out of 5 stars 660 ratings Only 1 left in stock - order soon.
14 Light-up Wireless Controller for PS4,Black Cra... 4.3 out of 5 stars 269 ratings In Stock
15 PlayStation 4 Slim 1TB Console - Black (Renewed) 4.1 out of 5 stars 1,513 ratings Only 2 left in stock - order soon.
16 PlayStation 4 Slim 1TB Console 4.7 out of 5 stars 15,672 ratings Not Available
17 The Crew Motorfest - Standard Edition, PlaySta... 4.7 out of 5 stars 126 ratings In Stock
18 Replacement Astro A40 A10 A30 A50 Gaming Heads... 3.9 out of 5 stars 12 ratings
19 Rolling Universal Gaming Backpack for Xbox One... 3.8 out of 5 stars 51 ratings In Stock
In [ ]: