見出し画像

eBayの検索結果から、商品情報を抽出できるコードを作った話。

久々の投稿。2日間、実質6時間くらいで作りました。と言っても、そのうち3時間はpandasと格闘していました。
ほぼ、3時間かね。

内容は至って簡単。下記のCSVが好きなだけ作れます。
細かいところは教えてほしければ、個別で連絡ください。
基本的に、どんな商品でも対応できます。
今度は、ここからAmazonの情報と連携させる予定。

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome import service as fs
from selenium.webdriver.chrome.options import Options
from urllib.parse import urljoin
import time
import requests
from bs4 import BeautifulSoup
import pandas as pd
import shutil
import csv
import os
import glob
from PIL import Image, ImageFilter
from natsort import natsorted
import re

# Open eBay in Chrome, switch the site language, run the search typed by the
# user and collect the link of every entry on the result page.
url = 'https://www.ebay.com/'
options = webdriver.ChromeOptions()
# Uncomment to run without a visible browser window:
# options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
try:
    driver.maximize_window()
    driver.get(url)
    # Delete the `webdriver` property from the navigator prototype
    # (presumably to make the automated browser less detectable).
    driver.execute_script("""delete Object.getPrototypeOf(navigator).webdriver;""")
    time.sleep(2)
    # NOTE(review): the two "gh-eb-Geo" elements look like the header's
    # region/language menu and its English entry -- confirm against the page.
    element = driver.find_element(By.XPATH, '//*[@id="gh-eb-Geo-a-default"]/span[2]')
    element.click()
    time.sleep(2)
    element = driver.find_element(By.XPATH, '//*[@id="gh-eb-Geo-a-en"]')
    element.click()

    # Type the user's query into the element with id "gh-ac" and submit it
    # through the element with id "gh-btn".
    element = driver.find_element(By.XPATH, '//*[@id="gh-ac"]')
    element.click()
    element.clear()
    element.send_keys(input('検索したい製品を記載してください'))
    element = driver.find_element(By.XPATH, '//*[@id="gh-btn"]')
    element.click()
    soupsrc = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # quit() ends the whole WebDriver session; doing it in `finally` keeps a
    # failed lookup or click from leaving a browser process behind.
    driver.quit()

# href of the first <a> inside every result entry, in page order.
# An entry without an <a> raises AttributeError here.
link = []
detail = soupsrc.find_all(class_='s-item__info clearfix')
for entry in detail:
    link.append(entry.find('a').get('href'))
# Visit every collected link in its own Chrome session, switch the page
# language the same way as on the top page, and keep the parsed HTML.
itemsrc = []  # one BeautifulSoup document per entry of `link`, same order
for url in link:
    options = webdriver.ChromeOptions()
    # Uncomment to run without a visible browser window:
    # options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        driver.maximize_window()
        driver.get(url)
        # Delete the `webdriver` property from the navigator prototype
        # (presumably to make the automated browser less detectable).
        driver.execute_script("""delete Object.getPrototypeOf(navigator).webdriver;""")
        time.sleep(1)
        # NOTE(review): looks like the header's region/language menu and its
        # English entry -- confirm against the page.
        element = driver.find_element(By.XPATH, '//*[@id="gh-eb-Geo-a-default"]/span[2]')
        element.click()
        time.sleep(1)
        element = driver.find_element(By.XPATH, '//*[@id="gh-eb-Geo-a-en"]')
        element.click()
        soupsrc = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # One browser is started per link, so each must be shut down even
        # when a step above raises; quit() ends the whole session.
        driver.quit()
    itemsrc.append(soupsrc)
# Turn every fetched item page into one row of the result table `df03`.

# Each strategy is (progress label printed before the attempt, positions
# popped -- in this order -- from the scraped text list before it is split
# into alternating name/value entries).  Strategies are tried top to bottom
# until one produces a row that can be appended to the table.
_DROP_STRATEGIES = (
    ('第一段階', (1, 1)),
    ('第二段階', (1, 1, 2)),
    ('例外キャッチ', (2, 2)),
)


def _scrape_listing(page):
    """Read the raw fields of one parsed item page.

    Args:
        page: Parsed item page exposing BeautifulSoup's ``find``.

    Returns:
        Tuple ``(title, soldnumber, texts)``. ``soldnumber`` is the text of
        the ``d-quantity__availability`` element, or ``0`` when the page has
        none. ``texts`` holds the text of every ``ux-textspans`` element
        inside the first ``ux-layout-section-evo__item`` element.

    Raises:
        AttributeError: If the title element or the
            ``ux-layout-section-evo__item`` element is missing.
    """
    title = page.find(class_='x-item-title__mainTitle').getText()
    availability = page.find(class_='d-quantity__availability')
    soldnumber = availability.getText() if availability is not None else 0
    section = page.find(class_='ux-layout-section-evo__item')
    texts = [span.getText() for span in section.find_all(class_='ux-textspans')]
    return title, soldnumber, texts


def _build_row(title, soldnumber, texts, drop_positions):
    """Build a one-row DataFrame from the scraped fields of one item.

    Args:
        title: Item title.
        soldnumber: Availability text, or ``0`` when the page had none.
        texts: Scraped text entries; not modified.
        drop_positions: Indexes popped one after another from a copy of
            ``texts`` before the name/value split.

    Returns:
        DataFrame with a single row whose column labels are ``'title'``,
        ``'soldnumber'`` and the even-positioned remaining entries, and
        whose values are the title, ``soldnumber`` and the odd-positioned
        remaining entries.

    Raises:
        IndexError: If a position in ``drop_positions`` is out of range.
    """
    remaining = list(texts)
    for position in drop_positions:
        remaining.pop(position)

    names = remaining[0::2]
    values = remaining[1::2]
    names.insert(0, 'title')
    names.insert(2, 'soldnumber')
    values.insert(0, str(title))
    values.insert(2, soldnumber)

    # Two single-column frames side by side, transposed: row 0 carries the
    # names and row 1 the values.  Row 0 is promoted to the header and then
    # dropped, leaving exactly one data row.
    frame = pd.concat([pd.DataFrame(names), pd.DataFrame(values)], axis=1).T
    frame.columns = frame.iloc[0]
    frame.reset_index(drop=True, inplace=True)
    return frame.drop(frame.index[0])


df03 = pd.DataFrame([])
# The first fetched page is deliberately left out (itemsrc[1:]).
# NOTE(review): presumably the first search hit is a placeholder entry --
# confirm against the result page.
for page in itemsrc[1:]:
    failure = None
    for label, drop_positions in _DROP_STRATEGIES:
        print(label)
        try:
            title, soldnumber, texts = _scrape_listing(page)
            row = _build_row(title, soldnumber, texts, drop_positions)
            # Appending stays inside the attempt on purpose: a row that
            # cannot be merged into the table counts as a failed strategy.
            combined = pd.concat([df03, row], axis=0)
        except Exception as exc:
            failure = exc
            continue
        df03 = combined.reset_index(drop=True)
        break
    else:
        # Every strategy failed: report and move on so the rows gathered so
        # far are still exported instead of losing the whole run.
        print(f'skipped item: {failure!r}')

# Export the collected table to a CSV file without the index column, using
# the 'utf-8-sig' codec, then print a completion marker.
path = r"C:\Users\Owner\Desktop\work_current\sorce\ebay.csv"
df03.to_csv(path_or_buf=path, index=False, encoding='utf-8-sig')
print('end')

こんなこともやってます。


この記事が気に入ったらサポートをしてみませんか?