ebayの検索結果から、商品情報を抽出できるコードを作った話。
ひさびさの投稿。2日間、実質6時間ほどで作りました。とはいえ、そのうち3時間はpandasとの格闘だったので、コード本体の実装はほぼ3時間ですね。
内容は至って簡単。下記のCSVが好きなだけ作れます。
細かいところは教えてほしければ、個別で連絡ください。
基本的に、どんな商品でも対応できます。
今度は、ここから、amazon情報と連携させる予定。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome import service as fs
from selenium.webdriver.chrome.options import Options
from urllib.parse import urljoin
import time
import requests
from bs4 import BeautifulSoup
import pandas as pd
import shutil
import csv
import os
import glob
from PIL import Image, ImageFilter
from natsort import natsorted
import re
# --- Open eBay, switch the UI to English, and run the interactive search ---
url = 'https://www.ebay.com/'
options = webdriver.ChromeOptions()
#options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
driver.maximize_window()
driver.get(url)
# Remove navigator.webdriver so the site is less likely to flag automation.
driver.execute_script("""delete Object.getPrototypeOf(navigator).webdriver;""")
time.sleep(2)
# Open the region/language picker and pick English.
driver.find_element(By.XPATH, '//*[@id="gh-eb-Geo-a-default"]/span[2]').click()
time.sleep(2)
driver.find_element(By.XPATH, '//*[@id="gh-eb-Geo-a-en"]').click()
# Type the user's query into the search box and submit it.
search_box = driver.find_element(By.XPATH, '//*[@id="gh-ac"]')
search_box.click()
search_box.clear()
search_box.send_keys(input('検索したい製品を記載してください'))
driver.find_element(By.XPATH, '//*[@id="gh-btn"]').click()
# BUG FIX: wait for the result page to render; the original read page_source
# immediately after the click and could capture the previous page.
time.sleep(2)
soupsrc = BeautifulSoup(driver.page_source, 'html.parser')
# BUG FIX: quit() (not close()) ends the whole ChromeDriver session; close()
# only closes the window and can leave the driver process running.
driver.quit()
# Pull the product-page URL out of every search-result card.
# Each 's-item__info clearfix' node is one listing; its first <a> holds the link.
link = [
    card.find('a').get('href')
    for card in soupsrc.find_all(class_='s-item__info clearfix')
]
# --- Visit each product page, force English, and keep its parsed HTML ---
itemsrc = []  # BeautifulSoup of every product page, in link order
# Hoisted out of the loop: the options object is identical for every URL.
options = webdriver.ChromeOptions()
#options.add_argument('--headless')
for url in link:
    driver = webdriver.Chrome(options=options)
    driver.maximize_window()
    driver.get(url)
    # Remove navigator.webdriver so the site is less likely to flag automation.
    driver.execute_script("""delete Object.getPrototypeOf(navigator).webdriver;""")
    time.sleep(1)
    # Switch this fresh session to English via the region/language picker.
    driver.find_element(By.XPATH, '//*[@id="gh-eb-Geo-a-default"]/span[2]').click()
    time.sleep(1)
    driver.find_element(By.XPATH, '//*[@id="gh-eb-Geo-a-en"]').click()
    itemsrc.append(BeautifulSoup(driver.page_source, 'html.parser'))
    # BUG FIX: quit() (not close()) terminates the ChromeDriver process; with
    # close() one driver process per URL was left behind for the whole run.
    driver.quit()
#driver.close()
# --- Parse every saved product page into one CSV row ---
# The original code repeated the identical extraction three times, differing
# only in which indices were popped from the "Item specifics" text list
# (eBay renders the specifics table with a slightly different layout per
# listing type). The shared logic now lives in one helper parameterized by
# that pop pattern.

def _attempt_parse(page, pop_indices):
    """Parse one product-page soup into a single-row DataFrame.

    page        -- BeautifulSoup of a product page (iterated child-by-child,
                   as the original did; normally this yields the lone <html>
                   element).
    pop_indices -- indices removed, in order, from the flat list of
                   'ux-textspans' texts before splitting it into alternating
                   name/value pairs.

    Returns a 1-row DataFrame whose columns are the specifics names plus
    'title' and 'soldnumber'. Raises (AttributeError/IndexError/...) when the
    page layout does not match this pop pattern; the caller uses that to try
    the next pattern.
    """
    texts = []
    for node in page:
        title = node.find(class_='x-item-title__mainTitle').getText()
        try:
            sold = node.find(class_='d-quantity__availability').getText()
        except AttributeError:
            # Listing shows no availability counter -> treat as 0 sold.
            sold = 0
        spans = node.find(class_='ux-layout-section-evo__item') \
                    .find_all(class_='ux-textspans')
        texts.extend(span.getText() for span in spans)
    # Drop layout noise; which indices are noise depends on the listing type.
    for idx in pop_indices:
        texts.pop(idx)
    # Remaining texts alternate: name, value, name, value, ...
    names = texts[0::2]
    values = texts[1::2]
    names.insert(0, 'title')
    names.insert(2, 'soldnumber')
    values.insert(0, str(title))
    values.insert(2, sold)
    # Two columns (names, values) -> transpose -> first row becomes header.
    row = pd.concat([pd.DataFrame(names), pd.DataFrame(values)], axis=1).T
    row.columns = row.iloc[0]
    row.reset_index(drop=True, inplace=True)
    return row.drop(row.index[0])


df03 = pd.DataFrame([])  # accumulates one row per successfully parsed item
num = 1                  # kept from the original; only used as a counter

# (stage label, pop pattern) fallbacks, tried in order per page. Labels are
# the original progress messages and are printed exactly as before.
_STAGES = (
    ('第一段階', (1, 1)),
    ('第二段階', (1, 1, 2)),
    ('例外キャッチ', (2, 2)),
)

# itemsrc[0] is skipped, as in the original (its first entry was unusable).
for page in itemsrc[1:]:
    for stage, pops in _STAGES:
        print(stage)
        try:
            row = _attempt_parse(page, pops)
        except Exception:
            # Narrowed from the original bare `except:` so Ctrl-C still works.
            # ROBUSTNESS: the original's last fallback was unguarded, so one
            # malformed page crashed the whole run; now the item is skipped.
            continue
        df03 = pd.concat([df03, row], axis=0)
        df03.reset_index(drop=True, inplace=True)
        num += 2
        break

# NOTE(review): output path is hard-coded to the author's desktop — adjust
# before running on another machine.
path = r"C:\Users\Owner\Desktop\work_current\sorce\ebay.csv"
# utf-8-sig keeps Japanese text readable when the CSV is opened in Excel.
df03.to_csv(path, encoding='utf-8-sig', index=False)
print('end')
こんなこともやってます。
この記事が気に入ったらサポートをしてみませんか?