Skip to content

Instantly share code, notes, and snippets.

@y-ookuma
Last active September 14, 2022 10:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save y-ookuma/47d1264be232043a8e17b59fd011ae56 to your computer and use it in GitHub Desktop.
Save y-ookuma/47d1264be232043a8e17b59fd011ae56 to your computer and use it in GitHub Desktop.
JANコードを使ってYahoo APIから商品名と荷姿を取得する。入力ファイルはjan.txt、出力ファイルはoutput.csv。
import requests,csv,json,time,os,collections
from bs4 import BeautifulSoup
import mojimoji as mo
import pandas as pd
# Application id sent as the `appid` query parameter of the Yahoo API call.
# Replace the placeholder with a real id before running.
client_id = "Your Yahoo client id"

# Text file that is read line by line; each line holds one JAN code.
inputfile = "jan.txt"

# CSV file the collected product rows are written to.
outputfile = "output.csv"
# Endpoint of the Yahoo! Shopping item search (V3) web service.
_ITEM_SEARCH_URL = "https://shopping.yahooapis.jp/ShoppingWebService/V3/itemSearch"

# Upper bound, in seconds, for one HTTP request so a stalled connection
# cannot hang the whole run.
_REQUEST_TIMEOUT_SEC = 30

# Words removed from the item name before it is split into product name and
# package size. Order matters: a phrase must be listed before any shorter
# word it contains, otherwise the shorter word is removed first and the
# phrase can no longer match.
_NOISE_WORDS = (
    '殺菌剤',
    '農薬',
    '水稲用除草剤',
    '除草剤',
    '(送料無料)',
    'アグロカネショウ',
    '【メール便可】',
    '(ウララドライフロアブル)',
    'バイエル',
)


def _split_name_and_volume(name):
    """Strip noise words from *name* and return (product_name, product_volume).

    The cleaned name is split on whitespace; the first token is the product
    name and the second token (if any) is the package size. Missing tokens
    are returned as empty strings.
    """
    for word in _NOISE_WORDS:
        name = name.replace(word, '')
    # Treat '/' as a separator, then split on any run of whitespace so that
    # consecutive spaces never produce an empty token.
    tokens = name.replace('/', ' ').split()
    product_name = tokens[0] if tokens else ""
    product_volume = tokens[1] if len(tokens) > 1 else ""
    return product_name, product_volume


def code_to_product_info(code):
    """Look up *code* with the Yahoo! Shopping item search API.

    Only the first hit of the response is used.

    Args:
        code: Search keyword sent as the `query` parameter (a JAN code).

    Returns:
        A list `[code, product_name, product_volume, product_price]`.
        The last three entries are empty strings when the search has no
        results; `product_price` stays an empty string when the API reports
        no default price.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-success HTTP status.
        KeyError: If the JSON response lacks an expected field.
    """
    # Let requests build the query string so every value is URL-encoded.
    response = requests.get(
        _ITEM_SEARCH_URL,
        params={"hits": 2, "appid": client_id, "query": code},
        timeout=_REQUEST_TIMEOUT_SEC,
    )
    print(response.url)
    # Fail with the HTTP status instead of a confusing KeyError further down.
    response.raise_for_status()
    htmljson = json.loads(response.text)

    product_name = ""    # product name
    product_volume = ""  # package size etc.
    product_price = ""   # price

    # Guard against a positive total with an empty hit list.
    hits = htmljson["hits"] if htmljson["totalResultsAvailable"] > 0 else []
    if hits:
        first_hit = hits[0]
        price = first_hit["priceLabel"]["defaultPrice"]
        if price is not None:
            product_price = price
        # Convert full-width characters to half-width before cleaning.
        name = mo.zen_to_han(first_hit["name"])
        product_name, product_volume = _split_name_and_volume(name)

    return [code, product_name, product_volume, product_price]
# Delete the output file up front if it already exists.
if os.path.isfile(outputfile):
    os.remove(outputfile)

cols = ["No", "JANコード", "商品名", "荷姿", "価格", "在庫数"]

# Read the JAN codes. Each line is stripped BEFORE counting so the same code
# with different surrounding whitespace (for example a last line without a
# trailing newline) is counted once, and blank lines are ignored.
with open(inputfile, 'r', encoding='UTF-8') as f:
    janlist = [line.strip() for line in f]
# Mapping of JAN code -> number of occurrences in the input file.
jandict = collections.Counter(code for code in janlist if code)
jancnt = len(jandict)

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; DataFrame.append is no longer available in pandas 2.x.
records = []
for i, (jancode, count) in enumerate(jandict.items(), start=1):
    print('*********** start product_info ************')
    print(" 処理件数: ",i,"/",jancnt, " JANコード:",jancode)
    output1 = code_to_product_info(jancode)
    output = [i] + output1 + [count]
    print(output)
    records.append(output)
    print('*********** end product_info ************')
    print("")
    # Pause between requests to stay within the API rate limit.
    time.sleep(5)

# Write the CSV file.
df = pd.DataFrame(records, columns=cols)
df.to_csv(outputfile, index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment