Skip to content

Instantly share code, notes, and snippets.

@momoci99
Last active October 26, 2020 12:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save momoci99/8ea65846297ea020d818a8ff46aedd0e to your computer and use it in GitHub Desktop.
Save momoci99/8ea65846297ea020d818a8ff46aedd0e to your computer and use it in GitHub Desktop.
송이 공판현황 데이터 크롤링 + mongodb insert
import requests
import re
from bs4 import BeautifulSoup
from pymongo import MongoClient
def getNumbers(value):
    """Return *value* reduced to its bare numeric text.

    All whitespace is removed, then the unit markers ``kg`` and ``원`` and the
    thousands separator ``,`` are dropped. The result is still a string; no
    numeric conversion is performed.

    Args:
        value: Raw cell text, e.g. ``" 1,234 kg "``.

    Returns:
        The cleaned string, e.g. ``"1234"`` (empty string for blank input).
    """
    # Raw string: '\s' in a normal literal is an invalid escape sequence and
    # triggers a SyntaxWarning on recent Python versions.
    result = re.sub(r'\s+', '', value)
    result = result.replace('kg', "").replace(',', "").replace('원', "")
    return result
# Seconds to wait for the server before giving up on a single HTTP request.
_REQUEST_TIMEOUT = 10

# Column index -> document key for cells that hold a single text value.
_TEXT_COLUMNS = {
    0: 'Region',
    1: 'Union',
    11: 'Last_Updated',
}

# Column index -> (first key, second key) for cells that hold two values
# separated by a newline (presumably a quantity in kg and a price in won,
# judging by the unit markers getNumbers strips -- confirm against the page).
# NOTE(review): 'Fist_*' and 'Ect_Class_Price' look like typos of 'First_*'
# and 'Etc_Class_Price'. They are kept verbatim because renaming them would
# change the schema of the stored documents.
_PAIRED_COLUMNS = {
    2: ('Yesterday_Count_Sum', 'Yesterday_Price_Sum'),
    3: ('Today_Count_Sum', 'Today_Price_Sum'),
    4: ('Total_Count_Sum', 'Total_Price_Sum'),
    5: ('Fist_Class_Count', 'Fist_Class_Price'),
    6: ('Second_Class_Count', 'Second_Class_Price'),
    7: ('Third_Main_Class_Count', 'Third_Main_Class_Price'),
    8: ('Third_Sub_Class_Count', 'Third_Sub_Class_Price'),
    9: ('Etc_Class_Count', 'Ect_Class_Price'),
    10: ('Mixed_Class_Count', 'Mixed_Class_Price'),
}


def _fetch_soup(url):
    """Download *url* and return its HTML parsed with BeautifulSoup.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def _parse_row(tr, date):
    """Convert one table row into a document dict tagged with *date*.

    Cells are mapped to keys by their position; cells beyond the known
    columns are ignored. A paired cell that lacks the newline separator
    yields an empty string for its second value instead of raising.
    """
    row = {'date': date}
    for index, td in enumerate(tr.find_all('td')):
        value = td.get_text().strip()
        if index in _TEXT_COLUMNS:
            row[_TEXT_COLUMNS[index]] = value
        elif index in _PAIRED_COLUMNS:
            first_key, second_key = _PAIRED_COLUMNS[index]
            parts = value.split('\n')
            row[first_key] = getNumbers(parts[0])
            # Guard against cells that contain only one line of text.
            row[second_key] = getNumbers(parts[1] if len(parts) > 1 else '')
    return row


if __name__ == '__main__':
    print('main')

    # '%s' is replaced by the date value taken from the page's <option> tags.
    request_url_format = 'http://iforest.nfcf.or.kr/forest/user.tdf?a=user.songi.SongiApp&c=1002&sply_date=%s&pmsh_item_c=01&mc=CYB_FIF_DGS_SNI_02'
    # Landing page used only to discover the list of available dates.
    url = request_url_format % '20201019'

    # The context manager closes the MongoDB connection when the crawl ends,
    # including when it ends with an exception.
    with MongoClient("mongodb://localhost:27017/") as my_client:
        songi_db = my_client["songi_db"]
        songi_col = songi_db["songi_collection"]

        # Fetch the HTML source of the landing page.
        soup = _fetch_soup(url)
        selects = soup.select('select > option')

        datas = []
        for select in selects:
            date = select.get('value')
            if not date:
                # Options without a usable value cannot form a valid request.
                continue

            try:
                page = _fetch_soup(request_url_format % date)
            except requests.RequestException as err:
                # One failing date should not abort the whole crawl.
                print('skip', date, err)
                continue

            print(date)
            for tr in page.select('table > tbody tr'):
                row = _parse_row(tr, date)
                songi_col.insert_one(row)
                datas.append(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment