momoci99/main.py

## main.py
import requests
import re
from bs4 import BeautifulSoup
from pymongo import MongoClient


def getNumbers(value):
    """Strip whitespace, thousands separators and unit suffixes from a cell.

    Every run of whitespace is removed first, then the literal substrings
    ``kg``, ``,`` and ``원`` are deleted wherever they occur.

    Args:
        value: Raw cell text, e.g. ``" 1,234 kg"`` or ``"56,000 원"``.

    Returns:
        The cleaned text as a ``str``.  No numeric conversion is performed,
        so an empty or non-numeric cell comes back as whatever characters
        remain after stripping.
    """
    # Raw string: '\s' inside a plain literal is an invalid escape sequence
    # (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
    result = re.sub(r'\s+', '', value)
    result = result.replace('kg', "").replace(',', "").replace('원', "")
    return result


# Script entry-point marker: it only announces that the file was run directly.
# NOTE: everything below sits outside this guard, so it also runs on import.
if __name__ == '__main__':
    print('main')

# MongoDB target: database "songi_db", collection "songi_collection" on the
# local server at the default port.
my_client = MongoClient("mongodb://localhost:27017/")
songi_db = my_client["songi_db"]
songi_col = songi_db["songi_collection"]

# URL template for one listing page; %s fills the sply_date query parameter.
request_url_format = 'http://iforest.nfcf.or.kr/forest/user.tdf?a=user.songi.SongiApp&c=1002&sply_date=%s&pmsh_item_c=01&mc=CYB_FIF_DGS_SNI_02'
# Seed page (sply_date=20201019), fetched only to read its <option> elements.
url = request_url_format % '20201019'

# Fetch the HTML source.  The timeout keeps an unresponsive server from
# hanging the script forever, and raise_for_status() stops on an HTTP error
# instead of silently parsing an error page.
req = requests.get(url, timeout=30)
req.raise_for_status()
html = req.text

soup = BeautifulSoup(html, 'html.parser')

# Every <option> that is a direct child of a <select>; the scrape loop reads
# each one's "value" attribute and stores it under the 'date' key.
selects = soup.select('select > option')

days = []  # never filled in by the code visible here
datas = []  # collects every row document built by the scrape loop
# Layout of one result row, keyed by <td> position.
# Columns 0, 1 and 11 are stored verbatim.  Columns 2-10 are expected to hold
# two newline-separated values: the first line goes under the *_Count key and
# the second under the *_Price key, both cleaned with getNumbers().
# NOTE(review): 'Fist_Class_*' and 'Ect_Class_Price' look like typos of
# 'First' / 'Etc', but they are the field names this script writes to MongoDB,
# so they are kept byte-for-byte; renaming them needs a data migration.
PLAIN_COLUMNS = {0: 'Region', 1: 'Union', 11: 'Last_Updated'}
PAIRED_COLUMNS = {
    2: ('Yesterday_Count_Sum', 'Yesterday_Price_Sum'),
    3: ('Today_Count_Sum', 'Today_Price_Sum'),
    4: ('Total_Count_Sum', 'Total_Price_Sum'),
    5: ('Fist_Class_Count', 'Fist_Class_Price'),
    6: ('Second_Class_Count', 'Second_Class_Price'),
    7: ('Third_Main_Class_Count', 'Third_Main_Class_Price'),
    8: ('Third_Sub_Class_Count', 'Third_Sub_Class_Price'),
    9: ('Etc_Class_Count', 'Ect_Class_Price'),
    10: ('Mixed_Class_Count', 'Mixed_Class_Price'),
}

# One request per <option>: fetch that date's page, turn each table row into
# a document, insert it into MongoDB and remember it in `datas`.
for select in selects:
    sply_date = select['value']

    # The timeout prevents an unresponsive server from stalling the whole run;
    # raise_for_status() aborts on an HTTP error instead of parsing its body.
    new_url = request_url_format % sply_date
    req = requests.get(new_url, timeout=30)
    req.raise_for_status()
    pag_html = req.text
    soup = BeautifulSoup(pag_html, 'html.parser')

    trs = soup.select('table > tbody tr')

    print(sply_date)
    for tr in trs:
        row = {'date': sply_date}
        for index, td in enumerate(tr.find_all('td')):
            value = td.get_text().strip()
            if index in PLAIN_COLUMNS:
                row[PLAIN_COLUMNS[index]] = value
            elif index in PAIRED_COLUMNS:
                count_key, price_key = PAIRED_COLUMNS[index]
                spliteds = value.split('\n')
                row[count_key] = getNumbers(spliteds[0])
                if len(spliteds) > 1:
                    row[price_key] = getNumbers(spliteds[1])
                else:
                    # Cell has no second line (e.g. it is empty): store an
                    # empty value rather than dying with IndexError mid-run.
                    row[price_key] = ''
            # Cells beyond column 11 are ignored.
        # insert_one() adds the generated '_id' to `row` before it is kept.
        songi_col.insert_one(row)
        datas.append(row)
	import requests
	import re
	from bs4 import BeautifulSoup
	from pymongo import MongoClient


	def getNumbers(value):
	    """Strip whitespace, thousands separators and unit suffixes from a cell.

	    Every run of whitespace is removed first, then the literal substrings
	    ``kg``, ``,`` and ``원`` are deleted wherever they occur.

	    Args:
	        value: Raw cell text, e.g. ``" 1,234 kg"`` or ``"56,000 원"``.

	    Returns:
	        The cleaned text as a ``str``.  No numeric conversion is performed,
	        so an empty or non-numeric cell comes back as whatever characters
	        remain after stripping.
	    """
	    # The body had lost its indentation, leaving the ``def`` without a
	    # body; it is restored here.
	    # Raw string: '\s' inside a plain literal is an invalid escape sequence
	    # (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
	    result = re.sub(r'\s+', '', value)
	    result = result.replace('kg', "").replace(',', "").replace('원', "")
	    return result


	# Script entry-point marker: it only announces that the file was run directly.
	# NOTE: everything below sits outside this guard, so it also runs on import.
	if __name__ == '__main__':
	    print('main')

	# MongoDB target: database "songi_db", collection "songi_collection" on the
	# local server at the default port.
	my_client = MongoClient("mongodb://localhost:27017/")
	songi_db = my_client["songi_db"]
	songi_col = songi_db["songi_collection"]

	# URL template for one listing page; %s fills the sply_date query parameter.
	request_url_format = 'http://iforest.nfcf.or.kr/forest/user.tdf?a=user.songi.SongiApp&c=1002&sply_date=%s&pmsh_item_c=01&mc=CYB_FIF_DGS_SNI_02'
	# Seed page (sply_date=20201019), fetched only to read its <option> elements.
	url = request_url_format % '20201019'

	# Fetch the HTML source.  The timeout keeps an unresponsive server from
	# hanging the script forever, and raise_for_status() stops on an HTTP error
	# instead of silently parsing an error page.
	req = requests.get(url, timeout=30)
	req.raise_for_status()
	html = req.text

	soup = BeautifulSoup(html, 'html.parser')

	# Every <option> that is a direct child of a <select>; the scrape loop reads
	# each one's "value" attribute and stores it under the 'date' key.
	selects = soup.select('select > option')

	days = []  # never filled in by the code visible here
	datas = []  # collects every row document built by the scrape loop
	# Layout of one result row, keyed by <td> position.
	# Columns 0, 1 and 11 are stored verbatim.  Columns 2-10 are expected to hold
	# two newline-separated values: the first line goes under the *_Count key and
	# the second under the *_Price key, both cleaned with getNumbers().
	# NOTE(review): 'Fist_Class_*' and 'Ect_Class_Price' look like typos of
	# 'First' / 'Etc', but they are the field names this script writes to MongoDB,
	# so they are kept byte-for-byte; renaming them needs a data migration.
	PLAIN_COLUMNS = {0: 'Region', 1: 'Union', 11: 'Last_Updated'}
	PAIRED_COLUMNS = {
	    2: ('Yesterday_Count_Sum', 'Yesterday_Price_Sum'),
	    3: ('Today_Count_Sum', 'Today_Price_Sum'),
	    4: ('Total_Count_Sum', 'Total_Price_Sum'),
	    5: ('Fist_Class_Count', 'Fist_Class_Price'),
	    6: ('Second_Class_Count', 'Second_Class_Price'),
	    7: ('Third_Main_Class_Count', 'Third_Main_Class_Price'),
	    8: ('Third_Sub_Class_Count', 'Third_Sub_Class_Price'),
	    9: ('Etc_Class_Count', 'Ect_Class_Price'),
	    10: ('Mixed_Class_Count', 'Mixed_Class_Price'),
	}

	# One request per <option>: fetch that date's page, turn each table row into
	# a document, insert it into MongoDB and remember it in `datas`.
	# (The loop nesting had been flattened to a single indent level; it is
	# restored here.)
	for select in selects:
	    sply_date = select['value']

	    # The timeout prevents an unresponsive server from stalling the whole
	    # run; raise_for_status() aborts on an HTTP error instead of parsing
	    # its body.
	    new_url = request_url_format % sply_date
	    req = requests.get(new_url, timeout=30)
	    req.raise_for_status()
	    pag_html = req.text
	    soup = BeautifulSoup(pag_html, 'html.parser')

	    trs = soup.select('table > tbody tr')

	    print(sply_date)
	    for tr in trs:
	        row = {'date': sply_date}
	        for index, td in enumerate(tr.find_all('td')):
	            value = td.get_text().strip()
	            if index in PLAIN_COLUMNS:
	                row[PLAIN_COLUMNS[index]] = value
	            elif index in PAIRED_COLUMNS:
	                count_key, price_key = PAIRED_COLUMNS[index]
	                spliteds = value.split('\n')
	                row[count_key] = getNumbers(spliteds[0])
	                if len(spliteds) > 1:
	                    row[price_key] = getNumbers(spliteds[1])
	                else:
	                    # Cell has no second line (e.g. it is empty): store an
	                    # empty value rather than dying with IndexError mid-run.
	                    row[price_key] = ''
	            # Cells beyond column 11 are ignored.
	        # insert_one() adds the generated '_id' to `row` before it is kept.
	        songi_col.insert_one(row)
	        datas.append(row)