Skip to content

Instantly share code, notes, and snippets.

@ku-kim
Created June 6, 2022 12:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ku-kim/12651a8748906dff8149bb52fb3ab88b to your computer and use it in GitHub Desktop.
Save ku-kim/12651a8748906dff8149bb52fb3ab88b to your computer and use it in GitHub Desktop.
TourAPI 3.0 숙박 데이터
# 공공데이터 포털 한국관광공사 국문 관광정보 서비스 API 신청
# TourAPI 3.0 https://api.visitkorea.or.kr/guide/inforArea.do
# GET http://api.visitkorea.or.kr/openapi/service/rest/KorService/areaCode?ServiceKey=ServiceKey&numOfRows=10&pageNo=1&MobileOS=ETC&MobileApp=TestApp&_type=json
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import random
from faker import Faker
import numpy as np
import pandas as pd
# Build fake "room" seed data from the TourAPI 3.0 lodging search (searchStay).
#
# Real values taken from the API response: title, address, coordinates and the
# first image URL. Every other column (host, counts, fees, prices, ratings) is
# filled with random placeholder values.
#
# Results:
#   df_room        one row per lodging item
#   df_room_image  one row per lodging item: (1-based room_id, image_url)

# The request URL is assembled as: url + key + param + <page number>.
url = 'http://api.visitkorea.or.kr/openapi/service/rest/KorService/searchStay?ServiceKey='
param = '&areaCode=&sigunguCode=&listYN=Y&MobileOS=ETC&MobileApp=TourAPI3.0_Guide&arrange=A&numOfRows=12&pageNo='
# Placeholder: replace with the service key issued after applying for the
# Korea Tourism Organization tourism-information API (the general,
# URL-encoded key).
key = '인증키'

df_room = pd.DataFrame(columns=[
    'host_id', 'title', 'description', 'address', 'lat', 'lng',
    'bathroom_count', 'bed_count', 'bedroom_count', 'header_count_capacity',
    'cleaning_fee', 'daily_price', 'lodging_tax_ratio', 'sale_ratio', 'service_fee',
    'rating_star_score', 'review_count',
])
df_room_image = pd.DataFrame(columns=['room_id', 'image_url'])

# Generator for the fake description text (was used but never instantiated).
fake = Faker()

# Pool of raw prices drawn uniformly from [0, 10000), indexed by row number.
# Created once instead of once per page; 3500 entries cover the
# 273 pages * 12 rows = 3276 rows requested below.
data_price_raws = np.random.uniform(0, 1, 3500) * 10000


def _tag_text(tag):
    """Return the text of a BeautifulSoup tag, or None when the tag is absent."""
    return None if tag is None else tag.text


# Running row index shared by both frames, so df_room row i and
# df_room_image row i describe the same lodging item.
global_row_i = 0

for page_number in range(1, 274):
    request_url = url + key + param + str(page_number)
    # Close the HTTP response as soon as the body has been read.
    with urlopen(request_url) as response:
        data = response.read()

    soup = BeautifulSoup(data, "html.parser")
    items = soup.find("items")
    if items is None:
        # Error response or empty page: nothing to record for this page.
        continue

    for item in items.find_all("item"):
        item_title = _tag_text(item.title)
        item_addr = _tag_text(item.addr1)
        item_mapx = _tag_text(item.mapx)
        item_mapy = _tag_text(item.mapy)

        # NOTE(review): lat is taken from mapy and lng from mapx, assuming the
        # TourAPI convention of X = longitude, Y = latitude - confirm against
        # the API guide. The original wrote mapx into both columns.
        df_room.loc[global_row_i] = [
            random.randint(1, 500),                      # host_id
            item_title,                                  # title
            fake.color_name() + fake.name(),             # description
            item_addr,                                   # address
            item_mapy,                                   # lat
            item_mapx,                                   # lng
            random.randint(1, 3),                        # bathroom_count
            random.randint(1, 3),                        # bed_count
            random.randint(1, 5),                        # bedroom_count
            random.randint(1, 10),                       # header_count_capacity
            random.randint(10, 100) * 100,               # cleaning_fee
            int(data_price_raws[global_row_i]) * 100,    # daily_price
            10,                                          # lodging_tax_ratio
            random.randint(1, 10),                       # sale_ratio
            random.randint(5, 50) * 100,                 # service_fee
            round(random.uniform(1, 5), 2),              # rating_star_score
            random.randint(10, 500),                     # review_count
        ]

        image_url = _tag_text(item.firstimage)
        # room_id is 1-based while the frame index is 0-based.
        df_room_image.loc[global_row_i] = [global_row_i + 1, image_url]

        global_row_i = global_row_i + 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment