Skip to content

Instantly share code, notes, and snippets.

@ku-kim
Created June 6, 2022 11:17
Show Gist options
  • Save ku-kim/d3bc8b4b4351dd105e48736bf0a3bded to your computer and use it in GitHub Desktop.
Save ku-kim/d3bc8b4b4351dd105e48736bf0a3bded to your computer and use it in GitHub Desktop.
Airbnb - Dummy Data : rooms
from bs4 import BeautifulSoup
import random
from faker import Faker
import numpy as np
import pandas as pd
from scipy.stats import skewnorm
# Shared Faker generator, configured with the Korean ('ko-KR') locale.
fake = Faker(locale='ko-KR')
# Empty frame that fixes the column order of the generated room rows.
# NOTE(review): 'header_count_capacity' receives the value that create_rooms
# calls `headcount_capacity`; the name looks like a typo, but it is emitted as
# the CSV header, so confirm with consumers before renaming it.
df_rooms = pd.DataFrame(
    columns=[
        # identity / listing text
        'host_id',
        'title',
        'description',
        # location
        'address',
        'lat',
        'lng',
        # capacity
        'bathroom_count',
        'bed_count',
        'bedroom_count',
        'header_count_capacity',
        # pricing
        'cleaning_fee',
        'daily_price',
        'lodging_tax_ratio',
        'sale_ratio',
        'service_fee',
        # reviews
        'rating_star_score',
        'review_count',
    ]
)
def _sample_daily_prices(count):
    """Return ``count`` positive, right-skewed price samples.

    Values are drawn from ``skewnorm`` (shape parameter 20); only positive
    draws are kept and each is mapped to ``value * 300000 + 3000``, rounded to
    one decimal.  Drawing is repeated until enough positive values exist, so
    the result never comes up short no matter how many draws the ``> 0``
    filter rejects.
    """
    positives = np.empty(0)
    while positives.size < count:
        # Oversample by 300 so that a single draw is normally sufficient.
        draw = skewnorm.rvs(20, size=count - positives.size + 300)
        positives = np.concatenate([positives, draw[draw > 0]])
    return np.round(positives[:count] * 300000 + 3000, 1)


def create_rooms(df_rooms, room_size, host_id_size):
    """Fill ``df_rooms`` with ``room_size`` rows of fake lodging data.

    Args:
        df_rooms: DataFrame to fill.  Rows are written in place through
            ``.loc`` at the integer labels ``0 .. room_size - 1``; each row
            holds 17 values, in the order listed at the bottom of the loop.
        room_size: Number of rows to generate.
        host_id_size: Inclusive upper bound for the random ``host_id``
            (ids are drawn from ``1 .. host_id_size``).

    Returns:
        The same ``df_rooms`` object that was passed in.
    """
    room_types = ['호텔', '민박', '모텔', '도미토리', '아파트', '멘션', '게스트하우스', '한옥']
    price_distribution = _sample_daily_prices(room_size)

    for row_i in range(room_size):
        # e.g. ('37.1759', '128.9889', 'T’aebaek', 'KR', 'Asia/Seoul')
        lat_lng = fake.local_latlng(country_code='KR')

        host_id = random.randint(1, host_id_size)
        # Listing name: a fake company name followed by a lodging type.
        title = fake.company() + ' ' + np.random.choice(room_types)
        description = fake.text()  # free-text listing description
        address = fake.address()
        lat = lat_lng[0]  # latitude, e.g. 34.8825
        lng = lat_lng[1]  # longitude, e.g. 128.62667

        bathroom_count = random.randint(1, 3)
        bed_count = random.randint(1, 5)
        bedroom_count = random.randint(1, 5)
        headcount_capacity = random.randint(1, 10)  # maximum number of guests

        cleaning_fee = random.randint(10, 100) * 100  # 1000 .. 10000, step 100
        daily_price = int(price_distribution[row_i])  # skewed daily price
        lodging_tax_ratio = 10  # lodging tax, fixed at 10 (percent)
        sale_ratio = random.randint(1, 10)  # discount rate, 1 .. 10 (percent)
        service_fee = random.randint(5, 50) * 100  # 500 .. 5000, step 100

        rating_star_score = round(random.uniform(1, 5), 2)  # e.g. 3.3
        review_count = random.randint(10, 500)

        # Kept as a per-row .loc write so the caller's frame is mutated in
        # place, exactly as before.
        df_rooms.loc[row_i] = [
            host_id, title, description, address, lat, lng,
            bathroom_count, bed_count, bedroom_count, headcount_capacity,
            cleaning_fee, daily_price, lodging_tax_ratio, sale_ratio, service_fee,
            rating_star_score, review_count,
        ]
    return df_rooms
# Generate 5000 rooms with host ids drawn from 1..2000, then export them to
# CSV without the index column.
df_rooms = create_rooms(df_rooms, room_size=5000, host_id_size=2000)
df_rooms.to_csv('./rooms_plus.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment