Last active
June 25, 2022 16:37
-
-
Save didhd/41a3232f0f321230951f0e309cc661c4 to your computer and use it in GitHub Desktop.
가상의 한국어 이름 유저 데이터 생성하기
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import urllib | |
import json | |
from urllib import parse | |
from urllib.request import Request, urlopen | |
from random import randrange | |
from bs4 import BeautifulSoup | |
from korean_name_generator import namer | |
# Accumulator for the generated user records; filled in further down.
users = []

# Set male_count and female_count to the number of users wanted per gender.
male_count = 10
female_count = 10
male_names = []
female_names = []

# Keep drawing names (generator flag True is used for the male list) until
# male_count distinct names have been collected.
while len(male_names) < male_count:
    name = namer.generate(True)
    if name in male_names:
        continue  # duplicate draw; try again
    male_names.append(name)

# Same procedure for the female list, with the generator flag set to False.
while len(female_names) < female_count:
    name = namer.generate(False)
    if name in female_names:
        continue  # duplicate draw; try again
    female_names.append(name)

# Base URL of the Naver page used to romanize a Korean name; the URL-quoted
# name is appended as the value of the ``query`` parameter.
naver_url = 'https://dict.naver.com/name-to-roman/translation/?query='
def get_eng_name(name):
    """Return the romanized (English) spelling of a Korean name.

    Requests the page at ``naver_url`` with the URL-quoted name appended,
    parses the HTML with Beautiful Soup and reads the result links from
    the table inside ``#container``.

    Args:
        name: Korean name to romanize.

    Returns:
        The text of the first result link, or the placeholder string
        ``'username'`` when the selector matches nothing.

    Errors raised by the request or by decoding the body are not handled
    here; they propagate to the caller.
    """
    name_url = naver_url + urllib.parse.quote(name)
    req = Request(name_url)
    # Use the response as a context manager so the connection is always
    # closed, even if reading or decoding fails.
    with urlopen(req) as res:
        html = res.read().decode('utf-8')
    bs = BeautifulSoup(html, 'html.parser')
    # Pick the romanized-name links out of the result table via CSS selector.
    name_tags = bs.select('#container > div > table > tbody > tr > td > a')
    names = [name_tag.text for name_tag in name_tags]
    if not names:
        return 'username'
    return names[0]
# Build one user record per generated male name and append it to `users`.
# No guard on male_count is needed: an empty name list makes the loop a no-op.
for i, male_name in enumerate(male_names):
    male = {}
    male['id'] = i + 1
    male['name'] = male_name
    male['gender'] = 'M'
    # Spread ages over five decade bands (10s through 50s) according to the
    # position in the list, plus a random 0-9 offset inside the band.
    male['age'] = 10 + 10 * int(i / (male_count / 5)) + randrange(10)
    # Four-digit, zero-padded id appended to the username.
    suffix = '{:04d}'.format(i + 1)
    try:
        male['username'] = get_eng_name(
            male_name).lower().replace(' ', '.') + suffix
    except Exception:
        # Best-effort lookup: if romanization fails for any ordinary reason,
        # fall back to a placeholder. Catching Exception rather than using a
        # bare except lets Ctrl-C (KeyboardInterrupt) and SystemExit still
        # stop the script.
        male['username'] = 'username' + suffix
    male['email'] = male['username'] + '@example.com'
    users.append(male)
    print('현재 ' + str(i + 1) + ' 번째 진행중: ' + male['username'])
# Build one user record per generated female name and append it to `users`.
# No guard on female_count is needed: an empty name list makes the loop a
# no-op.
for i, female_name in enumerate(female_names):
    # Female ids continue after the male ids, offset by male_count.
    female_id = i + male_count + 1
    female = {}
    female['id'] = female_id
    female['name'] = female_name
    female['gender'] = 'F'
    # Spread ages over five decade bands (10s through 50s) according to the
    # position in the list, plus a random 0-9 offset inside the band.
    female['age'] = 10 + 10 * int(i / (female_count / 5)) + randrange(10)
    # Four-digit, zero-padded id appended to the username.
    suffix = '{:04d}'.format(female_id)
    try:
        female['username'] = get_eng_name(
            female_name).lower().replace(' ', '.') + suffix
    except Exception:
        # Best-effort lookup: fall back to a placeholder on ordinary errors.
        # The fallback uses the same id-based suffix as the success path
        # (previously it used i + 1, which collided with the male fallback
        # usernames). Catching Exception rather than using a bare except
        # lets KeyboardInterrupt and SystemExit still stop the script.
        female['username'] = 'username' + suffix
    female['email'] = female['username'] + '@example.com'
    users.append(female)
    print('현재 ' + str(female_id) +
          ' 번째 진행중: ' + female['username'])
# Persist every generated record to user_data.json. ensure_ascii=False keeps
# the Korean names as readable UTF-8 text instead of \uXXXX escapes.
with open('user_data.json', mode='w', encoding='UTF-8') as json_file:
    serialized_users = json.dumps(users, ensure_ascii=False)
    json_file.write(serialized_users)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See my article on Blog for details.