# paperlefthand/get_acts.py

## get_acts.py
import requests
from bs4 import BeautifulSoup
import csv
import time
import lxml

# Scrape Wikipedia's "List of Japanese actors" and "List of Japanese actresses"
# pages and write the link texts found there to actors.csv / actresses.csv,
# one name per row under a 'name' header.
base_url = 'https://en.wikipedia.org/wiki/'
items = ["actors", "actresses"]

for i in items:
    target_url = base_url + "List_of_Japanese_" + i
    # A timeout keeps a stalled connection from hanging the script forever,
    # and raise_for_status() stops an HTTP error page from being parsed as
    # if it were the list (which would silently produce a header-only CSV).
    response = requests.get(target_url, timeout=10)
    response.raise_for_status()
    target_html = response.text
    soup = BeautifulSoup(target_html, 'lxml')
    # Select the links inside the <ul> that immediately follows each <h2>
    # of the article body.
    # NOTE(review): this depends on the page's current markup - confirm the
    # selector still matches if the output comes back empty.
    names = soup.select('div.mw-parser-output > h2 + ul > li > a')

    print('   getting names of %s ...' % i)
    acts = [name.get_text() for name in names]
    # Pause between the page requests.
    time.sleep(1)

    # To file.
    # encoding='utf-8' avoids UnicodeEncodeError for non-ASCII names under a
    # non-UTF-8 locale default; newline='' (as the csv docs require) keeps
    # the '\n' terminator from being translated to '\r\n' on Windows.
    with open('%s.csv' % i, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['name'])
        # The last entry is the "List of Japanese ..." link from the
        # "See also" section, so drop it.
        writer.writerows([name] for name in acts[:-1])
	import requests
	from bs4 import BeautifulSoup
	import csv
	import time
	import lxml

	# Scrape Wikipedia's "List of Japanese actors" and "List of Japanese
	# actresses" pages and write the link texts found there to actors.csv /
	# actresses.csv, one name per row under a 'name' header.
	# The nesting below was restored from a copy whose indentation had been
	# flattened, leaving the loops and the with-statement without bodies.
	base_url = 'https://en.wikipedia.org/wiki/'
	items = ["actors", "actresses"]

	for i in items:
		target_url = base_url + "List_of_Japanese_" + i
		# A timeout keeps a stalled connection from hanging the script
		# forever, and raise_for_status() stops an HTTP error page from
		# being parsed as if it were the list.
		response = requests.get(target_url, timeout=10)
		response.raise_for_status()
		target_html = response.text
		soup = BeautifulSoup(target_html, 'lxml')
		# Select the links inside the <ul> that immediately follows each
		# <h2> of the article body.
		# NOTE(review): this depends on the page's current markup - confirm
		# the selector still matches if the output comes back empty.
		names = soup.select('div.mw-parser-output > h2 + ul > li > a')

		print(' getting names of %s ...' % i)
		acts = [name.get_text() for name in names]
		# Pause between the page requests.
		time.sleep(1)

		# To file.
		# encoding='utf-8' avoids UnicodeEncodeError for non-ASCII names
		# under a non-UTF-8 locale default; newline='' (as the csv docs
		# require) keeps '\n' from being translated to '\r\n' on Windows.
		with open('%s.csv' % i, 'w', encoding='utf-8', newline='') as f:
			writer = csv.writer(f, lineterminator='\n')
			writer.writerow(['name'])
			# The last entry is the "List of Japanese ..." link from the
			# "See also" section, so drop it.
			writer.writerows([name] for name in acts[:-1])