Skip to content

Instantly share code, notes, and snippets.

@SeolHa314
Created January 18, 2020 11:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SeolHa314/5826933e191115e83f2e00b9cfaf253a to your computer and use it in GitHub Desktop.
Save SeolHa314/5826933e191115e83f2e00b9cfaf253a to your computer and use it in GitHub Desktop.
Get Nijisanji livers' Twitter usernames and YouTube channel IDs.
import requests
import re
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup as bs
# Progress state shared with get_you_twt(), which prints a percentage
# computed from these two names.
# endIndex: total number of profile pages; stays 0 until the member list
# has been collected further down in this script.
endIndex = 0
# index: 1-based count of the page currently finishing; get_you_twt()
# increments it after each page it has processed.
index = 1
def get_you_twt(url, session):
    """Scrape one liver's profile page for the name and social accounts.

    Args:
        url: Address of the profile page to download.
        session: Object exposing a requests-style ``get(url)`` method,
            e.g. a ``requests.Session``.

    Returns:
        A ``(name_english, twitter_id, youtube_id)`` tuple. ``twitter_id``
        and ``youtube_id`` are ``None`` when the page contains no link to
        the corresponding site.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response
        (``requests.HTTPError`` when a ``requests`` session is used).
        IndexError: If the name heading selector matches nothing.
    """
    # Only ``index`` is rebound here; ``endIndex`` is merely read.
    global index

    r = session.get(url)
    # raise_for_status() already rejects every non-ok response, so no
    # additional ``r.ok`` check is needed afterwards.
    r.raise_for_status()

    liverpage = bs(r.content, "lxml")
    heading_selector = "body > div > div:nth-of-type(2) > div > div > section:nth-of-type(1) > div > div > div:nth-of-type(2) > div > div > div:nth-of-type(2) > div > h2"
    name_both = liverpage.select(heading_selector)[0].get_text()
    # Keep everything after the first "/" (presumably the English half of
    # the heading -- TODO confirm). Without a "/", find() returns -1 and
    # the whole heading text is kept.
    name_english = name_both[name_both.find("/") + 1:]

    # Start from None so a page lacking one of the links still yields a
    # well-formed result instead of failing on an unbound local.
    twitter_id = None
    youtube_id = None
    for a in liverpage.find_all("a", href=True):
        href = a["href"]
        if "twitter.com" in href:
            # 20 == len("https://twitter.com/"); assumes that exact prefix.
            twitter_id = href[20:]
        elif "youtube.com" in href:
            # 32 == len("https://www.youtube.com/channel/"); the slice keeps
            # the 24 characters that follow it. Assumes that exact prefix.
            youtube_id = href[32:56]

    # The "+ 1" keeps the division safe while endIndex is still 0.
    # NOTE(review): the read-modify-write on ``index`` is not synchronized,
    # so the printed progress may be slightly off when run from threads.
    print(str(index / (endIndex + 1) * 100) + "% ended.")
    index += 1
    return (name_english, twitter_id, youtube_id)
# Download the member index page and collect the profile URL of every liver.
r = requests.get("https://nijisanji.ichikara.co.jp/member/")
r.raise_for_status()
soup = bs(r.content, "lxml")
url_lists = [i.div.div.a["href"] for i in soup.select("#liver_list > div")]

# get_you_twt() reads this module-level total for its progress output.
endIndex = len(url_lists)

dict_livers = {}
# One shared session for all workers; the with-block closes it (and the
# thread pool) once every profile page has been processed.
with requests.Session() as session:
    with ThreadPoolExecutor(max_workers=10) as executor:
        future_livers = [
            executor.submit(get_you_twt, url=url, session=session)
            for url in url_lists
        ]
        for future in as_completed(future_livers):
            try:
                data = future.result()
            except Exception as exc:
                # Best effort: report the failure and carry on with the
                # remaining pages.
                print(exc)
            else:
                dict_livers[data[0]] = {
                    "twitter": data[1],
                    "youtube": data[2]
                }

# Write the collected accounts, keyed by the scraped name.
with open("liver1.json", "w") as f:
    json.dump(dict_livers, f, indent=4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment