Instantly share code, notes, and snippets.

Embed
What would you like to do?
使い捨てコードのため、殴り書き
import statistics
import unicodedata

import requests
from bs4 import BeautifulSoup
# First page of the search results; later pages are reached by following the
# numbered pagination links found on each page.
url = "https://tsukuba-daigaku.com/?bukken=jsearch&shu=2&kalc=0&kahc=0&mad%5B%5D=110&mad%5B%5D=120&tik=0&cor=0"
# Upper bound on the number of result pages fetched in one run.
MAX_PAGES = 40
# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 30


def normalize_address(raw):
    """Return the district key derived from a raw address string.

    The "丁目" marker and the "つくば市" city name are removed, surrounding
    whitespace is trimmed, and the result is NFKC-normalized so that
    full-width and half-width characters group under the same key.
    """
    trimmed = raw.replace("丁目", "").replace("つくば市", "").strip()
    return unicodedata.normalize("NFKC", trimmed)


def parse_rent(raw):
    """Parse a rent label such as "5.5万円" into a float.

    Returns None when the label is missing (None) or blank, so callers can
    skip listings without a usable rent instead of crashing.
    """
    text = (raw or "").strip()
    if not text:
        return None
    return float(text.replace("万円", ""))


def collect_page(soup, districts):
    """Add the rents found on one parsed page to ``districts``.

    ``districts`` maps district key -> list of rents and is updated in
    place. Returns the number of rents added from this page.
    """
    address_tags = soup.find_all(class_="jusho")
    rent_tags = soup.find_all(class_="dpoint3")
    added = 0
    # Addresses and rents are paired by position; zip() ignores any unmatched
    # trailing entries instead of raising IndexError.
    for address_tag, rent_tag in zip(address_tags, rent_tags):
        rent = parse_rent(rent_tag.string)
        if rent is None:
            continue
        # Register a district only once it has a rent, so the report never
        # has to average an empty list.
        key = normalize_address(address_tag.dd.string)
        districts.setdefault(key, []).append(rent)
        added += 1
    return added


def scrape(start_url, max_pages):
    """Fetch up to ``max_pages`` result pages starting at ``start_url``.

    Returns ``(districts, count)`` where ``districts`` maps district key ->
    list of rents and ``count`` is the total number of rents collected.
    Raises ``requests.HTTPError`` if a page responds with an error status.
    """
    districts = {}
    count = 0
    page_url = start_url
    for page in range(max_pages):
        response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        count += collect_page(soup, districts)
        print(str(page) + ":End")
        # ``page`` is zero-based, so the link to the following page is
        # labelled page + 2.
        next_link = soup.find("a", text=str(page + 2))
        if next_link is None or not next_link.get("href"):
            # No further page: stop instead of failing on a missing link.
            break
        page_url = next_link["href"]
    return districts, count


def summarize(rents):
    """Return ``(count, average, median)`` for a non-empty list of rents.

    Average and median are rounded to two decimal places. The median is
    taken over the sorted values, independent of scrape order.
    """
    average = round(sum(rents) / len(rents), 2)
    median = round(statistics.median(rents), 2)
    return len(rents), average, median


def build_report(districts, count):
    """Return the report as a list of lines: total, CSV header, CSV rows.

    Rows are ordered by district key; districts without any rent are
    skipped.
    """
    lines = ["総データ数: " + str(count), "地名,件数,平均値,中央値"]
    for key in sorted(districts):
        rents = districts[key]
        if not rents:
            continue
        lines.append(",".join(str(value) for value in (key,) + summarize(rents)))
    return lines


if __name__ == "__main__":
    districts, count = scrape(url, MAX_PAGES)
    for line in build_report(districts, count):
        print(line)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment