Last active
December 7, 2017 09:19
-
-
Save Yuiki/c92624ed3da22008e0386e9e8c3d0367 to your computer and use it in GitHub Desktop.
使い捨てコードのため、殴り書き
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import statistics
import unicodedata
from collections import defaultdict
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# First page of the search results; later pages are discovered by following
# the numbered pagination links found on each page.
START_URL = "https://tsukuba-daigaku.com/?bukken=jsearch&shu=2&kalc=0&kahc=0&mad%5B%5D=110&mad%5B%5D=120&tik=0&cor=0"
# Upper bound on the number of result pages fetched in one run.
MAX_PAGES = 40
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT_SECONDS = 30


def normalize_address(raw):
    """Return *raw* reduced to a district key.

    Removes the "丁目" suffix and the "つくば市" city name, strips the
    surrounding whitespace, then applies NFKC normalization so that
    full-width digits compare equal to their ASCII forms.
    """
    trimmed = raw.replace("丁目", "").replace("つくば市", "").strip()
    return unicodedata.normalize("NFKC", trimmed)


def parse_rent(raw):
    """Return the rent in *raw* as a float, or ``None`` when it is blank.

    *raw* is the text of a rent cell such as ``"4.5万円"``; the "万円" unit
    is dropped before conversion.  ``None`` is treated like a blank cell.
    """
    text = (raw or "").strip()
    if not text:
        return None
    return float(text.replace("万円", ""))


def collect_page(soup, districts):
    """Add every rent found in *soup* to *districts*; return how many were added.

    *districts* maps a normalized address to its list of rents and must
    create missing lists on demand (e.g. ``defaultdict(list)``).

    Addresses (class "jusho") and rents (class "dpoint3") are paired by
    position, as the original index-based lookup did.
    NOTE(review): assumes the n-th address and the n-th rent belong to the
    same listing -- confirm against the page markup.
    """
    addresses = [
        normalize_address(tag.dd.string)
        for tag in soup.find_all(class_="jusho")
    ]
    added = 0
    for address, rent_tag in zip(addresses, soup.find_all(class_="dpoint3")):
        rent = parse_rent(rent_tag.string)
        if rent is None:
            # Blank rent: do not create an empty district entry, otherwise
            # the average/median below would divide by zero.
            continue
        districts[address].append(rent)
        added += 1
    return added


def summarize(districts):
    """Return one ``name,count,average,median`` CSV row per district.

    Rows are sorted by district name.  Average and median are rounded to two
    decimals.  Districts without any rent are skipped.
    """
    rows = []
    for name in sorted(districts):
        rents = districts[name]
        if not rents:
            continue
        average = round(sum(rents) / len(rents), 2)
        # statistics.median sorts its input; indexing the middle of the
        # unsorted list does not yield the median.
        median = round(statistics.median(rents), 2)
        rows.append(",".join([name, str(len(rents)), str(average), str(median)]))
    return rows


def main():
    """Scrape up to MAX_PAGES result pages and print per-district rent statistics."""
    districts = defaultdict(list)
    count = 0
    url = START_URL
    for i in range(MAX_PAGES):
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        # Fail loudly instead of parsing an error page as if it were results.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        count += collect_page(soup, districts)
        print(str(i) + ":End")
        # The link to the next page is the anchor whose text is the next
        # page number (pages are 1-based, i is 0-based).
        next_link = soup.find("a", string=str(i + 2))
        if next_link is None or not next_link.get("href"):
            # Last page reached: stop instead of crashing so the summary
            # is still printed.
            break
        # urljoin leaves absolute hrefs untouched and resolves relative ones.
        url = urljoin(url, next_link["href"])
    print("総データ数: " + str(count))
    print("地名,件数,平均値,中央値")
    for row in summarize(districts):
        print(row)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment