Skip to content

Instantly share code, notes, and snippets.

@ohoroyoi
Last active August 2, 2019 02:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ohoroyoi/3d616c2f94e39e32455db682dd50229e to your computer and use it in GitHub Desktop.
Save ohoroyoi/3d616c2f94e39e32455db682dd50229e to your computer and use it in GitHub Desktop.
import requests
from lxml import etree
import wikipedia
import csv
import wikipediaapi
# Wikipedia-API client for the English-language wiki, configured with the
# WIKI extract format. It is constructed exactly once: the earlier bare
# `Wikipedia('en')` instance was overwritten immediately and never used.
# NOTE(review): nothing in the visible part of this script reads `wiki_en`;
# the lookups below go through the `wikipedia` package instead.
wiki_en = wikipediaapi.Wikipedia(
    language='en',
    extract_format=wikipediaapi.ExtractFormat.WIKI,
)
# Load the airline names to look up. Each list entry is one raw line of the
# input file and still carries its trailing newline.
with open("/Users/n18016/Documents/workspace/tour-geolocation-data/get_these_airlines.txt", "r") as names_file:
    airline_list = names_file.readlines()
# Output locations. The CSV is rewritten on every run; the error list is
# opened in append mode, so entries from earlier runs are kept.
OUTPUT_CSV = '/Users/n18016/Documents/workspace/tour-geolocation-data/works/airlines/airline_detail_6.csv'
ERROR_LIST = '/Users/n18016/Documents/workspace/tour-geolocation-data/works/airlines/airline_error_list.txt'

# CSV columns filled from the three `td.nickname` cells of the article.
# The cells are assumed to appear in exactly this order.
CODE_FIELDS = ("IATA", "ICAO", "Callsign")
SEPARATOR = "**************************************************"


def _cell_text(cell):
    """Return the cell's leading text without trailing newlines, or "-" if it has none."""
    text = (cell.text or "").rstrip('\n')
    return text or "-"


# For every airline name: search Wikipedia, fetch the matching article,
# scrape the airline codes and destination count out of the page, and write
# one CSV row. Names that cannot be resolved are appended to the error list.
# The error list is opened once here so its handle is closed reliably,
# instead of being reopened (and leaked) for every logged name.
with open(OUTPUT_CSV, 'w', newline='') as outcsv, open(ERROR_LIST, 'a') as error_list:
    fields = ["airline", "IATA", "ICAO", "Callsign", "Destination count", "url"]
    writer = csv.DictWriter(outcsv, fieldnames=fields)
    writer.writeheader()

    for line in airline_list:
        airline = line.rstrip('\n')
        # Skip blank lines. Compared by value: the previous `is not '\n'`
        # identity test is not guaranteed to work for equal strings.
        if not airline.strip():
            continue

        print(SEPARATOR)
        print(line)

        # Proceed only when some search-result title occurs inside the
        # requested name.
        if not any(title in line for title in wikipedia.search(airline)):
            print("없음 : ")  # console marker: "none"
            print(line, file=error_list)
            continue

        print("이거 ")  # console marker: "this one"
        print(line)
        try:
            each_airline = wikipedia.page(airline)
        except (wikipedia.exceptions.DisambiguationError,
                wikipedia.exceptions.PageError):
            # Log the name and move on. Falling through here would reuse the
            # page from the previous iteration, or raise NameError on the
            # first one.
            print(line, file=error_list)
            print(SEPARATOR)
            continue

        print("존재함: ")  # console marker: "exists"
        print("each_airline.url : ")
        print(each_airline.url)
        print(SEPARATOR)

        if not each_airline.url:
            continue

        req = requests.get(each_airline.url)
        # The article is an HTML page, so use lxml's HTML parser; the strict
        # XML parser rejects markup that is not well-formed XML.
        store = etree.HTML(req.content)
        airline_codes = store.xpath('//td[@class="nickname"]')
        destination_count = store.xpath('//table[@class="infobox vcard"]//tr[th/text()="Destinations"]/td')

        one_row = {"airline": airline}
        if destination_count:
            one_row["Destination count"] = destination_count[0].text
        else:
            one_row["Destination count"] = "-"

        if len(airline_codes) == 3:
            for field, cell in zip(CODE_FIELDS, airline_codes):
                one_row[field] = _cell_text(cell)
        elif airline_codes:
            # Unexpected number of code cells: record the name for manual
            # review. The code columns are left out of the row, so DictWriter
            # writes them as empty strings.
            print(line, file=error_list)
        else:
            for field in CODE_FIELDS:
                one_row[field] = "-"

        one_row["url"] = each_airline.url
        print("one_row: ")
        print(one_row)
        writer.writerow(one_row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment