Skip to content

Instantly share code, notes, and snippets.

@ansakoy
Last active August 29, 2015 14:16
Show Gist options
  • Save ansakoy/34fb3cbd0a947aa17b7c to your computer and use it in GitHub Desktop.
Save ansakoy/34fb3cbd0a947aa17b7c to your computer and use it in GitHub Desktop.
Readability: a first attempt at using an API and writing a CSV file
import requests
from bs4 import BeautifulSoup
import urllib2
import json
import csv
# Scrape the list of government meetings, ask the readability.io API to score
# each meeting page, and save one CSV row per meeting to test.csv.
# NOTE: this script targets Python 2 (urllib2, csv file opened in 'wb' mode).

# API endpoint; each meeting's href is appended to the `url` query value
# (presumably the hrefs are site-relative paths -- verify against the page).
api = "http://api.readability.io/api/1.0/ru/measure/?url=http://government.ru"
url = "http://government.ru/meetings/"
url_for_dict = "http://government.ru"

page = requests.get(url)
# Name the parser explicitly so the parse tree does not depend on which
# optional parsers (lxml, html5lib) happen to be installed.
soup = BeautifulSoup(page.content, "html.parser")

# Link-text marker of a meeting page: "Zasedaniye Pravitelstva ("
# i.e. "Government meeting (".
key_word = u'\u0417\u0430\u0441\u0435\u0434\u0430\u043d\u0438\u0435 \u041f\u0440\u0430\u0432\u0438\u0442\u0435\u043b\u044c\u0441\u0442\u0432\u0430 ('

# Map href -> link title for every anchor whose text contains the marker.
meetings = dict()
for link in soup.find_all("a"):
    href = link.get("href")
    # Skip anchors without an href; str(None) would otherwise create a
    # bogus "None" entry and a meaningless API request later on.
    if href and key_word in link.text:
        meetings[str(href)] = link.text

header = ['title', 'url', 'index_SMOG', 'index_cl', 'index_dc',
          'n_sentences', 'n_words', 'n_complex_words']
rows = [header]
for path, title in meetings.items():
    response = urllib2.urlopen(api + path)
    try:
        data = json.load(response)
    finally:
        # Always release the HTTP connection, even if the body is not valid JSON.
        response.close()
    indexes = data[u'indexes']
    metrics = data[u'metrics']
    rows.append([
        # The Python 2 csv writer needs byte strings, so encode the unicode title.
        title.encode('utf8'),
        url_for_dict + path,
        float(indexes[u'index_SMOG']),
        float(indexes[u'index_cl']),
        float(indexes[u'index_dc']),
        float(metrics[u'n_sentences']),
        float(metrics[u'n_words']),
        float(metrics[u'n_complex_words']),
    ])

# Write only after every request has succeeded, so a failed run never leaves
# a partially filled file behind.
with open('test.csv', 'wb') as test_file:
    csv.writer(test_file).writerows(rows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment