# skillachie/storm_articles.py

## storm_articles.py
from news_corpus_builder import NewsCorpusGenerator
from pprint import pprint
import sys

# Total number of articles wanted per category; get_links() divides this
# budget evenly across the search terms.
category_total = 300

# Location where you want to save the news articles.
corpus_dir = '/home/skillachie/Development/event_articles'

# NOTE(review): 'file' presumably selects a file-based datastore -- confirm
# against the news_corpus_builder documentation.
extractor = NewsCorpusGenerator(corpus_dir, 'file')

def get_links(terms, category):
    """Gather the search results for every term of one category.

    The module-level ``category_total`` budget is split evenly (rounded
    down) across ``terms``; each term is then searched through
    ``extractor.google_news_search`` with that per-term count.

    Args:
        terms: Sequence of search strings to query.
        category: Category label passed through unchanged to
            ``extractor.google_news_search`` for each term.

    Returns:
        A list holding the results of all searches, concatenated in the
        order of ``terms``. An empty ``terms`` yields an empty list.
    """
    # With no search terms there is nothing to query, and the division
    # below would raise ZeroDivisionError.
    if not terms:
        return []

    # Floor division keeps the per-term count an int on Python 2 and 3.
    article_count = category_total // len(terms)

    category_articles = []
    for term in terms:
        category_articles.extend(
            extractor.google_news_search(term, category, article_count))
    return category_articles


# Hurricane search terms. More than one term may be listed.
storm_terms = ['Hurricane Joaquin']

# 'Storms' is the category assigned to this event. Articles will be saved in
# a Storms folder on your filesystem.
article_links = get_links(storm_terms, 'Storms')
# Single-argument print(...) works both as a Python 2 print statement and as
# a Python 3 function call, so the script runs on either interpreter.
print(len(article_links))


# Extract Content & Create Corpus
print("Total %d links to extract" % len(article_links))
extractor.generate_corpus(article_links)
print(extractor.get_stats())
	from news_corpus_builder import NewsCorpusGenerator
	from pprint import pprint
	import sys

	# Total number of articles wanted per category; get_links() divides this
	# budget evenly across the search terms.
	category_total = 300

	# Location where you want to save the news articles.
	corpus_dir = '/home/skillachie/Development/event_articles'

	# NOTE(review): 'file' presumably selects a file-based datastore -- confirm
	# against the news_corpus_builder documentation.
	extractor = NewsCorpusGenerator(corpus_dir, 'file')

	def get_links(terms, category):
		"""Gather the search results for every term of one category.

		The ``category_total`` budget is split evenly (rounded down) across
		``terms``; each term is then searched through
		``extractor.google_news_search`` with that per-term count.

		Args:
			terms: Sequence of search strings to query.
			category: Category label passed through unchanged to
				``extractor.google_news_search`` for each term.

		Returns:
			A list holding the results of all searches, concatenated in
			the order of ``terms``. An empty ``terms`` yields an empty list.
		"""
		# With no search terms there is nothing to query, and the division
		# below would raise ZeroDivisionError.
		if not terms:
			return []

		# Floor division keeps the per-term count an int on Python 2 and 3.
		article_count = category_total // len(terms)

		category_articles = []
		for term in terms:
			category_articles.extend(
				extractor.google_news_search(term, category, article_count))
		return category_articles


	# Hurricane search terms. More than one term may be listed.
	storm_terms = ['Hurricane Joaquin']

	# 'Storms' is the category assigned to this event. Articles will be saved in
	# a Storms folder on your filesystem.
	article_links = get_links(storm_terms, 'Storms')
	# Single-argument print(...) works both as a Python 2 print statement and as
	# a Python 3 function call, so the script runs on either interpreter.
	print(len(article_links))


	# Extract Content & Create Corpus
	print("Total %d links to extract" % len(article_links))
	extractor.generate_corpus(article_links)
	print(extractor.get_stats())