# brienna/parse_article_details.py

## parse_article_details.py
# Flatten the article-search responses into one row per article and write
# the result to NYT.csv.
#
# Expects these names to be defined before this point:
#   responses -- iterable of decoded API responses; each one holds its
#                articles under response['response']['docs']
#   start/end -- exclusive date bounds compared against each article's
#                publication date
#
# Column-oriented accumulator: one list per CSV column, all kept the same
# length by appending to every list exactly once per accepted article.
data = {'headline': [],
        'date': [],
        'doc_type': [],
        'material_type': [],
        'section': [],
        'keywords': []}

for response in responses:
    for article in response['response']['docs']:
        date = dateutil.parser.parse(article['pub_date']).date()
        # Both bounds are exclusive: articles dated exactly on `start` or
        # `end` are dropped.
        if not (start < date < end):
            continue
        headline = article['headline']['main']
        # Articles with an empty/missing main headline are skipped.
        if not headline:
            continue

        data['date'].append(date)
        data['headline'].append(headline)
        data['section'].append(article['section_name'])
        data['doc_type'].append(article['document_type'])
        # The value lives under 'type_of_material'. The guard used to test
        # for 'material_type' instead, i.e. a different key from the one
        # being read; look up the real key and fall back to None.
        data['material_type'].append(article.get('type_of_material'))
        # Only keywords tagged as 'subject' are kept; the cell is a list
        # (possibly empty) of their values.
        subjects = [keyword['value']
                    for keyword in article['keywords']
                    if keyword['name'] == 'subject']
        data['keywords'].append(subjects)

df = pd.DataFrame(data)
df.to_csv('NYT.csv', index=False)