vikjam/nytimes.py

## nytimes.py
#!/usr/bin/env python
from lxml import html
import requests
import nltk

# Download the New York Times front page.
# An explicit timeout is passed because requests otherwise waits indefinitely
# for an unresponsive server; raise_for_status() aborts on a 4xx/5xx response
# instead of silently scraping an error page and printing nothing.
page = requests.get('http://nytimes.com', timeout=30)
page.raise_for_status()
tree = html.fromstring(page.text)

# Collect the text of headline links: <h2><a> nodes nested under the element
# with id "top-news", and <article><h2><a> nodes nested under the tenth <div>
# child of the element with id "main". Both queries return lists of strings.
top_stories = tree.xpath('//*[@id="top-news"]/*//h2/a/text()')
more_stories = tree.xpath('//*[@id="main"]/div[10]/*//article/h2/a/text()')
all_stories = top_stories + more_stories

# Print one line per headline containing only its proper nouns.
# NOTE: word_tokenize/pos_tag need the NLTK tokenizer and tagger data packages
# to be installed locally.
for story in all_stories:
    # Trim surrounding whitespace and replace embedded newlines with spaces
    # so each headline is handled as a single line of text.
    sentence = str(story).strip().replace("\n", " ")
    tokens = nltk.word_tokenize(sentence)
    tagged = nltk.pos_tag(tokens)
    # Keep words whose tag starts with 'NNP' (covers NNP and NNPS, the
    # proper-noun tags).
    nnps = [word for (word, tag) in tagged if tag.startswith('NNP')]
    print(' '.join(nnps))


# End of script
	#!/usr/bin/env python
	from lxml import html
	import requests
	import nltk

	# Download the New York Times front page.
	# An explicit timeout is passed because requests otherwise waits
	# indefinitely for an unresponsive server; raise_for_status() aborts on a
	# 4xx/5xx response instead of silently scraping an error page.
	page = requests.get('http://nytimes.com', timeout=30)
	page.raise_for_status()
	tree = html.fromstring(page.text)

	# Collect the text of headline links. The '*' wildcard steps are required:
	# a bare '//[@id=...]' or '///' is not valid XPath and makes lxml raise an
	# XPath evaluation error.
	top_stories = tree.xpath('//*[@id="top-news"]/*//h2/a/text()')
	more_stories = tree.xpath('//*[@id="main"]/div[10]/*//article/h2/a/text()')
	all_stories = top_stories + more_stories

	# Print one line per headline containing only its proper nouns.
	for story in all_stories:
		# Trim surrounding whitespace and replace embedded newlines with
		# spaces so each headline is handled as a single line of text.
		sentence = str(story).strip().replace("\n", " ")
		tokens = nltk.word_tokenize(sentence)
		tagged = nltk.pos_tag(tokens)
		# Keep words whose tag starts with 'NNP' (covers NNP and NNPS, the
		# proper-noun tags).
		nnps = [word for (word, tag) in tagged if tag.startswith('NNP')]
		print(' '.join(nnps))


	# End of script