# nrupatunga/scrape_cvpr.py

## scrape_cvpr.py
import requests
from lxml import html

# Conference keys to scan; each is appended to the open-access base URL.
# Other sets that have been used with this script:
# conferences = ["CVPR2013","ICCV2013","CVPR2014","CVPR2015"]
# conferences = ["CVPR2017","ICCV2017","CVPR2016","ICCV2016"]
conferences = ["CVPR2018"]

# Lower-case substrings to look for in (lower-cased) paper titles.
# Alternative set for the 'deep'-inducing keywords:
# keywords = ['deep', 'cnn', 'convolutional', 'neural network']
keywords = ['tracking']


def fetch_titles(conf):
    """Download the listing page for *conf* and return its paper titles.

    The page is fetched from ``http://openaccess.thecvf.com/<conf>.py`` and
    every element with the CSS class ``ptitle`` is inspected; the title is
    the first text node of that element's direct ``<a>`` child.

    Args:
        conf: Conference key such as ``"CVPR2018"``.

    Returns:
        A list of title strings in page order. Elements without any anchor
        text are skipped instead of raising ``IndexError``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(
        "http://openaccess.thecvf.com/" + conf + ".py", timeout=30)
    # Fail loudly on 4xx/5xx rather than parsing an error page as a listing.
    response.raise_for_status()

    tree = html.fromstring(response.text)
    titles = []
    for paper in tree.find_class('ptitle'):
        text_nodes = paper.xpath('a/text()')
        if text_nodes:
            titles.append(text_nodes[0])
    return titles


def find_matching_titles(titles, kwords):
    """Return the titles that contain at least one of *kwords*.

    Matching is a plain substring test against the lower-cased title, so
    the keywords themselves are expected to be lower-case. A title is
    reported once even if several keywords match it.

    Args:
        titles: Iterable of title strings.
        kwords: Iterable of lower-case keyword substrings.

    Returns:
        A list of the matching titles, in their original order.
    """
    kwords = list(kwords)  # allow one-shot iterables; reused for every title
    matches = []
    for title in titles:
        lowered = title.lower()
        if any(kword in lowered for kword in kwords):
            matches.append(title)
    return matches


def match_percentage(match_count, total):
    """Return ``match_count`` as a percentage of ``total``.

    Returns ``0.0`` when ``total`` is zero so that an empty listing does
    not raise ``ZeroDivisionError``.
    """
    if not total:
        return 0.0
    # float() keeps true division under Python 2 as well.
    return match_count / float(total) * 100


def main():
    """Print matching titles and their share of all papers per conference."""
    for conf in conferences:
        all_titles = fetch_titles(conf)
        matches = find_matching_titles(all_titles, keywords)
        for title in matches:
            print(title)

        percent = match_percentage(len(matches), len(all_titles))
        print("%s: %.2f%%" % (conf, percent))


if __name__ == "__main__":
    main()