Skip to content

Instantly share code, notes, and snippets.

@nrupatunga
Created June 7, 2018 13:38
Show Gist options
  • Save nrupatunga/b307893a0b97275253e97bb975a0bd83 to your computer and use it in GitHub Desktop.
Save nrupatunga/b307893a0b97275253e97bb975a0bd83 to your computer and use it in GitHub Desktop.
import requests
from lxml import html
# Alternative conference sets (swap one in to scan other years):
# conferences = ["CVPR2013","ICCV2013","CVPR2014","CVPR2015"]
# conferences = ["CVPR2017","ICCV2017","CVPR2016","ICCV2016"]
conferences = ["CVPR2018"]

# Keywords to look for in paper titles (matched case-insensitively).
# keywords = ['deep', 'cnn', 'convolutional', 'neural network']
keywords = ['tracking']

# Seconds to wait for the listing page before giving up, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30


def fetch_titles(conf):
    """Return the paper titles listed on the open-access page for *conf*.

    Downloads ``http://openaccess.thecvf.com/<conf>.py`` and collects the
    text of the first ``<a>`` child of every element with class ``ptitle``.

    Args:
        conf: Conference identifier used to build the URL, e.g. "CVPR2018".

    Returns:
        A list of title strings, in page order.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.get(
        "http://openaccess.thecvf.com/" + conf + ".py",
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it
    # were the paper listing.
    response.raise_for_status()
    tree = html.fromstring(response.text)

    titles = []
    for paper in tree.find_class('ptitle'):
        text = paper.xpath('a/text()')
        # Skip entries without anchor text rather than crash on text[0].
        if text:
            titles.append(text[0])
    return titles


def match_titles(titles, keywords):
    """Return the titles that contain at least one keyword.

    Matching is a case-insensitive substring test. A title matching
    several keywords is returned once, and input order is preserved.

    Args:
        titles: Iterable of title strings.
        keywords: Iterable of keyword strings.

    Returns:
        A list of the matching titles.
    """
    lowered = [kword.lower() for kword in keywords]
    return [
        title for title in titles
        if any(kword in title.lower() for kword in lowered)
    ]


def match_percent(count, total):
    """Return ``count`` as a percentage of ``total``; 0.0 when total is 0."""
    if total == 0:
        return 0.0
    return 100.0 * count / total


def main():
    """Print matching titles and the match percentage for each conference."""
    for conf in conferences:
        all_titles = fetch_titles(conf)
        matches = match_titles(all_titles, keywords)
        for title in matches:
            print(title)
        percent = match_percent(len(matches), len(all_titles))
        print("%s: %.2f%%" % (conf, percent))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment