Skip to content

Instantly share code, notes, and snippets.

@bertjiazheng
Forked from jponttuset/gans_vs_deep.py
Created September 18, 2018 10:56
Show Gist options
  • Save bertjiazheng/eba33e7564c95ce2c4841b38f25e5793 to your computer and use it in GitHub Desktop.
Save bertjiazheng/eba33e7564c95ce2c4841b38f25e5793 to your computer and use it in GitHub Desktop.
Scrape Paper Titles from CVF Open Access and Plot Evolution of GANs vs Deep in XKCD Style
import matplotlib.pyplot as plt
def get_percent_from_files(confs, keywords):
    """Return, per conference, the percentage of titles mentioning a keyword.

    For every name in ``confs`` the file ``titles/<name>.txt`` is read, one
    title per line. A title is counted at most once, when its lower-cased
    text contains any keyword that is followed by a space, a colon or a
    comma, or that is preceded by a space.

    NOTE: this is a plain substring heuristic, not a word-boundary match;
    for example the keyword ``'gan'`` also matches a title containing
    ``'organ '``.

    Args:
        confs: Iterable of conference names used to build the file paths.
        keywords: Iterable of keyword strings. They are compared against the
            lower-cased title, so they should be given in lower case.

    Returns:
        A list of floats (0-100), one per entry of ``confs`` and in the same
        order. A file without any title yields ``0.0``.

    Raises:
        IOError/OSError: if a titles file cannot be opened.
    """
    percents = []
    for conf in confs:
        # The context manager closes the file even if reading fails.
        with open('titles/' + conf + '.txt', 'r') as title_file:
            all_titles = [line.rstrip('\n') for line in title_file]

        # Guard against dividing by zero for an empty titles file.
        if not all_titles:
            percents.append(0.0)
            continue

        count = 0
        for title in all_titles:
            lowered = title.lower()  # lower-case once per title
            if any(
                kword + ' ' in lowered
                or ' ' + kword in lowered
                or kword + ':' in lowered
                or kword + ',' in lowered
                for kword in keywords
            ):
                count += 1

        percents.append(count / float(len(all_titles)) * 100)
    return percents
# Conferences to plot, in chronological order; each one needs a matching
# 'titles/<name>.txt' file readable by get_percent_from_files.
conferences = [
    "CVPR2013", "ICCV2013", "CVPR2014", "ECCV2014",
    "CVPR2015", "ICCV2015", "CVPR2016", "ECCV2016",
    "CVPR2017", "ICCV2017", "CVPR2018", "ECCV2018",
]
xval = range(0, len(conferences))

## GAN vs deep
# Set the style to XKCD
plt.xkcd()
plt.figure(figsize=(6, 4))

# Plot the percents: one line per keyword family
plt.plot(xval, get_percent_from_files(conferences, ['deep', 'cnn', 'cnns', 'convolutional', 'neural network', 'neural networks']), marker='o', label="Deep")
plt.plot(xval, get_percent_from_files(conferences, ['adversarial', 'adversarially', 'gans', 'gan']), marker='o', label="GAN")
plt.plot(xval, get_percent_from_files(conferences, ['lstm', 'lstms', 'rnn', 'rnns', 'polygon-rnn', 'recurrent']), marker='o', label="LSTM")

# Annotate and fine-tune
plt.title("Deep vs GAN", fontsize=13)
plt.legend(loc='upper left', fontsize=12)
# Shorten the tick labels by dropping the first "20" (e.g. CVPR2013 -> CVPR13)
plt.xticks(range(0, len(conferences)), [conf.replace("20", "", 1) for conf in conferences])

# Fine-tune the axis
ax = plt.gca()
ax.set_axisbelow(True)
ax.set_xlim([-0.1, len(conferences) - 0.9])
ax.set_ylim([-0.5, 25])
# Style the tick labels through tick_params rather than the per-tick
# `tick.label` attribute, which is deprecated and missing from recent
# Matplotlib releases.
ax.tick_params(axis='x', labelsize=12, labelrotation=20)
ax.tick_params(axis='y', labelsize=12)
plt.subplots_adjust(bottom=0.12)
plt.ylabel("Percentage of papers (%)", fontsize=12)

# Save
plt.savefig('deep_vs_gan_evolution.png', dpi=200)
plt.close()
import requests
from lxml import html
# Conference whose paper titles are scraped from CVF Open Access.
conference = "ECCV2018"

# Get the HTML text and find the classes of type 'ptitle'
response = requests.get("http://openaccess.thecvf.com/" + conference + ".py")
# Fail loudly on an HTTP error instead of silently writing an empty file.
response.raise_for_status()
tree = html.fromstring(response.text)
papers = tree.find_class('ptitle')

# Get all titles in a list
all_titles = []
for paper in papers:
    title = paper.xpath('a/text()')
    # Skip 'ptitle' nodes without link text rather than raising IndexError.
    if title:
        all_titles.append(title[0])

# Print to file, one title per line.
# NOTE: the output goes to '<conference>.txt' in the current directory, while
# get_percent_from_files reads from 'titles/<conference>.txt'.
with open(conference + '.txt', 'w') as f:
    for title in all_titles:
        # Escape non-ASCII characters, then decode back to text so the
        # concatenation with '\n' does not mix bytes and str on Python 3.
        ascii_title = title.encode('ascii', errors='backslashreplace').decode('ascii')
        f.write(ascii_title + '\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment