-
-
Save bertjiazheng/eba33e7564c95ce2c4841b38f25e5793 to your computer and use it in GitHub Desktop.
Scrape Paper Titles from CVF Open Access and Plot Evolution of GANs vs Deep in XKCD Style
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
def get_percent_from_files(confs, keywords):
    """Return, per conference, the percentage of titles mentioning a keyword.

    For every name in ``confs`` the file ``titles/<name>.txt`` is read, one
    title per line. A title is counted once if, after lower-casing, it
    contains any keyword immediately followed by a space, colon or comma, or
    immediately preceded by a space. These are plain substring tests, not
    word-boundary matches, and keywords are compared as given (the titles are
    lower-cased, the keywords are not), so pass lower-case keywords.

    Args:
        confs: Iterable of conference names used to build the file paths.
        keywords: Iterable of keyword strings to look for.

    Returns:
        A list of floats in the range 0-100, one per entry of ``confs`` and in
        the same order. A file with no lines contributes ``0.0``.
    """
    percents = []
    for conf in confs:
        # The context manager closes the file even if reading fails.
        with open('titles/' + conf + '.txt', 'r') as title_file:
            all_titles = [line.rstrip('\n') for line in title_file]

        # Guard against dividing by zero when the file holds no titles.
        if not all_titles:
            percents.append(0.0)
            continue

        count = 0
        for title in all_titles:
            lowered = title.lower()  # lower-case once per title, not per test
            if any(
                kword + ' ' in lowered
                or ' ' + kword in lowered
                or kword + ':' in lowered
                or kword + ',' in lowered
                for kword in keywords
            ):
                count += 1
        percents.append(count / float(len(all_titles)) * 100)
    return percents
# Conferences to plot, in chronological order; each one needs a matching
# titles/<name>.txt file for get_percent_from_files to read.
conferences = [
    "CVPR2013", "ICCV2013", "CVPR2014", "ECCV2014",
    "CVPR2015", "ICCV2015", "CVPR2016", "ECCV2016",
    "CVPR2017", "ICCV2017", "CVPR2018", "ECCV2018",
]
xval = range(0, len(conferences))

## GAN vs deep
# Set the style to XKCD
plt.xkcd()
plt.figure(figsize=(6, 4))

# Plot the percents: one line per keyword family
plt.plot(xval, get_percent_from_files(conferences, ['deep', 'cnn', 'cnns', 'convolutional', 'neural network', 'neural networks']), marker='o', label="Deep")
plt.plot(xval, get_percent_from_files(conferences, ['adversarial', 'adversarially', 'gans', 'gan']), marker='o', label="GAN")
plt.plot(xval, get_percent_from_files(conferences, ['lstm', 'lstms', 'rnn', 'rnns', 'polygon-rnn', 'recurrent']), marker='o', label="LSTM")

# Annotate and fine-tune
plt.title("Deep vs GAN", fontsize=13)
plt.legend(loc='upper left', fontsize=12)
# Tick text drops the first "20" of each name, e.g. "CVPR2013" -> "CVPR13".
plt.xticks(range(0, len(conferences)), [conf.replace("20", "", 1) for conf in conferences])

# Fine-tune the axis
ax = plt.gca()
ax.set_axisbelow(True)
ax.set_xlim([-0.1, len(conferences) - 0.9])
ax.set_ylim([-0.5, 25])
# Tick.label was removed from Matplotlib (use label1 / tick_params instead),
# so style the tick labels through the Axes API.
ax.tick_params(axis='x', labelsize=12, labelrotation=20)
ax.tick_params(axis='y', labelsize=12)
plt.subplots_adjust(bottom=0.12)
plt.ylabel("Percentage of papers (%)", fontsize=12)

# Save
plt.savefig('deep_vs_gan_evolution.png', dpi=200)
plt.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from lxml import html | |
# Conference page to scrape; it is also used as the output file's base name.
conference = "ECCV2018"

# Get the HTML text and find the classes of type 'ptitle'
response = requests.get("http://openaccess.thecvf.com/" + conference + ".py", timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as titles.
response.raise_for_status()
tree = html.fromstring(response.text)
papers = tree.find_class('ptitle')

# Get all titles in a list
all_titles = []
for paper in papers:
    title = paper.xpath('a/text()')
    # Skip entries whose anchor has no text rather than raising IndexError.
    if title:
        all_titles.append(title[0])

# Print to file, one title per line.
# NOTE(review): this writes <conference>.txt in the current directory, while
# get_percent_from_files reads titles/<conference>.txt -- move the file there.
with open(conference + '.txt', 'w') as f:
    for title in all_titles:
        # Non-ASCII characters become backslash escapes. The encoded bytes are
        # decoded back to text because bytes + str raises TypeError on Python 3.
        f.write(title.encode('ascii', errors='backslashreplace').decode('ascii') + '\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment