Skip to content

Instantly share code, notes, and snippets.

@faizankshaikh
Last active May 3, 2020 15:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save faizankshaikh/c7ceebba241226c40686b343ff61d734 to your computer and use it in GitHub Desktop.
Save faizankshaikh/c7ceebba241226c40686b343ff61d734 to your computer and use it in GitHub Desktop.
Case study on preferred tools in ICLR 2020
# Collect the names of the immediate sub-directories of `root` and show them.
root = "."
dirlist = list(
    filter(
        lambda name: os.path.isdir(os.path.join(root, name)),
        os.listdir(root),
    )
)
print(dirlist)
# Ten most frequent first path segments among the links hosted on github.com.
# For a URL shaped like "https://github.com/<owner>/<repo>" the path is
# "/<owner>/<repo>", so index 1 after splitting on "/" is the <owner> part.
(
    code_links_df.loc[code_links_df.domains == "github.com", "links"]
    .apply(lambda url: urlparse(url).path.split("/")[1])
    .value_counts()
    .head(10)
)
def cleaner(tool_list):
    """Reduce raw requirement entries to a comma-separated string of names.

    Each entry is expected to look like one line of a ``requirements.txt``
    file (``"numpy==1.18.1\\n"``, ``"torch>=1.2"``, ...). Only the leading run
    of word characters (letters, digits, underscore) is kept, which strips
    version specifiers and trailing newlines. Entries that do not start with
    a word character (blank lines, ``-e ...`` options, ``# comments``) are
    skipped.

    NOTE: because ``-`` is not a word character, a hyphenated name such as
    ``scikit-learn`` is shortened to ``scikit``.

    Args:
        tool_list: Iterable of requirement strings, e.g. one element of
            ``Series.str.split(",")``. Missing values (``None`` / ``NaN``)
            and non-string entries are tolerated.

    Returns:
        The cleaned names joined with ``","``; an empty string when the
        input is missing or contains no usable entry.
    """
    # Series.str.split yields NaN (a float) for missing cells. The previous
    # bare-except fallback tried to ",".join() that same value and therefore
    # raised TypeError itself; treat non-iterable input as "no tools".
    try:
        entries = iter(tool_list)
    except TypeError:
        return ""

    cleaned_names = []
    for tool in entries:
        if not isinstance(tool, str):
            continue
        # re.match anchors at the start of the string, like the "^" did.
        leading_word = re.match(r"\w+", tool)
        if leading_word:
            cleaned_names.append(leading_word.group(0))
    return ",".join(cleaned_names)
# Split each comma-joined requirements string into its entries, then let
# cleaner() collapse every list into a comma-separated string of bare names.
all_tools["all_tool_names_cleaned"] = (
    all_tools["all_tool_names"].str.split(",").apply(cleaner)
)
# takes about 24 minutes to download
import subprocess

for link in github_repo_links:
    # Shallow-clone (depth 1) each repository into the working directory.
    # The URL is handed to git as its own argv entry instead of being
    # interpolated into a shell command line (as "!git clone $link" does),
    # so a link containing spaces or shell metacharacters cannot change the
    # command. "--" stops git from reading a link that starts with "-" as
    # an option.
    clone = subprocess.run(
        ["git", "clone", "--depth", "1", "--quiet", "--", link],
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    if clone.returncode != 0:
        # Keep going: one failing repository must not abort the whole batch.
        print("git clone failed for", link, clone.stderr.strip())
# Tally how many directories in `dirlist` contain a requirements.txt
# (has_req_cnt) and how many do not (no_req_cnt).
has_req_cnt = 0
no_req_cnt = 0
for repo in dirlist:
    path = "/content/" + repo
    if not os.path.exists(path + "/requirements.txt"):
        no_req_cnt += 1
        continue
    has_req_cnt += 1
# takes about 10 minutes to run
import subprocess

for repo in dirlist:
    path = "/content/" + repo
    if os.path.exists(path + "/requirements.txt"):
        # Already has a requirements file; nothing to do.
        continue
    # No requirements.txt present: run pipreqs on the directory. The path is
    # passed as a separate argv entry rather than interpolated into a shell
    # line ("!pipreqs $path"), so directory names with spaces or shell
    # metacharacters are handled literally.
    result = subprocess.run(
        ["pipreqs", path],
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    if result.returncode != 0:
        # Report and continue so one problematic directory does not stop the run.
        print("pipreqs failed for", path, result.stderr.strip())
# Store the network-location (host) component of every link in a new
# "domains" column.
code_links_df["domains"] = code_links_df["links"].apply(
    lambda url: urlparse(url).netloc
)
# Shape of the array of distinct cleaned tool names (a 1-tuple with the count).
# expand=True spreads the names over columns; stack() flattens them back into
# a single Series.
(
    all_tools["all_tool_names_cleaned"]
    .str.split(",", expand=True)
    .stack()
    .unique()
    .shape
)
# Print the usage line for a handful of tools, in three groups separated by
# blank lines. tensorflow is reported with an offset of 12.
for tool in ("gym", "networkx", "tensorboard", "torch"):
    give_score(tool)
print()
give_score("tensorflow", offset=12)
print()
for tool in ("keras", "transformers"):
    give_score(tool)
# Number of rows whose cleaned tool string contains "torch" anywhere.
all_tools["all_tool_names_cleaned"].str.contains("torch").sum()
def give_score(tool_name, offset=0):
    """Print how many rows of ``all_tools`` mention *tool_name* and its share.

    Rows are matched with ``Series.str.contains`` on the
    ``all_tool_names_cleaned`` column, so *tool_name* is searched for anywhere
    inside the comma-joined string.

    Args:
        tool_name: Pattern to look for in each row's cleaned tool string.
        offset: Extra count added to both the number of matches and the
            total number of rows before the percentage is computed.

    Returns:
        None. The result is printed.
    """
    matches = all_tools.all_tool_names_cleaned.str.contains(tool_name).sum()
    num = matches + offset
    total = all_tools.shape[0] + offset
    percentage = round((num / total) * 100, 4)
    print(
        "Count of {} is {} and total usage is {}%".format(
            tool_name, num, percentage
        )
    )
%matplotlib inline
import os
import re
import sys
import requests
import openreview
import pandas as pd
import matplotlib.pyplot as plt
from random import choice
from wordcloud import WordCloud
from urllib.parse import urlparse
!pip install openreview-py
!pip install pipreqs
# Load the previously saved tool table from the working directory.
all_tools = pd.read_csv(filepath_or_buffer="all_tools.csv")
# Build the tool table from the two parallel lists: one row per entry, with
# the name in "all_repo_names" and the joined requirement text in
# "all_tool_names". Then preview the first rows.
all_tools = pd.DataFrame(
    dict(all_repo_names=all_repo_names, all_tool_names=all_tool_names)
)
all_tools.head()
# Connect to OpenReview.
client = openreview.Client(baseurl="https://openreview.net")

# Index every ICLR 2020 blind submission note by its id.
blind_notes = dict(
    (note.id, note)
    for note in openreview.tools.iterget_notes(
        client,
        invitation="ICLR.cc/2020/Conference/-/Blind_Submission",
        details="original",
    )
)

# Iterator over the decision notes of all papers.
all_decision_notes = openreview.tools.iterget_notes(
    client, invitation="ICLR.cc/2020/Conference/Paper.*/-/Decision"
)

# Keep the submission behind every decision whose text contains "Accept";
# a decision note's `forum` is used as the key into blind_notes.
accepted_submissions = list(
    blind_notes[decision.forum]
    for decision in all_decision_notes
    if "Accept" in decision.content["decision"]
)
len(accepted_submissions)
# Bar chart of the ten most frequent cleaned tool names.
(
    all_tools["all_tool_names_cleaned"]
    .str.split(",", expand=True)
    .stack()
    .value_counts()
    .head(10)
    .plot(kind="bar")
)
# Show both tallies: (with requirements.txt, without requirements.txt).
(has_req_cnt, no_req_cnt)
# Frequency table of the fifty most common cleaned tool names.
(
    all_tools["all_tool_names_cleaned"]
    .str.split(",", expand=True)
    .stack()
    .value_counts()
    .head(50)
)
# Read every directory's requirements.txt and keep two parallel lists: the
# directory name and its requirement lines joined with "," and lower-cased.
all_repo_names = []
all_tool_names = []
for repo in dirlist:
    try:
        with open("/content/" + repo + "/" + "requirements.txt", "r") as f:
            tools = f.readlines()
    except Exception:
        # Best effort: report the failure and move on to the next directory.
        # `except Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate, so the loop can still be interrupted.
        print("Unexpected error for ", repo, sys.exc_info()[0])
        continue
    # Lines keep their trailing newline here; cleaner() strips it later.
    all_repo_names.append(repo)
    all_tool_names.append(",".join(tools).lower())
# Reload the saved tool table from the working directory.
all_tools = pd.read_csv(filepath_or_buffer="all_tools.csv")
# Exclude two non-repository folders from the directory list (presumably the
# default folders of the hosted notebook runtime -- confirm).
for unwanted in ('.config', 'sample_data'):
    # list.remove raises ValueError when the entry is absent, e.g. when this
    # cell is run a second time; only remove what is actually there.
    if unwanted in dirlist:
        dirlist.remove(unwanted)
# Persist the tool table (without the index column) to the working directory.
all_tools.to_csv(path_or_buf="all_tools.csv", index=False)
# Render a word cloud (at most 100 words, white background) from every
# cleaned tool name in the table.
# Rows whose cleaned string is empty are read back from CSV as NaN (a float),
# which str.join rejects with TypeError, so missing values are dropped first.
all_tool_string = ",".join(all_tools.all_tool_names_cleaned.dropna())
wordcloud = WordCloud(background_color="white", max_words=100)
wordcloud.generate(all_tool_string)

plt.figure(figsize=(10, 20))
plt.imshow(wordcloud)
plt.axis("off")  # hide the axes; only the image is of interest
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment