
Jason S. Kessler (JasonKessler)

@JasonKessler
JasonKessler / monkey_patch_keras_custom_object.py
Last active January 19, 2017 23:12
Loading a model with custom activation function (or custom_objects) in Keras 1.1.0 via monkey patching
import mock
import keras

def custom(x):
    return x  # Replace this with your activation function.

model_json = open('config.json').read()
'''
Trying to run:
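The snippet above is cut off before the monkey patch itself. A minimal sketch of the idea, with a plain dict standing in for Keras's internal custom-object table (the names `CUSTOM_OBJECTS` and `load_with_custom_objects` are illustrative, not Keras 1.1.0's actual internals):

```python
from unittest import mock

# Stand-in for Keras's internal registry of custom objects; the real
# attribute path in Keras 1.1.0 differs.
CUSTOM_OBJECTS = {}

def custom(x):
    return x  # Replace this with your activation function.

def load_with_custom_objects(name, fn):
    # Temporarily register the activation for the duration of loading,
    # as a monkey patch would, then let the registry revert.
    with mock.patch.dict(CUSTOM_OBJECTS, {name: fn}):
        return CUSTOM_OBJECTS[name]

activation = load_with_custom_objects('custom', custom)
```

`mock.patch.dict` restores the registry on exit, so the patch cannot leak into later model loads.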
@JasonKessler
JasonKessler / gist:8563282fcedf010bc809e24f647b780b
Last active February 4, 2017 01:31
Apache combined logging format for Flask/Python3 (poorly hacked together and somewhat incomplete)
import flask

def apache_combined_log(request: flask.Request):
    path = request.path
    if len(request.query_string) > 0:
        path += '?' + request.query_string.decode('utf-8')
    size = '-'
    # Note: __sizeof__() reports the request object's in-memory size,
    # not the response body size the combined log format expects.
    if request.__sizeof__() > 0:
        size = request.__sizeof__()
    referrer = '-'
    if request.referrer:
        referrer = request.referrer
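The gist stops before the fields are assembled into a log line; the combined format itself is fixed. A hedged sketch of that assembly from plain values, with no Flask dependency (the function name and argument list here are illustrative):

```python
from datetime import datetime, timezone

def format_combined_log(ip, user, when, method, path, protocol,
                        status, size, referrer, user_agent):
    """Assemble one Apache combined-log line from raw field values.
    Missing optional fields are rendered as '-' per the format."""
    timestamp = when.strftime('%d/%b/%Y:%H:%M:%S %z')
    return ('{ip} - {user} [{ts}] "{method} {path} {proto}" '
            '{status} {size} "{ref}" "{ua}"').format(
        ip=ip, user=user or '-', ts=timestamp, method=method,
        path=path, proto=protocol, status=status, size=size or '-',
        ref=referrer or '-', ua=user_agent or '-')

line = format_combined_log('127.0.0.1', None,
                           datetime(2017, 2, 4, 1, 31, tzinfo=timezone.utc),
                           'GET', '/index.html?q=1', 'HTTP/1.1',
                           200, 2326, 'http://example.com/', 'curl/7.50')
```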
### Keybase proof
I hereby claim:
* I am jasonkessler on github.
* I am jasonkessler (https://keybase.io/jasonkessler) on keybase.
* I have a public key ASCPvUWxdEZQwSzjQWQxLIxn9xolt5fWO6Tr2SFQ-_tsEAo
To claim this, I am signing this object:
@JasonKessler
JasonKessler / crawl_icrl_2018_from_openreview.py
Created February 7, 2018 02:15
Scraping Openreview.net for ICLR Reviews (1)
import time

import pandas as pd
import requests

url = 'https://openreview.net/notes?invitation=ICLR.cc%2F2018%2FConference%2F-%2FBlind_Submission&offset=0&limit=1000'
df = pd.DataFrame(requests.get(url).json()['notes'])  # Each row in this data frame is a paper.
forum_content = []
for forum_id in df.forum:  # Each forum holds a paper's reviews, comments, and acceptance decision.
    forum_content.append(requests.get('https://openreview.net/notes?forum={}&trash=true'.format(forum_id)).json())
    time.sleep(.3)  # Be polite to the server between requests.
df['forumContent'] = pd.Series(forum_content)
@JasonKessler
JasonKessler / format_scraped_iclr_reviews.py
Last active February 7, 2018 18:08
Formatting scraped ICLR Reviews
import time

import pandas as pd
import requests

url = 'https://openreview.net/notes?invitation=ICLR.cc%2F2018%2FConference%2F-%2FBlind_Submission&offset=0&limit=1000'
df = pd.DataFrame(requests.get(url).json()['notes'])  # Each row in this data frame is a paper.
forum_content = []
for forum_id in df.forum:  # Each forum holds a paper's reviews, comments, and acceptance decision.
    forum_content.append(requests.get('https://openreview.net/notes?forum={}&trash=true'.format(forum_id)).json())
    time.sleep(.3)
df['forumContent'] = pd.Series(forum_content)
df['title'] = df.content.apply(lambda x: x['title'])
df['authors'] = df.content.apply(lambda x: x['authors'])
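The last two lines flatten fields out of the per-paper `content` dicts with `Series.apply`. The same pattern on a toy frame (the rows here are made up; real rows come from the OpenReview API):

```python
import pandas as pd

# Toy stand-in for the scraped frame; each 'content' cell is a dict,
# as returned by the OpenReview notes endpoint.
df = pd.DataFrame({'content': [
    {'title': 'Paper A', 'authors': ['Ann', 'Bo']},
    {'title': 'Paper B', 'authors': ['Cy']},
]})

df['title'] = df.content.apply(lambda x: x['title'])
df['authors'] = df.content.apply(lambda x: x['authors'])
```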
title,authors,decision_raw,forum,confidence,rating,review
Improving Discriminator-Generator Balance in Generative Adversarial Networks,['Simen Selseng and Björn Gambäck'],Reject,SyBPtQfAZ,4: The reviewer is confident but not absolutely certain that the evaluation is correct,3: Clear rejection,"The paper proposes a variety of modifications to improve GAN training and evaluates them using a variant of the Generative Adversarial Metric.
The first proposed approach, Static Reusable Noise, proposes sampling a fixed set of latent noise vectors instead of producing them via online sampling. It is motivated by the observation that the generator encounters different noise samples at each iteration of training while for real data the discriminator sees only a fixed number of samples. This does not seem to be a particularly convincing argument. One could argue likewise that this makes the discriminator's job easier as it only has to track the finite amount of samples the generator can produce instead of the full distri
import pandas as pd
import scattertext as st
import spacy

nlp = spacy.load('en')
reviews_df = pd.read_csv('https://github.com/JasonKessler/ICLR18ReviewVis/raw/master/iclr2018_reviews.csv.bz2')
reviews_df['parse'] = reviews_df['review'].apply(nlp)
corpus = (st.CorpusFromParsedDocuments(reviews_df, category_col='rating_bin', parsed_col='parse')
          .build()
          .remove_categories(['Neutral']))
html = st.produce_scattertext_explorer(corpus,
                                       category='Positive',
                                       not_categories=['Negative'],
@JasonKessler
JasonKessler / ecdfpurepy.py
Created February 9, 2018 00:38
Pure Python ECDF With Linear Interpolation
import bisect

class ECDFPurePy(object):
    '''
    ECDF with linear interpolation
    '''
    def __init__(self, raw_list, min_possible, max_possible, resolution=1000):
        '''
        raw_list: sorted list or generator of numbers
        '''
        self.resolution_ = resolution
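The class is cut off after the constructor. A self-contained sketch of the underlying idea, an ECDF that interpolates linearly between adjacent sorted sample points, written as a standalone function rather than the gist's class:

```python
import bisect

def ecdf(sorted_xs, x):
    """Empirical CDF of sorted_xs evaluated at x, with linear
    interpolation between adjacent sample points.
    sorted_xs must be sorted ascending with at least two values."""
    n = len(sorted_xs)
    if x <= sorted_xs[0]:
        return 0.0
    if x >= sorted_xs[-1]:
        return 1.0
    # Index of the first sample strictly greater than x.
    i = bisect.bisect_right(sorted_xs, x)
    lo, hi = sorted_xs[i - 1], sorted_xs[i]
    frac = (x - lo) / (hi - lo)  # position of x between its neighbors
    return ((i - 1) + frac) / (n - 1)
```

`bisect_right` keeps the lookup O(log n) over the sorted samples; interpolation smooths the step function the raw ECDF would otherwise be.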
@JasonKessler
JasonKessler / iclr_positive_negative_loridp.py
Last active March 9, 2018 19:42
Positive vs. Negative ICLR Reviews LORIDP
import pandas as pd
import scattertext as st
import spacy

nlp = spacy.load('en', parser=False)
reviews_df = pd.read_csv('https://github.com/JasonKessler/ICLR18ReviewVis/raw/master/iclr2018_reviews.csv.bz2')
reviews_df['parse'] = reviews_df['review'].apply(nlp)
full_corpus = st.CorpusFromParsedDocuments(reviews_df, category_col='decision', parsed_col='parse').build()
corpus = full_corpus.remove_categories(['Workshop'])
priors = (st.PriorFactory(full_corpus, term_ranker=st.OncePerDocFrequencyRanker)
          .use_all_categories()
          .align_to_target(corpus)
          .get_priors())
html = st.produce_frequency_explorer(
    corpus,
    category='Accept',
    not_categories=['Reject'],
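The LORIDP in the gist title is the log-odds-ratio with an informative Dirichlet prior (Monroe et al. 2008), which the `PriorFactory` priors feed into. The per-term arithmetic can be sketched without scattertext; this is a hand-rolled illustration of the formula, not scattertext's implementation:

```python
import math

def log_odds_ratio_idp(y_i, n_i, y_j, n_j, alpha_w, alpha_0):
    """Z-scored log-odds-ratio of one term between categories i and j
    under an informative Dirichlet prior (Monroe et al. 2008).
    y_i: term count in category i; n_i: total tokens in category i;
    alpha_w: prior count for this term; alpha_0: total prior mass."""
    delta = (math.log((y_i + alpha_w) / (n_i + alpha_0 - y_i - alpha_w))
             - math.log((y_j + alpha_w) / (n_j + alpha_0 - y_j - alpha_w)))
    # Approximate variance of the log-odds-ratio estimate.
    var = 1.0 / (y_i + alpha_w) + 1.0 / (y_j + alpha_w)
    return delta / math.sqrt(var)
```

A term used equally often in both categories scores zero; one over-represented in the Accept category scores positive, with the prior damping scores for rare terms.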
@JasonKessler
JasonKessler / iclr_positive_negative_loridp.py
Last active March 9, 2018 19:46
Accept vs. Reject ICLR Reviews LORIDP
import pandas as pd
import scattertext as st
import spacy

nlp = spacy.load('en', parser=False)
reviews_df = pd.read_csv('https://github.com/JasonKessler/ICLR18ReviewVis/raw/master/iclr2018_reviews.csv.bz2')
reviews_df['parse'] = reviews_df['review'].apply(nlp)
# Create Corpus based on accept/reject/workshop decision
full_corpus = st.CorpusFromParsedDocuments(
    reviews_df, category_col='decision', parsed_col='parse').build()
# A two-category corpus to use for plotting, with unigrams which only occur in bigrams removed.
# Terms used in <5 documents are removed as well.
corpus = st.CompactTerms(full_corpus.remove_categories(['Workshop']),