byron wallace bwallace

## AAAI.r
votes.years <- read.csv("regressme.txt", sep="\t")
dummy <- as.numeric(votes.years$vote==2)
m <- glm(dummy ~ regress.me$year, family=binomial("logit"))
> summary(m)

Call:
glm(formula = dummy ~ regress.me$year, family = binomial("logit"))

Deviance Residuals:
    Min       1Q   Median       3Q      Max

## irony-analysis.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                bwallace
                / irony-analysis.ipynb
            
            
              Created
              March 7, 2014 13:11
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## more-irony
{
 "metadata": {
  "name": "irony-context"
 },
 "nbformat": 2,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",

## Tokenizing, redux
We need to deal with the "TextNodes" returned by PDF.js, which look like this:

        {
          "str": "procedure was performed at 10 mmHg pressure. At 6 and 24 h postoperatively, a short-form McGill Questionnaire (MPQ) was",
          "dir": "ltr",
          "width": 477.8928,
          "height": 9,
          "transform": [
            9,
            0,

## gist:d7095f9cc049ddf78fec
from cochranenlp.readers.biviewer import PDFBiViewer
dat = PDFBiViewer()
# this will take a relatively long time the first
# time it is invoked (it caches the dict)
study = dat.get_study_from_pmid("16467647")
print study.cochrane['CHARACTERISTICS']

''' (something like):
{'CHAR_INTERVENTIONS': 'Tailored dietary intervention to encourage a decrease in sodium intake and an increase in vitamin C and carotene intake via increasing F&V consumption. Dietary goals were to decrease salt to less than 8 and 10g/day in women and men respectively and increase carotene intake to more than 5000 \xc2\xb5g/day and vitamin C intake to more than 200mg/day. The intervention consisted of 2 individualised dietary counselling sessions at baseline and 5 months (15 minutes each), a group lecture half-way through the intervention, and 2 newsletters. Control subjects recieved the intervention at 12 months (cross-over period). Follow-up data were presented at 12 months.',
 'CHAR_METHODS': 'RCT of cross-over design but data analysed and

## pico_SDS.py
import cochranenlp
cd cochranenlp # assuming you're in ipython-like environment...
from sds import pico_SDS
# this will take a long time, especially the first time
# because it should generate pickles of y_dict and the
# corresponding vectorizers. this will happen in
# experiments/pico_DS. these should be saved to disk
# in the sds subdirectory. thereafter, you can elide the
# y_dict_pickle argument and use the default
pico_SDS.run_DS_PICO_experiments(y_dict_pickle=None)

## using_rct_robot.py
import requests
import json

# assumes RobotReviewer running locally on port 5000
url = "http://127.0.0.1:5000/is_an_rct"
# toy example
citation_data = {'title': 'a randomized control trial', 'abstract': 'hello world'}
headers = {'content-type': 'application/json'}

payload = {'title': 'a randomized control trial', 'abstract': 'hello world'}

## rCNN.py
tokens_input = Input(name='input', shape=(None, max_doc_len, max_sent_len), dtype="int32")
x = Embedding(p.max_features, p.embedding_dims)(tokens_input)
model = Model(input=tokens_input, output=x)
model.summary()
'''
Layer (type)                       Output Shape        Param #     Connected to
====================================================================================================
input (InputLayer)                 (None, None, 500, 500
____________________________________________________________________________________________________
embedding_11 (Embedding)           (None, None, 200)   2000000     input[0][0]

## keras_intermediate_outputs.py
input = Input(shape=(2,))

probs = Dense(2, activation='softmax', name='probs')(input)
probs = Dropout(1e-100)(probs)

model = Model(input=input, output=probs)
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

from keras.utils.np_utils import to_categorical
	votes.years <- read.csv("regressme.txt", sep="\t")
	dummy <- as.numeric(votes.years$vote==2)
	m <- glm(dummy ~ regress.me$year, family=binomial("logit"))
	> summary(m)

	Call:
	glm(formula = dummy ~ regress.me$year, family = binomial("logit"))

	Deviance Residuals:
	Min 1Q Median 3Q Max
	{
	"metadata": {
	"name": "irony-context"
	},
	"nbformat": 2,
	"worksheets": [
	{
	"cells": [
	{
	"cell_type": "markdown",
	We need to deal with the "TextNodes" returned by PDF.js, which look like this:

	{
	"str": "procedure was performed at 10 mmHg pressure. At 6 and 24 h postoperatively, a short-form McGill Questionnaire (MPQ) was",
	"dir": "ltr",
	"width": 477.8928,
	"height": 9,
	"transform": [
	9,
	0,
	from cochranenlp.readers.biviewer import PDFBiViewer
	dat = PDFBiViewer()
	# this will take a relatively long time the first
	# time it is invoked (it caches the dict)
	study = dat.get_study_from_pmid("16467647")
	print study.cochrane['CHARACTERISTICS']

	''' (something like):
	{'CHAR_INTERVENTIONS': 'Tailored dietary intervention to encourage a decrease in sodium intake and an increase in vitamin C and carotene intake via increasing F&V consumption. Dietary goals were to decrease salt to less than 8 and 10g/day in women and men respectively and increase carotene intake to more than 5000 \xc2\xb5g/day and vitamin C intake to more than 200mg/day. The intervention consisted of 2 individualised dietary counselling sessions at baseline and 5 months (15 minutes each), a group lecture half-way through the intervention, and 2 newsletters. Control subjects recieved the intervention at 12 months (cross-over period). Follow-up data were presented at 12 months.',
	'CHAR_METHODS': 'RCT of cross-over design but data analysed and
	import cochranenlp
	cd cochranenlp # assuming you're in ipython-like environment...
	from sds import pico_SDS
	# this will take a long time, especially the first time
	# because it should generate pickles of y_dict and the
	# corresponding vectorizers. this will happen in
	# experiments/pico_DS. these should be saved to disk
	# in the sds subdirectory. thereafter, you can elide the
	# y_dict_pickle argument and use the default
	pico_SDS.run_DS_PICO_experiments(y_dict_pickle=None)
	import requests
	import json

	# assumes RobotReviewer running locally on port 5000
	url = "http://127.0.0.1:5000/is_an_rct"
	# toy example
	citation_data = {'title': 'a randomized control trial', 'abstract': 'hello world'}
	headers = {'content-type': 'application/json'}

	payload = {'title': 'a randomized control trial', 'abstract': 'hello world'}
	tokens_input = Input(name='input', shape=(None, max_doc_len, max_sent_len), dtype="int32")
	x = Embedding(p.max_features, p.embedding_dims)(tokens_input)
	model = Model(input=tokens_input, output=x)
	model.summary()
	'''
	Layer (type) Output Shape Param # Connected to
	====================================================================================================
	input (InputLayer) (None, None, 500, 500
	____________________________________________________________________________________________________
	embedding_11 (Embedding) (None, None, 200) 2000000 input[0][0]
	input = Input(shape=(2,))

	probs = Dense(2, activation='softmax', name='probs')(input)
	probs = Dropout(1e-100)(probs)

	model = Model(input=input, output=probs)
	model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

	from keras.utils.np_utils import to_categorical