Jon Gauthier hans

@hans
hans / viz_example.py
Last active May 18, 2019 18:04
visualize a BERT masked LM example drawn from a tfrecord
def viz_example(ex):
    # `v` is the id -> token vocabulary list, loaded elsewhere in this gist.
    toks = [v[idx] for idx in ex.features.feature["input_ids"].int64_list.value if idx > 0]
    masked_words = [v[idx] for idx in ex.features.feature["masked_lm_ids"].int64_list.value if idx > 0]
    masked_dict = dict(zip(ex.features.feature["masked_lm_positions"].int64_list.value, masked_words))
    toks_out = ["%10s" % tok for tok in toks]
    mask_out = ["%10s" % masked_dict[i] if i in masked_dict else " " * 10 for i in range(len(toks))]
    for block_start in range(0, len(toks), 10):
        print(" ".join(toks_out[block_start:block_start + 10]))
        print(" ".join(mask_out[block_start:block_start + 10]))
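The block-printing logic above can be exercised without TensorFlow. The sketch below reimplements just the alignment step with toy tokens; the helper name and example data are illustrative, not part of the gist:

```python
def print_masked_blocks(toks, masked_dict, block=10, width=10):
    """Print tokens in fixed-width blocks; a second line under each block
    shows the original word at every masked position."""
    toks_out = ["%*s" % (width, tok) for tok in toks]
    mask_out = ["%*s" % (width, masked_dict[i]) if i in masked_dict else " " * width
                for i in range(len(toks))]
    lines = []
    for start in range(0, len(toks), block):
        lines.append(" ".join(toks_out[start:start + block]))
        lines.append(" ".join(mask_out[start:start + block]))
    return "\n".join(lines)

toks = ["[CLS]", "the", "[MASK]", "sat", "on", "the", "[MASK]", "[SEP]"]
print(print_masked_blocks(toks, {2: "cat", 6: "mat"}))
```

Each `[MASK]` token lines up vertically with its original word on the line below, which is the same alignment `masked_dict` provides in `viz_example`.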
@hans
hans / analysis.ipynb
Created May 13, 2019 14:21
MEG preliminary analysis
@hans
hans / childes_questions.ipynb
Created April 4, 2019 18:19
CHILDES analysis: extract mother–child question–answer pairs from Brown corpus
%matplotlib inline
import matplotlib.pyplot as plt
// WebPPL code for a proof-of-concept model of
// question-guided visual search.
var images = [0, 1]
// deterministic p(description | image)
var descriptions = [
{blocked_intent: true, missing_object: false},
{blocked_intent: false, missing_object: true}
]
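For readers without WebPPL, here is a minimal Python analogue of the inference this sketch sets up: a uniform prior over the two images and deterministic descriptions, so observing one description feature simply rules images in or out. The `posterior` helper is hypothetical, not part of the gist:

```python
images = [0, 1]
# Deterministic p(description | image), mirroring the WebPPL snippet.
descriptions = [
    {"blocked_intent": True, "missing_object": False},   # image 0
    {"blocked_intent": False, "missing_object": True},   # image 1
]

def posterior(feature, value):
    """P(image | description[feature] == value), under a uniform prior."""
    consistent = [i for i in images if descriptions[i][feature] == value]
    return {i: (1 / len(consistent) if i in consistent else 0.0) for i in images}

print(posterior("blocked_intent", True))  # all mass on image 0
```

With deterministic descriptions, a single informative feature collapses the posterior onto one image, which is the point of question-guided search.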
@hans
hans / fetch_utterances.py
Created October 16, 2018 22:15
Merge Hartshorne event annotations with CHILDES transcripts and do some initial analysis
from argparse import ArgumentParser
from collections import namedtuple
from pathlib import Path
import re
import pandas as pd
UTT_RE = re.compile(r"\*([A-Z]+):\s*(.+)\s*\x15(\d+)_(\d+)\x15$")
TAG_RE = re.compile(r"([a-z:]+)\|(\w+)")
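The two regexes target CHILDES CHAT transcripts: `UTT_RE` captures a speaker tier plus the `\x15`-delimited millisecond alignment markers, and `TAG_RE` splits `pos|lemma` tags from the `%mor` tier. A quick check on a made-up line:

```python
import re

UTT_RE = re.compile(r"\*([A-Z]+):\s*(.+)\s*\x15(\d+)_(\d+)\x15$")
TAG_RE = re.compile(r"([a-z:]+)\|(\w+)")

# Illustrative CHAT-style line: speaker code, utterance, then the
# \x15-delimited start/end timestamps used for media alignment.
line = "*MOT:\twhat is that ?\x1512345_14200\x15"
m = UTT_RE.match(line)
print(m.groups())  # ('MOT', 'what is that ?', '12345', '14200')

# %mor-tier tag: part of speech (possibly with a subcategory) and lemma.
print(TAG_RE.match("pro:int|what").groups())  # ('pro:int', 'what')
```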
@hans
hans / build_verb_argument_vocab.py
Last active August 13, 2018 18:06
MEG decoding study: Corpus search for candidate pairs of attested verb-noun combinations
"""
Calculate statistics on verb-argument pairings given a parsed corpus
of CoNLL-U-formatted files. Part-of-speech tags and dependency heads+labels
are required.
"""
from collections import Counter
from pathlib import Path
import re
import sys
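The preview cuts off before the counting logic, but the docstring describes the shape of the computation: tally verb-argument pairings from dependency-parsed CoNLL-U input. A minimal sketch of one such statistic, counting (verb lemma, object lemma) pairs; the sentence and helper are illustrative, not the script's actual code:

```python
from collections import Counter

# CoNLL-U columns: ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC
conllu = """\
1\tthe\tthe\tDET\t_\t_\t2\tdet\t_\t_
2\tdog\tdog\tNOUN\t_\t_\t3\tnsubj\t_\t_
3\tchased\tchase\tVERB\t_\t_\t0\troot\t_\t_
4\ta\ta\tDET\t_\t_\t5\tdet\t_\t_
5\tcat\tcat\tNOUN\t_\t_\t3\tobj\t_\t_"""

def count_verb_objects(sentence):
    """Count (verb lemma, object lemma) pairs in one CoNLL-U sentence."""
    rows = [line.split("\t") for line in sentence.splitlines()]
    by_id = {row[0]: row for row in rows}
    counts = Counter()
    for row in rows:
        head = by_id.get(row[6])
        if row[7] == "obj" and head is not None and head[3] == "VERB":
            counts[(head[2], row[2])] += 1
    return counts

print(count_verb_objects(conllu))  # Counter({('chase', 'cat'): 1})
```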
@hans
hans / fig.tex
Created June 28, 2018 16:38
fig.tex
\begin{tikzpicture}
\node (example) [draw,rectangle,align=center] at (0, 0) {Learning instance\\$w_1, \dots, w_N \to A$};
\node (dec-learn) [draw,rectangle,align=center] at (8, -3) {\textbf{Learn:}\\Infer meanings for $w_1, \dots, w_N$};
\node (lexicon) [draw,rectangle,align=center] at (0, -3) {Lexicon $\Lambda$\\$\text{raise} \to \texttt{S/N/PP} \lambda c\,x.\, c \land dir(e) = up \land move(e,x)$\\$\text{drop} \to \texttt{S/N/PP} \lambda c\,x.\, c \land dir(e) = down \land move(e,x)$\\$\text{onto} \to \texttt{PP} \lambda x.contact(x,e.patient)$\\$\ldots$};
\node (dec-compress) [draw,rectangle,align=center] at (0, -6) {\textbf{Compress:}\\Abstract away regularities in the lexicon};
\node (dec-bridge) [draw,rectangle,align=center] at (-8, -3) {\textbf{Bridge:}\\Derive new syntactic sub-categories};
\begin{scope}[shift={(-6,-7.2)}]
\Tree [.$\lambda$ [.$c$ ] [.$x$ ] [.$\land$ [.$c$ ] [.$=$ [.$dir(e)$ ] [.$up$ ] ] [.$move$ [.$e$ ] [.$x$ ] ] ] ]
\begin{scope}[xshift=130pt]
var ps = Categorical({vs: [0.2, 0.4, 0.6, 0.8]})
var ps_skew_left = Categorical({vs: [0.1, 0.2, 0.3, 0.8]})
var vs = Categorical({vs: [2, 4, 6, 8]})
var vs_skew_left = Categorical({vs: [1, 2, 3, 10]})
var Model = function(name, f) {
Object.defineProperty(f, 'name', {value: name})
return f;
};
First attempt at decoding subject P01's data onto InferSent embeddings (dimension 4096).
This is promising performance, since I haven't yet preprocessed either the imaging data or the encodings. (The regression maps ~20,000 dimensions down to 4096, so there is clear room for improvement with preprocessing.)
Compare with the original problem-set results later in this gist.
INFO:__main__:Loaded encodings of size (384, 4096).
INFO:__main__:Loaded subject data/P01 data.
INFO:__main__:Trained classifier for subject data/P01.
Fold 0: min 4.0 mean 96.5 med 41.0 max 350.0
Fold 1: min 0.0 mean 131.9 med 68.0 max 323.0
Fold 2: min 2.0 mean 144.3 med 129.0 max 366.0
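The per-fold lines above follow a simple summary format; a hypothetical helper reproducing it is shown below. The underlying values are whatever per-item statistic the script logs (presumably retrieval ranks, though that isn't shown in this preview):

```python
import statistics

def fold_summary(fold, values):
    """Format per-fold summary statistics in the style of the log above."""
    return "Fold %d: min %.1f mean %.1f med %.1f max %.1f" % (
        fold, min(values), statistics.mean(values),
        statistics.median(values), max(values))

print(fold_summary(0, [4.0, 41.0, 96.5, 350.0]))
```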