Andreas van Cranenburgh andreasvc

## imdbimages.py
""" Generate image with plot & rating for each movie in a directory. """
from __future__ import print_function
import os
import re
import sys
import glob
import json
import time
import urllib
import textwrap

## evalpatterns.py
""" Run a set of XPath queries on a corpus of parse trees and compute precision
and recall with respect to a set of hand-picked sentences. """

from __future__ import print_function
import io
import os
import glob
import nltk
import alpinocorpus

## tiger2.2singleparent.diff
--- tiger_release_aug07.corrected.16012013.xml  2013-01-16 16:35:23.000000000 +0100
+++ tiger_2.2a.xml      2013-11-03 00:02:12.890306125 +0100
@@ -3097934,7 +3097934,6 @@
       <nt id="s46234_505" cat="PP">
         <edge label="AC" idref="s46234_24" />
         <edge label="NK" idref="s46234_25" />
-        <edge label="CJ" idref="s46234_135" />
       </nt>
       <nt id="s46234_506" cat="PP">
         <edge label="AC" idref="s46234_30" />

## classify.py
""" Classify rows from CSV files with SVM with leave-one-out cross-validation;
labels taken from first column, of the form 'label_description'. """
import sys
import pandas
from sklearn import svm, cross_validation, preprocessing
# Load the CSV file named by the first command-line argument.
data = pandas.read_csv(sys.argv[1])
# Feature matrix: every column except the first one (which holds the labels).
# DataFrame.as_matrix() is no longer available in current pandas; selecting the
# columns and taking .values gives the same NumPy array.
xdata = data[data.columns[1:]].values
#xdata = preprocessing.scale(xdata)  # normalize data => mean of 0, stddev of 1
# First column has the form 'label_description'; keep only the part before the
# first underscore as the class label. DataFrame.icol() is likewise gone;
# .iloc[:, 0] is the positional equivalent.
ylabels = [a.split('_')[0] for a in data.iloc[:, 0]]
# Map the string labels to integer class ids.
ytarget = preprocessing.LabelEncoder().fit(ylabels).transform(ylabels)

## multiprocexample.py
""" A simple multiprocessing example with process pools, shared data and
per-process initialization. """
import multiprocessing

# global read-only data can be shared by each process
DATA = 11

def initworker(a):
	""" Initialize data specific to each process. """
	global MOREDATA

## README.md

      
              3 files
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                andreasvc
                / README.md
            
            
              Last active
              May 28, 2019 07:29
            
              
                Tiger & Lassy train-dev-test splits
              
          
    Tiger & Lassy train-dev-test splits

These scripts produce the train-dev-test splits for the Tiger & Lassy treebanks
used in my 2013 IWPT paper. The Tiger treebank version 2.1 was used, namely
tiger_release_aug07.export. The Lassy treebank was version 1.1, or
lassy-r19749. The development & test sets are not simply taken from the last
20%, because that would give them an uneven distribution of sentence lengths &
topics; the splits are instead chosen to ensure a balanced distribution of sentences.

  
## pca.py
"""Apply PCA to a CSV file and plot its datapoints (one per line).

The first column should be a category (determines the color of each datapoint),
the second a label (shown alongside each datapoint)."""
import sys
import pandas
import pylab as pl
from sklearn import preprocessing
from sklearn.decomposition import PCA

## cythonbookreview.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                andreasvc
                / cythonbookreview.md
            
            
              Last active
              February 11, 2018 06:02
            
              
                Review of Learning Cython Programming, by Philip Herron
              
          
    Learning Cython Programming

by Philip Herron
Birmingham: Packt Publishing, 2013, available in print and as ebook; this review is based on the PDF, 110 pp.
Reviewed by

Andreas van Cranenburgh

University of Amsterdam

  
## metainfo.py
"""Extract metadata from Project Gutenberg RDF catalog into a Python dict.

Based on https://bitbucket.org/c-w/gutenberg/

>>> md = readmetadata()
>>> md[123]
{'LCC': {'PS'},
 'author': u'Burroughs, Edgar Rice',
 'authoryearofbirth': 1875,
 'authoryearofdeath': 1950,

## gmanevert.user.js
// ==UserScript==
// @name        Gmane vertical frames
// @namespace   andreas@unstable.nl
// @include     http://news.gmane.org/*
// @include     http://thread.gmane.org/*
// @version     1
// @grant       none
// ==/UserScript==

// The default GMane 'news' view has horizontal panes which wastes lots of screen space;
	""" Generate image with plot & rating for each movie in a directory. """
	from __future__ import print_function
	import os
	import re
	import sys
	import glob
	import json
	import time
	import urllib
	import textwrap
	""" Run a set of XPath queries on a corpus of parse trees and compute precision
	and recall with respect to a set of hand-picked sentences. """

	from __future__ import print_function
	import io
	import os
	import glob
	import nltk
	import alpinocorpus
	--- tiger_release_aug07.corrected.16012013.xml 2013-01-16 16:35:23.000000000 +0100
	+++ tiger_2.2a.xml 2013-11-03 00:02:12.890306125 +0100
	@@ -3097934,7 +3097934,6 @@
	<nt id="s46234_505" cat="PP">
	<edge label="AC" idref="s46234_24" />
	<edge label="NK" idref="s46234_25" />
	- <edge label="CJ" idref="s46234_135" />
	</nt>
	<nt id="s46234_506" cat="PP">
	<edge label="AC" idref="s46234_30" />
	""" Classify rows from CSV files with SVM with leave-one-out cross-validation;
	labels taken from first column, of the form 'label_description'. """
	import sys
	import pandas
	from sklearn import svm, cross_validation, preprocessing
	data = pandas.read_csv(sys.argv[1])
	xdata = data.as_matrix(data.columns[1:])
	#xdata = preprocessing.scale(xdata) # normalize data => mean of 0, stddev of 1
	ylabels = [a.split('_')[0] for a in data.icol(0)]
	ytarget = preprocessing.LabelEncoder().fit(ylabels).transform(ylabels)
	""" A simple multiprocessing example with process pools, shared data and
	per-process initialization. """
	import multiprocessing

	# global read-only data can be shared by each process
	DATA = 11

	def initworker(a):
	""" Initialize data specific to each process. """
	global MOREDATA
	"""Apply PCA to a CSV file and plot its datapoints (one per line).

	The first column should be a category (determines the color of each datapoint),
	the second a label (shown alongside each datapoint)."""
	import sys
	import pandas
	import pylab as pl
	from sklearn import preprocessing
	from sklearn.decomposition import PCA
	"""Extract metadata from Project Gutenberg RDF catalog into a Python dict.

	Based on https://bitbucket.org/c-w/gutenberg/

	>>> md = readmetadata()
	>>> md[123]
	{'LCC': {'PS'},
	'author': u'Burroughs, Edgar Rice',
	'authoryearofbirth': 1875,
	'authoryearofdeath': 1950,
	// ==UserScript==
	// @name Gmane vertical frames
	// @namespace andreas@unstable.nl
	// @include http://news.gmane.org/*
	// @include http://thread.gmane.org/*
	// @version 1
	// @grant none
	// ==/UserScript==

	// The default GMane 'news' view has horizontal panes which wastes lots of screen space;