Andreas van Cranenburgh andreasvc

## 1027.txt.mrg.gz

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              2 stars
            
          
                andreasvc
                / 1027.txt.mrg.gz
            
            
              Last active
              December 30, 2022 00:35
            
              
                A tutorial on using tree fragments for text classification. http://nbviewer.ipython.org/gist/andreasvc/9467e27680d8950045b2
              
          
            View raw
        
    
## DH-crash-course-riddle.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                andreasvc
                / DH-crash-course-riddle.ipynb
            
            
              Last active
              August 29, 2015 14:08
            
              
                Genre Classification with a Bag-of-Words model. See http://nbviewer.ipython.org/gist/andreasvc/5d9b17fb981ee2a8b728
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## TopicModeling.ipynb

      
              1 file
            
          
              2 forks
            
          
              0 comments
            
          
              1 star
            
          
                andreasvc
                / TopicModeling.ipynb
            
            
              Created
              October 23, 2014 20:51
            
              
                Topic Modeling with gensim. Load in ipython notebook or view online: http://nbviewer.ipython.org/gist/andreasvc/66fe7547b05569c9a273
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## gmanevert.user.js
// ==UserScript==
// @name        Gmane vertical frames
// @namespace   andreas@unstable.nl
// @include     http://news.gmane.org/*
// @include     http://thread.gmane.org/*
// @version     1
// @grant       none
// ==/UserScript==

// The default GMane 'news' view has horizontal panes which waste lots of screen space;

## metainfo.py
"""Extract metadata from Project Gutenberg RDF catalog into a Python dict.

Based on https://bitbucket.org/c-w/gutenberg/

>>> md = readmetadata()
>>> md[123]
{'LCC': {'PS'},
 'author': u'Burroughs, Edgar Rice',
 'authoryearofbirth': 1875,
 'authoryearofdeath': 1950,

## cythonbookreview.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                andreasvc
                / cythonbookreview.md
            
            
              Last active
              February 11, 2018 06:02
            
              
                Review of Learning Cython Programming, by Philip Herron
              
          
    Learning Cython Programming

by Philip Herron
Birmingham: Packt Publishing, 2013, available in print and as ebook; this review is based on the PDF, 110 pp.
Reviewed by

Andreas van Cranenburgh

University of Amsterdam

  
## pca.py
"""Apply PCA to a CSV file and plot its datapoints (one per line).

The first column should be a category (determines the color of each datapoint),
the second a label (shown alongside each datapoint)."""
import sys
import pandas
import pylab as pl
from sklearn import preprocessing
from sklearn.decomposition import PCA

## README.md

      
              3 files
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                andreasvc
                / README.md
            
            
              Last active
              May 28, 2019 07:29
            
              
                Tiger & Lassy train-dev-test splits
              
          
    Tiger & Lassy train-dev-test splits

These scripts produce the train-dev-test splits for the Tiger & Lassy treebanks
used in my 2013 IWPT paper. The Tiger treebank version 2.1 was used, namely
tiger_release_aug07.export. The Lassy treebank was version 1.1, or
lassy-r19749. The reason for not just taking the last 20% for the
development & test sets is to ensure a balanced distribution of sentences; these
sets would otherwise have an uneven distribution of length & topics.

  
## multiprocexample.py
""" A simple multiprocessing example with process pools, shared data and
per-process initialization. """
import multiprocessing

# global read-only data can be shared by each process
DATA = 11

def initworker(a):
	""" Initialize data specific to each process. """
	global MOREDATA

## classify.py
""" Classify rows from CSV files with SVM with leave-one-out cross-validation;
labels taken from first column, of the form 'label_description'. """
import sys
import pandas
from sklearn import svm, cross_validation, preprocessing
data = pandas.read_csv(sys.argv[1])
xdata = data.as_matrix(data.columns[1:])
#xdata = preprocessing.scale(xdata)  # normalize data => mean of 0, stddev of 1
ylabels = [a.split('_')[0] for a in data.icol(0)]
ytarget = preprocessing.LabelEncoder().fit(ylabels).transform(ylabels)
	// ==UserScript==
	// @name Gmane vertical frames
	// @namespace andreas@unstable.nl
	// @include http://news.gmane.org/*
	// @include http://thread.gmane.org/*
	// @version 1
	// @grant none
	// ==/UserScript==

	// The default GMane 'news' view has horizontal panes which waste lots of screen space;
	"""Extract metadata from Project Gutenberg RDF catalog into a Python dict.

	Based on https://bitbucket.org/c-w/gutenberg/

	>>> md = readmetadata()
	>>> md[123]
	{'LCC': {'PS'},
	'author': u'Burroughs, Edgar Rice',
	'authoryearofbirth': 1875,
	'authoryearofdeath': 1950,
	"""Apply PCA to a CSV file and plot its datapoints (one per line).

	The first column should be a category (determines the color of each datapoint),
	the second a label (shown alongside each datapoint)."""
	import sys
	import pandas
	import pylab as pl
	from sklearn import preprocessing
	from sklearn.decomposition import PCA
	""" A simple multiprocessing example with process pools, shared data and
	per-process initialization. """
	import multiprocessing

	# global read-only data can be shared by each process
	DATA = 11

	def initworker(a):
	""" Initialize data specific to each process. """
	global MOREDATA
	""" Classify rows from CSV files with SVM with leave-one-out cross-validation;
	labels taken from first column, of the form 'label_description'. """
	import sys
	import pandas
	from sklearn import svm, cross_validation, preprocessing
	data = pandas.read_csv(sys.argv[1])
	xdata = data.as_matrix(data.columns[1:])
	#xdata = preprocessing.scale(xdata) # normalize data => mean of 0, stddev of 1
	ylabels = [a.split('_')[0] for a in data.icol(0)]
	ytarget = preprocessing.LabelEncoder().fit(ylabels).transform(ylabels)