Skip to content

Instantly share code, notes, and snippets.

# This ended up not working. Instead I crawled it sort of manually.
from bs4 import BeautifulSoup as Soup
import os
from datetime import datetime
from pod import ProgressTracker
import requests
import json
import sys
sys.path.append('..')
import json
from cluster_func import RC_PARAMS
import math
import sys
sys.path.append('..')
from LOCAL_SETTINGS import DATA_DIR, SITES
import corenlpy
import os
import re
import t4k
@enewe101
enewe101 / get_constituent_tokens.py
Created November 28, 2016 19:00
This should be in corenlp-xml-reader
def get_constituent_tokens(constituent, recursive=True):
    """Collect the tokens found among a constituent's children.

    Children are read from ``constituent['c_children']`` and visited in
    order.  A child that is a ``Token`` instance is added to the result.
    Any other child is treated as a nested constituent: when ``recursive``
    is true its tokens are gathered by a recursive call and spliced in at
    that position; when ``recursive`` is false it is skipped.

    NOTE(review): ``Token`` is not defined in this snippet -- presumably it
    is provided by the corenlp-xml-reader package; confirm the import.

    Args:
        constituent: Mapping with a ``'c_children'`` entry that can be
            iterated over.
        recursive: Whether to descend into non-token children.

    Returns:
        A new list of the ``Token`` children, in traversal order.
    """
    tokens = []
    for child in constituent['c_children']:
        if isinstance(child, Token):
            tokens.append(child)
        elif recursive:
            # Non-token children are nested constituents; flatten them.
            tokens.extend(get_constituent_tokens(child, recursive))
    return tokens
@enewe101
enewe101 / gpu-helios.sh
Created October 26, 2016 16:40
Run Theano GPU code on Helios
#!/bin/bash
# Batch job script for a PBS/Torque scheduler. The "#PBS" lines below are
# scheduler directives, not ordinary comments:
#   -l walltime=12:00:00   wall-clock limit of 12 hours
#   -l nodes=1:gpus=1      one node with one GPU
#   -A eeg-641-aa          account the job is charged to
#   -e / -o                files that receive the job's stderr / stdout
#   -N r2v-proto           job name
#PBS -l walltime=12:00:00
#PBS -l nodes=1:gpus=1
#PBS -A eeg-641-aa
#PBS -e /home/enewel3/jobs/relation2vec/proto.stderr
#PBS -o /home/enewel3/jobs/relation2vec/proto.stdout
#PBS -N r2v-proto
# Load modules, source virtualenv, go to source dir
# (only the module-load step is visible in this excerpt)
module load apps/python/2.7.10 cuda/7.5.18 libs/cuDNN/4
@enewe101
enewe101 / gpu-guillimin.pbs
Last active October 26, 2016 16:39
Run theano GPU program on Guillimin
#!/bin/bash
# Batch job script for a PBS/Torque scheduler. The "#PBS" lines below are
# scheduler directives, not ordinary comments:
#   -l nodes=1:ppn=12:gpus=1   one node, 12 processors per node, one GPU
#   -l walltime=12:00:00       wall-clock limit of 12 hours
#   -o / -e                    files that receive the job's stdout / stderr
#   -N train-basic             job name
#PBS -l nodes=1:ppn=12:gpus=1
#PBS -l walltime=12:00:00
#PBS -o /gs/project/eeg-641-aa/enewel3/entity-embeddings/src/embedder/pbs/train-basic.stdout
#PBS -e /gs/project/eeg-641-aa/enewel3/entity-embeddings/src/embedder/pbs/train-basic.stderr
#PBS -N train-basic
# Load modules, go to source dir, activate virtualenv
# (the virtualenv activation step is not visible in this excerpt)
module load Python/2.7.10 CUDA/7.5.18 cuDNN/5.0-ga
cd /gs/project/eeg-641-aa/enewel3/entity-embeddings/src/embedder
#!/bin/bash
# Batch job script for a PBS/Torque scheduler. The "#PBS" lines below are
# scheduler directives, not ordinary comments:
#   -l nodes=1:ppn=12   one node, 12 processors per node
#   -l walltime=8:00:00 wall-clock limit of 8 hours
#   -l pmem=4999m       per-process memory limit of 4999 MB
#   -o / -e             files that receive the job's stdout / stderr
#   -N 64-ABA           job name
#PBS -l nodes=1:ppn=12
#PBS -l walltime=8:00:00
#PBS -l pmem=4999m
#PBS -o /gs/project/eeg-641-aa/enewel3/jobs/worldviews-demographics/64-ABA.output
#PBS -e /gs/project/eeg-641-aa/enewel3/jobs/worldviews-demographics/64-ABA.error
#PBS -N 64-ABA
# Make the Python and Java toolchains available to the job.
module load Python/2.7.10
module load Java/1.8.0_45
import random
import shutil
from t4k import (
PersistentOrderedDict as POD, ProgressTracker as PT,
DuplicateKeyException
)
# The POD is a general-purpose data store that syncs to disk
# The PT is a subclass that's a bit more convenient for tracking
@enewe101
enewe101 / parc_reader.py
Created July 22, 2016 21:18
Basic reader for PARC XML files
from bs4 import BeautifulSoup as Soup
class AnnotatedText(object):
    """Wrapper around a PARC XML document parsed with BeautifulSoup.

    NOTE(review): only the start of the class is visible in this excerpt;
    further methods may follow in the full file.
    """

    def __init__(self, parc_xml):
        """Parse ``parc_xml`` and set up empty word storage.

        Args:
            parc_xml: The markup to parse; it is passed straight to
                BeautifulSoup with the ``'html.parser'`` backend.
        """
        # Parsed document tree.
        self.soup = Soup(parc_xml, 'html.parser')
        # Starts empty; nothing in the visible code populates it.
        self.words = []
@enewe101
enewe101 / setup-theano-lasagne.sh
Last active January 24, 2018 03:06
Script to set up a virtualenv, ready for GPU programming using theano and lasagne, on the guillimin *or* helios clusters
#
# usage: $ bash setup-theano-lasagne.sh my-env-dir
#
# For use on helios.computecanada.ca OR guillimin.hpc.mcgill.ca.
# Creates a python virtual env at the directory specified by the first
# command line argument, then installs numpy, scipy, theano, and lasagne.
# Also prints handy instructions for using the virtualenv to submit
# jobs using the queuing system, or get an interactive session.
#
#
# usage: $ ./setup-helios-theano-lasagne my-env-dir
#
# For use on helios.computecanada.ca. Creates a python virtual env at the
# directory specified by the first command line argument, then installs numpy,
# scipy, theano, and lasagne.
#
# Collect install-path variables
install_dir=$(readlink -f $1)