Skip to content

Instantly share code, notes, and snippets.

View erickpeirson's full-sized avatar
🟢

Erick erickpeirson

🟢
View GitHub Profile
@erickpeirson
erickpeirson / process_workspace.py
Last active August 29, 2015 14:19
Extract h and theta from a Hydrus workspace
# Usage:
# $ python process_workspace.py [path to workspace] [path to output directory]
import sys
import os
import pandas as pd
import re
md = sys.argv[1] # Path to directory containing Hydrus output directories.
outpath = sys.argv[2] # Path to directory where processed data will be stored.
@erickpeirson
erickpeirson / change_nodes.py
Last active August 29, 2015 14:19
Change the number of nodes in Hydrus workspace
# Usage:
# python change_nodes.py [path to workspace]
import sys
import os
import subprocess
import re
hpath = os.path.join(sys.argv[1], 'PROFILE.DAT')
@erickpeirson
erickpeirson / process_tlevel.py
Created April 24, 2015 01:34
Extract parameters from Hydrus T_Level.out file
# Usage:
# $ python process_tlevel.py [path to data] [path to output]
#
# [path to data] should point to a directory containing Hydrus workspaces (subdirectories)
# each of which contains a file called "T_Level.out". If such a file is not encountered
# in a subdirectory, that subdirectory will be ignored quietly.
#
import sys
import os
@erickpeirson
erickpeirson / openie.py
Last active August 29, 2015 14:21
Extractor for OpenIE running as a web socket (using websocketd)
import websocket
import re
import socket
class OpenIEExtractor(object):
endtoken = ''
greeting = '* * * * * * * * * * * * *'
def __init__(self, hostname, port, verbose=False):
url = 'ws://{hostname}:{port}'.format(hostname=hostname, port=port)
import nltk
class AuthoritativeText(object):
original = ""
T = [] # Tokens.
P = [] # Paragraph start indices (in T).
S = [] # Sentence start indices (in T).
I = [] # Token start,end character position indices.
def __init__(self, text):
# I found this package called "reshape", and it looks useful:
# http://svitsrv25.epfl.ch/R-doc/library/reshape/html/00Index.html
library("reshape")
# Here are some jagged data with a shared column.
d1<-data.frame(primary=c(1, 2, 3, 4), foo='bar')
d2<-data.frame(primary=c(1,3,5), secret=42)
d3<-data.frame(primary=c(5, 3, 1, 20, 4), scout="scout")
d4<-data.frame(primary=c(6, 7, 20, 4), bat='baz')
d5<-data.frame(primary=c(1, 2, 3, 4, 5, 6, 7), pi=3.141592764)
@erickpeirson
erickpeirson / chains.png
Last active August 12, 2016 21:41
Change point model
chains.png
<OAI-PMH xmlns:bib="http://purl.org/net/biblio#" xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/" xmlns:foaf="http://xmlns.com/foaf/0.1/"
xmlns:prsmstndrd="http://prismstandard.org/namespaces/1.2/basic#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns"
xmlns:ztr="http://www.zotero.org/namespaces/export#" xmlns="http://www.openarchives.org/OAI/2.0/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
<responseDate>2016-09-09T19:41:07Z</responseDate>
<request verb="ListRecords" set="1" metadataPrefix="oai_dc">/oaipmh/</request>
<ListRecords>
<record>
<header>
from django.contrib.contenttypes.models import ContentType
from collections import Counter
import networkx as nx
def generate_collection_coauthor_graph(collection,
author_predicate_uri="http://purl.org/net/biblio#authors"):
"""
Create a graph describing co-occurrences of :class:`.ConceptEntity` instances
linked to individual :class:`.Resource` instances via an authorship
:class:`.Relation` instance.
@erickpeirson
erickpeirson / arxiv-reflink.conf
Last active July 14, 2017 17:22
mod_wsgi with Flask