Skip to content

Instantly share code, notes, and snippets.

View lukehollis's full-sized avatar

Luke Hollis lukehollis

View GitHub Profile
@lukehollis
lukehollis / import_geojson.py
Created February 3, 2015 04:15
GeoJSON Import
import pymongo
import json
import pdb
def mongo(db, host="localhost", port=27017):
    """Return a handle to the MongoDB database named *db*.

    Args:
        db: Name of the database to select on the client.
        host: Hostname of the MongoDB server. Defaults to "localhost",
            the value that used to be hard-coded here.
        port: TCP port of the MongoDB server. Defaults to 27017, the
            value that used to be hard-coded here.

    Returns:
        The object obtained by indexing the ``pymongo.MongoClient`` with *db*.
    """
    # NOTE(review): ``max_pool_size`` looks like the PyMongo 2.x spelling;
    # PyMongo 3+ appears to expect ``maxPoolSize`` -- confirm against the
    # installed driver version before upgrading.
    client = pymongo.MongoClient(host, port, max_pool_size=None)
    return client[db]
@lukehollis
lukehollis / improve_schinke_stemming_resources.py
Created May 4, 2015 23:43
Improve the Schinke Stemming Algorithm Resources
# Conjunction word list, spelled with 'u' in place of 'v'
# (e.g. 'uel', 'siue', 'quamuis').
conj_list = [
    'ac', 'at', 'atque', 'aut', 'et', 'ne', 'nec', 'non',
    'sed', 'si', 'uel', 'cum', 'quum', 'donec', 'dum', 'enim',
    'enimuero', 'etiam', 'etsi', 'igitur', 'itaque', 'nam', 'necnon', 'neque',
    'nisi', 'postquam', 'quamquam', 'quamuis', 'quando', 'que', 'quia', 'quin',
    'quippe', 'quinetiam', 'quod', 'quodque', 'siue', 'ut', 'tam', 'necdum',
]
# Preposition word list, spelled with 'u' in place of 'v' (e.g. 'uersus').
# Each entry appears exactly once; the earlier version repeated 'sub' and
# 'super' at the end of the literal.
prep_list = [
    'ante', 'ad', 'circum', 'contra', 'inter', 'intra', 'post', 'in', 'en',
    'praeter', 'per', 'propter', 'super', 'uersus', 'extra', 'trans', 'sub',
    'ob', 'a', 'ab', 'de', 'cum', 'e', 'ex', 'sine', 'pro', 'prae',
]
@lukehollis
lukehollis / .gitignore
Created May 5, 2015 18:23
.gitignore for wamu.org repo
# Ignore configuration files that may contain sensitive information.
sites/*/settings*.php
# Ignore paths that contain user-generated content.
sites/*/files
sites/*/private
sites/*/~private
.svn
*.svn/
@lukehollis
lukehollis / scansion_to_html.py
Created December 5, 2015 06:31
Get scansion info and turn it into html
import pdb
import re
import string
import sys
class ScansionToHTML:
def __init__(self, line, scansion):
self.scansion = scansion
@lukehollis
lukehollis / perseus_to_mongo.py
Created January 28, 2016 15:32
Really simple CLTK data to Mongo for Perseus XML
import pdb
import os, json, re
from bs4 import BeautifulSoup
import html.parser
import pymongo
from db import mongo
class PerseusToMongo:
# a class to migrate Perseus XML file data to mongo db
@lukehollis
lukehollis / find_sentence_lengths.py
Created January 28, 2016 15:48
Find sentence lengths of texts in the CLTK corpora for Perseus Greek and Latin XML
"""
Inspired by "Quantifying origin and character of long-range correlations in narrative texts"
by Stanisław Drożdż, Paweł Oświęcimka, Andrzej Kulig, Jarosław Kwapień, Katarzyna Bazarnik,
Iwona Grabska-Gradzińska, Jan Rybicki, and Marek Stanuszek, this is an attempt to use
the CLTK tokenizers to measure the sentence lengths of works in the Greek and Latin corpora from the
Perseus Digital Archive for analysis via the methods used by the above researchers.
"""
This file has been truncated, but you can view the full file.
[
{
"author": "Q. Horatius Flaccus (Horace)",
"language": "latin",
"sentence_lengths": [
37,
41,
36,
19,
18,
{"sentence_lengths": [52, 22, 5, 49, 23, 3, 24, 17, 25, 31, 11, 3, 17, 25, 30, 16, 17, 21, 17, 29, 9, 28, 15, 33, 3, 12, 21, 10, 13, 23, 7, 14, 13, 5, 10, 57, 14, 14, 15, 37, 17, 20, 15, 27, 25, 19, 9, 7, 14, 6, 7, 7, 11, 18, 15, 25, 66, 13, 5, 17, 29, 15, 12, 25, 20, 20, 27, 21, 50, 8, 19, 16, 14, 6, 6, 20, 6, 14, 28, 14, 28, 23, 28, 15, 33, 26, 20, 6, 33, 22, 19, 3, 5, 18, 3, 23, 41, 33, 20, 19, 12, 25, 2, 17, 17, 13, 42, 24, 12, 22, 61, 27, 21, 26, 8, 12, 10, 6, 22, 12, 8, 2, 18, 13, 8, 10, 21, 11, 5, 29, 26, 19, 7, 18, 8, 24, 13, 10, 20, 21, 16, 9, 20, 10, 10, 13, 30, 26, 29, 9, 23, 9, 9, 12, 7, 28, 22, 6, 13, 12, 20, 26, 39, 7, 12, 14, 35, 25, 20, 44, 16, 15, 9, 14, 19, 32, 35, 25, 12, 23, 13, 23, 25, 28, 5, 14, 13, 14, 26, 19, 26, 13, 34, 22, 25, 3, 29, 4, 8, 10, 14, 15, 22, 13, 12, 44, 7, 6, 6, 14, 15, 15, 20, 22, 27, 14, 4, 7, 7, 13, 15, 27, 14, 9, 10, 42, 20, 6, 5, 27, 17, 14, 9, 5, 14, 25, 15, 14, 7, 13, 6, 13, 23, 12, 27, 28, 34, 20, 7, 42, 6, 22, 19, 20, 21, 8, 20, 21, 42, 13, 27, 14, 13, 13, 13,
@lukehollis
lukehollis / functions.php
Created March 8, 2017 21:18
ACF Sponsorship Ad Spots
if(function_exists("register_field_group"))
{
register_field_group(array (
'id' => 'acf_sponsor-ads',
'title' => 'Sponsor Ads',
'fields' => array (
array (
'key' => 'field_58c0739e3a351',
'label' => 'sponsorship_ad_image',
'name' => 'sponsorship_ad_image',
@lukehollis
lukehollis / gist:6e49f833416706f24b8387c6b74ddd9c
Created March 29, 2017 22:51 — forked from adammeghji/gist:5637522
Convert a PostgreSQL database from SQL_ASCII to UTF8 encoding
# convert createdb's template to UTF8
# Every statement below is piped into psql running as the `postgres` user.
# Order matters: template1 is un-flagged, dropped, recreated, then re-flagged.
# Step 1: clear the template flag on template1 (presumably so the drop below is permitted -- confirm).
echo "UPDATE pg_database SET datistemplate = FALSE WHERE datname = 'template1';" | psql -U postgres
# Step 2: drop the existing template1 database.
echo "drop database template1;" | psql -U postgres
# Step 3: recreate template1 from template0 with UTF8 encoding.
echo "create database template1 with template = template0 encoding = 'UTF8';" | psql -U postgres
# Step 4: set the datacl (access privileges) column for the new template1 to the literal ACL below.
echo "update pg_database set datacl='{=c/postgres,postgres=CTc/postgres}' where datname='template1';" | psql -U postgres
# Step 5: flag template1 as a template database again.
echo "UPDATE pg_database SET datistemplate = TRUE WHERE datname = 'template1';" | psql -U postgres
# export and reimport as UTF8
# Dump `mydatabase` as user `uniiverse` with utf8 output encoding into mydatabase_utf8.sql.
pg_dump -U uniiverse --encoding utf8 mydatabase -f mydatabase_utf8.sql
# Create the empty UTF8-encoded target database `mydatabase_utf8` as user `postgres`.
createdb -U postgres -E utf8 mydatabase_utf8