Skip to content

Instantly share code, notes, and snippets.

@harlo
harlo / keybase.md
Last active June 4, 2018 20:34
current keybase verification

Keybase proof

I hereby claim:

  • I am harlo on github.
  • I am harlo (https://keybase.io/harlo) on keybase.
  • I have a public key whose fingerprint is 4422 F773 B498 8C77 F99D 287E 655C 2E48 33B8 2A02

To claim this, I am signing this object:

@harlo
harlo / label_cleanser.py
Created January 16, 2014 22:12
This is the python implementation of our label cleanser (which we ultimately ported to ruby). In python, we use Levenshtein, but in ruby, we just use JaroWinkler. The results are slightly different, so the thresholds had to be adjusted accordingly.
from collections import namedtuple
from Levenshtein import ratio
from Levenshtein import distance
import re, csv, os
delimiter = ','
quotechar = '|'
quoting = csv.QUOTE_MINIMAL
BrandInfo = namedtuple('BrandInfo', 'model_text utqg_correlate')
@harlo
harlo / .gitignore
Last active August 29, 2015 14:19
conf tools i use a lot
*.pyc
@harlo
harlo / PixelKnotRandomPhraseGenerator.java
Last active August 29, 2015 14:17
Random Passphrase Generation in Android using the Spell Checker :)
package info.guardianproject.pixelknot.utils;
import java.util.ArrayList;
import java.util.Random;
import android.content.Context;
import android.view.textservice.SentenceSuggestionsInfo;
import android.view.textservice.SpellCheckerSession;
import android.view.textservice.SpellCheckerSession.SpellCheckerSessionListener;
import android.view.textservice.SuggestionsInfo;
@harlo
harlo / get_image_info.py
Last active August 29, 2015 14:12
threading together some screenshot pngs to video
def get_image_info(data):
"""
FROM http://markasread.net/post/17551554979/get-image-size-info-using-pure-python-code
Return (content_type, width, height) for a given img file content
no requirements
"""
import struct
data = str(data)
@harlo
harlo / Dockerfile
Created January 1, 2015 17:07
Globaleaks Dockerfile
FROM ubuntu:14.04
MAINTAINER harlo <harlo.holmes@gmail.com>
# UPDATE
RUN apt-get update
RUN apt-get -yq install openssl python-dev make curl openssh-server
RUN useradd -ms /bin/bash -p $(openssl passwd -1 'YOUR PASSWORD, DINGUS') globaleaks
RUN adduser globaleaks sudo
@harlo
harlo / page_map_to_csv.py
Created December 10, 2014 19:57
real quick word count json-to-csv for deeplab
from sys import argv, exit
def page_map_to_csv(file_in, file_out=None):
import os
if not os.path.exists(file_in):
print "NOPE: %s is not there" % file_in
return False
from json import loads
@harlo
harlo / createGensimObjects.py
Created December 4, 2014 18:34
createGensimObjects
def createGensimObjects(task):
task_tag = "GENSIM TOPIC EXTRACTION"
print "\n\n************** %s [START] ******************\n" % task_tag
print "USING TEXT DOCUMENT at %s" % task.doc_id
task.setStatus(302)
from lib.Worker.Models.uv_document import UnveillanceDocument
from conf import DEBUG
from vars import ASSET_TAGS
@harlo
harlo / generatePageMap.py
Created December 4, 2014 18:33
generatePageMap
def generatePageMap(uv_task):
task_tag = "PAGE MAPPER"
print "\n\n************** %s [START] ******************\n" % task_tag
print "MAPPING PAGES FROM TEXT DOCUMENT at %s" % uv_task.doc_id
uv_task.setStatus(302)
from lib.Worker.Models.uv_document import UnveillanceDocument
from conf import DEBUG
from vars import ASSET_TAGS
@harlo
harlo / extractNEREntities.py
Last active August 29, 2015 14:10
extractNEREntities
def extractNEREntities(task):
task_tag = "NER ENTITY EXTRACTION"
print "\n\n************** %s [START] ******************\n" % task_tag
print "TOKENIZING TEXT DOCUMENT at %s" % task.doc_id
task.setStatus(302)
from lib.Worker.Models.uv_document import UnveillanceDocument
from conf import DEBUG
from vars import ASSET_TAGS