Skip to content

Instantly share code, notes, and snippets.

@rjpower
rjpower / pdfutil.py
Created March 18, 2012 17:26
Extracting a title from a pdf document
from BeautifulSoup import BeautifulStoneSoup
import subprocess
import sys
import tempfile
def extract_pdf_title(pdfdata):
src_file = tempfile.NamedTemporaryFile(delete=True)
src_file.write(pdfdata)
src_file.flush()
@rjpower
rjpower / ggplot-tcp-timeline.R
Created March 20, 2012 20:50
Generate a timeline using ggplot and tshark
require('ggplot2')
require('stringr')
# Capture file name.
PCAP <- 'nytimes.pcap'

# Base tshark command line; paste() joins its arguments with single spaces.
TSHARK <- paste('tshark', '-E header=y', '-T fields')

# Frame, IP, TCP and UDP field names. Presumably the columns tshark is asked
# to emit -- confirm against where FIELDS is consumed.
FIELDS <- c(
  'frame.time_relative', 'frame.len',
  'ip.src', 'tcp.srcport', 'udp.srcport',
  'ip.dst', 'tcp.dstport', 'udp.dstport'
)
data = read.csv(header=T, sep="\t", pipe(
@rjpower
rjpower / date_extract.py
Created March 20, 2012 21:58
Extract email sent times from a mailbox
#!/usr/bin/env python
import collections, glob, os, re
from dateutil.parser import *
# Find every "Sent Mail" mbox file matching the Thunderbird ImapMail layout
# under the current user's home directory.
sentmail_mbox = glob.glob(os.path.expanduser('~/.thunderbird/*/ImapMail/*/*/Sent Mail'))

# Collect the raw lines of all matched mailboxes into one flat list.
lines = []
for mbox in sentmail_mbox:
    # The context manager closes each mailbox as soon as it has been read,
    # instead of leaving the handle open until garbage collection.
    with open(mbox) as mbox_file:
        lines.extend(mbox_file.read().split('\n'))
@rjpower
rjpower / sleeping-habits.R
Created March 20, 2012 21:59
Box plot a distribution of sleeping habits
library("ggplot2")
library("reshape")
# Read every line of the "Sent Mail" mailbox file(s) matched by a glob.
#
# pattern: glob pattern locating the mailbox file(s); defaults to the
#          location this script has always used.
# Returns a character vector with the lines of all matching files, in the
# order Sys.glob() reports them (character(0) when nothing matches).
getData = function(pattern='/home/power/.thunderbird/*/*/*/*/Sent Mail') {
  SOURCE = Sys.glob(pattern)
  # readLines() opens exactly one file, so handing it the whole glob result
  # fails as soon as the pattern matches more than one mailbox (or none).
  # Read each match on its own and concatenate the results instead.
  per_file = lapply(SOURCE, readLines)
  return(as.character(unlist(per_file, use.names=FALSE)))
}
getMatches = function(data) {
matched_lines = grep('^Date:.*, .*-0\\d+', data, value=T)
@rjpower
rjpower / ggplot2-census.R
Created March 20, 2012 22:02
ggplot2 using census data
require("maps")
require("ggplot2")
require("sp")
# Relative path of the census CSV file to load.
census_data <- "ACS_10_1YR_S1901/ACS_10_1YR_S1901_with_ann.csv"

# Parse the CSV, skipping the first 6 lines of the file before reading.
# NOTE: the variable shares its name with base R's t(); the name is kept
# because the code that follows refers to it.
t <- read.csv(file = census_data, skip = 6)
sinfo = data.frame(region=t$Geography,
mean=t$Estimate.48,
median=t$Estimate.44,
@rjpower
rjpower / thread_prof.py
Last active December 17, 2015 10:19
Global thread profiling with Python.
from threading import Thread
import cProfile
import pstats
def enable_thread_profiling():
'''Monkey-patch Thread.run to enable global profiling.
Each thread creates a local profiler; statistics are pooled
to the global stats object on run completion.'''
@rjpower
rjpower / watchdog.py
Created June 15, 2013 17:45
File based watchdog timer.
class FileWatchdog(threading.Thread):
    """Watchdog for a file (typically `sys.stdin`).

    When the file closes, terminate the process.
    (This typically occurs when the parent process is lost.)
    """

    def __init__(self, file_handle):
        """Create the watchdog thread without starting it.

        Args:
            file_handle: The file object to watch; stored unchanged as
                `self.file_handle`.
        """
        threading.Thread.__init__(self, name='WatchdogThread')
        # Mark the thread as a daemon via the `daemon` attribute.
        # `setDaemon()` is deprecated and emits a DeprecationWarning on
        # current Python versions; the attribute has the same effect.
        self.daemon = True
        self.file_handle = file_handle
#!/usr/bin/env python
"""
Compare speed of several methods of copying data between two GPU devices.
"""
import atexit, ctypes, re, time
import numpy as np
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
@rjpower
rjpower / array-viz.py
Created October 31, 2013 13:29
Quick matplotlib script to create pictures for array visualizations.
import pylab
import pandas as P
import numpy as np
def draw_array(a, target=None):
fig = pylab.gcf()
fig.frameon = False
ax = fig.gca()
#ax.set_axis_off()
@rjpower
rjpower / index.html
Created November 11, 2013 21:53
Transaction chain visualization/cycle finding.
<!DOCTYPE html>
<!-- Some path computation code borrowed from http://bl.ocks.org/rkirsling/5001347 -->
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>Transaction Chain Viz</title>
<script type='text/javascript' src='http://d3js.org/d3.v3.min.js'></script>
<script type='text/javascript' src='http://cdnjs.cloudflare.com/ajax/libs/lodash.js/2.2.1/lodash.min.js'></script>
<script type='text/javascript' src='http://cpettitt.github.io/project/dagre-d3/latest/dagre-d3.min.js'></script>