Skip to content

Instantly share code, notes, and snippets.

View jbylund's full-sized avatar

Joseph Bylund jbylund

View GitHub Profile
#!/usr/bin/python
import urllib2
import json
import pprint
pp = pprint.PrettyPrinter(indent=4)
class instance_size:
def __init__(self):
pass
A bunch of files named loglines_... contain pixel requests; these correspond roughly to logparse output or to the instance id logs in S3.
joseph 11:46:33 @ hephaestus ~/loglines> /bin/grep -H -F "e=24&" loglines_* > error_pixels
To take a 60-second sample from every live nginx host (requires moreutils for `parallel`, and probably discover.py in your PATH):
#!/bin/bash
parallel -j 150 -i bash -c "source ~/.bashrc; ssh {} \"timeout 60 tail -f /mnt/nginx_access.log\" > loglines_{}" -- $( discover.py nginx | cut -f 3 | sort )
Get a sorted list of the top 10 `i` keys that threw errors:
@jbylund
jbylund / csv_multi.py
Last active May 21, 2023 05:41
Simple csv multiprocessor magic.
#!/usr/bin/python
import csv
import math
import multiprocessing
import os
import sys
import tempfile
READ_BUFFER = 2**13
function FindProxyForURL(url, host) {
if (
shExpMatch(host, "101com.com") ||
shExpMatch(host, "*.101com.com") ||
shExpMatch(host, "101order.com") ||
shExpMatch(host, "*.101order.com") ||
shExpMatch(host, "123found.com") ||
shExpMatch(host, "*.123found.com") ||
shExpMatch(host, "123pagerank.com") ||
shExpMatch(host, "*.123pagerank.com") ||
@jbylund
jbylund / pre-commit
Last active June 5, 2022 14:46
pre-commit
#!/usr/bin/python
import subprocess
import os
import json
import sys
def get_staged_files():
    """Return the paths of the files currently staged in the git index.

    Runs ``git diff --cached --name-only`` in the current working directory
    and returns one entry per non-empty line of its output.

    Returns:
        list of str: the staged paths exactly as git prints them; an empty
        list when nothing is staged.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    # universal_newlines=True makes check_output return text rather than
    # bytes, so the same code works on both Python 2.7 and Python 3.
    output = subprocess.check_output(
        ["git", "diff", "--cached", "--name-only"],
        universal_newlines=True,
    )
    # Drop empty entries: with nothing staged the old strip().split('\n')
    # returned [''], handing callers a bogus empty filename.
    return [line for line in output.split("\n") if line]
class Check(object):
@jbylund
jbylund / date_range.py
Created December 8, 2015 14:19
Date range logic
class DateRange(object):
start_of_time = datetime.date.min
end_of_time = datetime.date.max
def __init__(self, start=start_of_time, end=end_of_time):
if type(start) != type(DateRange.start_of_time):
self.start = datetime.datetime.strptime(start, '%Y-%m-%d').date()
else:
self.start = start
if type(end) != type(DateRange.end_of_time):
self.end = datetime.datetime.strptime(end, '%Y-%m-%d').date()
@jbylund
jbylund / hash_partition.py
Created December 12, 2015 01:50
Hash Partition some number of files... to be patched up some.
#!/usr/bin/python
import argparse
import hashlib
import json
import multiprocessing
import os
import re
import resource
import subprocess
import urllib
# Build and install the latest GNU parallel from source in a throwaway directory.
# NOTE(review): the tarball is fetched over plain HTTP and installed as root
# without any signature/checksum verification -- consider verifying it.
export workdir="$(mktemp -d)"
(
    # Abort the subshell on the first failure so that a failed cd, download or
    # configure step never falls through to `sudo make install`.
    set -e
    cd "$workdir"
    wget http://ftpmirror.gnu.org/parallel/parallel-latest.tar.bz2
    tar -xf parallel-latest.tar.bz2
    # Presumably the tarball unpacks into a single versioned directory; enter it.
    cd "$(find . -maxdepth 1 -mindepth 1 -type d)"
    ./configure
    # One make job per logical CPU; the pattern is anchored so only the
    # "processor : N" lines of /proc/cpuinfo are counted.
    make -j "$(grep -c '^processor' /proc/cpuinfo)"
    sudo --non-interactive make install
)
#!/usr/bin/python
"""
Something that's kind of sort of like a multiprocessing pool, but is designed for heterogeneously sized tasks
"""
import math
import multiprocessing
import random
import sys
import time
import Queue
#!/usr/bin/python
"""Get the set of instance id's (exclude spot instances)!"""
import boto3
import datetime
import json
import redis
import sys
import base64
VERSION = 0.2