Skip to content

Instantly share code, notes, and snippets.

@tripleee
tripleee / postsxml2csv.py
Last active September 6, 2021 15:19
postsxml2csv
#!/usr/bin/env python3
# -*- python -*-
from xml.etree import cElementTree
import csv
# Copy/paste from the help section on SEDE
# http://data.stackexchange.com/stackoverflow/query/new
schema = [
@tripleee
tripleee / pharma-domains.txt
Created July 20, 2017 10:16
Domains from "Repeated URL at end of long post" hits
realsupplementadvisor.com
supplementssafe.com
supplementvalley.com
besthealthdiet.com
dietasrevisao.com
mynutritionsguide.com
toptryloburn.com
freesupplementrial.com
besthealthmarket.org
healthflyup.com
#!/usr/bin/env python3
# Convert "epoch<TAB>who<TAB>what" log lines into lines that start with a
# human-readable UTC timestamp.
import fileinput
import time


def format_line(line):
    """Return *line* with its leading epoch timestamp rendered via ``%c``.

    *line* has the form ``when<TAB>who<TAB>what`` where *when* is an integer
    number of seconds since the epoch, interpreted as UTC.  Only the first
    two tabs act as separators, so tabs inside *what* are kept verbatim
    instead of breaking the three-way unpacking.

    Raises:
        ValueError: if the line has fewer than three fields or *when* is
            not an integer.
    """
    when, who, what = line.rstrip('\n').split('\t', 2)
    stamp = time.strftime("%c", time.gmtime(int(when)))
    return "{0} {1} {2}".format(stamp, who, what)


def main():
    """Reformat every line of the files named on the command line (or stdin)."""
    for line in fileinput.input():
        print(format_line(line))


if __name__ == "__main__":
    main()
@tripleee
tripleee / FIRE.gif
Last active April 13, 2017 06:58
FIRE.gif
FIRE.gif
{"dead": 1174, "life_avg": 156.9923339011925, "life_sum": 184309.0, "site": "apple.stackexchange.com", "alive": 123}
{"dead": 1016, "life_avg": 158.07972440944883, "life_sum": 160609.0, "site": "stackoverflow.com", "alive": 413}
{"dead": 765, "life_avg": 156.25359477124184, "life_sum": 119534.0, "site": "graphicdesign.stackexchange.com", "alive": 103}
{"dead": 825, "life_avg": 125.70060606060606, "life_sum": 103703.0, "site": "drupal.stackexchange.com", "alive": 70}
{"dead": 570, "life_avg": 152.22105263157894, "life_sum": 86766.0, "site": "superuser.com", "alive": 96}
{"dead": 741, "life_avg": 66.83265856950068, "life_sum": 49523.0, "site": "askubuntu.com", "alive": 43}
{"dead": 7, "life_avg": 6103.428571428572, "life_sum": 42724.0, "site": "writers.stackexchange.com", "alive": 6}
{"dead": 194, "life_avg": 203.58247422680412, "life_sum": 39495.0, "site": "security.stackexchange.com", "alive": 26}
{"dead": 213, "life_avg": 128.55868544600938, "life_sum": 27383.0, "site": "meta.stackexchange.com", "alive": 21}
@tripleee
tripleee / deleted.py
Created March 29, 2017 12:40
Average deletion time from Metasmoke JSON dump
#!/usr/bin/env python
import sys, json
from datetime import datetime, timedelta
with open(sys.argv[1], 'r') as input:
j = json.load(input)
deleted = list()
alive = list()
@tripleee
tripleee / git-up.bash
Last active January 17, 2017 09:26
Pull from upstream, push to clone
# Suitable for inclusion in your .bash_profile or similar
# up [BRANCH]
#   Pull BRANCH (default: master) from the "upstream" remote, push it to
#   "origin", then switch back to the branch that was checked out on entry.
#   Returns non-zero if any git step fails.
up () {
    local targetbranch=${1-master}
    local currentbranch rc
    # Assign separately from `local`, which would otherwise mask a failing
    # rev-parse (e.g. when run outside a git work tree).
    currentbranch=$(git rev-parse --abbrev-ref HEAD) || return
    # Stop if the switch fails: otherwise the pull below would merge
    # upstream's target branch into whatever branch is checked out now.
    git checkout -q "$targetbranch" || return
    git pull upstream "$targetbranch" && git push origin "$targetbranch"
    rc=$?
    # Always try to go back, but still report a pull/push failure.
    git checkout -q "$currentbranch" && return "$rc"
}
# Fetch latest 100 metasmoke entries
# (-s: no progress output, -o: save the response as metasmoke.json)
curl -so metasmoke.json 'https://metasmoke.erwaysoftware.com/search.json'
./urls metasmoke.json |
# output is list of metasmoke id, post URL, contained URL
# drop adjacent duplicates, i.e. a single post that contained the same URL more than once
uniq |
# extract the contained URLs
cut -d, -f3- |
# extract the host part (crudely)
@tripleee
tripleee / metasmoke-img-md5.sh
Created November 2, 2016 04:59
MD5 checksums from Metasmoke image spam
# Metasmoke search "img src" in body, true positive feedback
# The JSON response is saved to /tmp/metasmoke-img for the steps below.
curl -o /tmp/metasmoke-img 'https://metasmoke.erwaysoftware.com/search.json?body=img+src%3D&commit=Search&feedback=true+positive&reason=&site=&title=&user_rep_direction=%3E%3D&user_reputation=0&username=&utf8=%E2%9C%93&why='
# Extract the image URL, fetch it, and produce an MD5
# grep -o prints only the matched text: the JSON-escaped `<img src="` prefix
# (\u003c is `<`) followed by everything up to the next quote or backslash.
grep -hEo '\\u003cimg src=\\"[^"\]*' /tmp/metasmoke-img |
# Keep the part after the double quote (the URL itself), then de-duplicate.
cut -d '"' -f2 | sort -u |
# For every URL, print "URL<TAB>checksum of the downloaded content".
# NOTE(review): `md5` is presumably the BSD/macOS utility; on Linux the
# equivalent is `md5sum`, whose output format differs -- confirm the platform.
xargs sh -c 'for u; do printf "%s\t%s\n" "$u" "$(curl -s "$u" | md5)"; done' _ |
# Sort on the second field (the checksum) so identical images end up adjacent.
sort -k2 >/tmp/metasmoke-img.md5
# How many?
@tripleee
tripleee / starttime.sh
Created August 17, 2016 06:23
starttime - print start time of a process on Linux
#!/bin/sh
# http://stackoverflow.com/a/38740102/874188
#
# Usage: starttime PID...
# Print "<start time> <pid> <(comm)>" for each PID given as an argument.
#
# awk reads /proc/uptime first (NR==1) to learn the seconds since boot; each
# /proc/PID/stat that xargs appends then supplies starttime (field 22, in
# clock ticks after boot), which is converted to wall-clock time.
# NOTE(review): strftime() and systime() are not POSIX awk; presumably this
# needs gawk or a compatible awk -- confirm.
for p; do
    # NUL-terminate each path for xargs -0
    printf "/proc/%i/stat\0" "$p"
done |
# -r: do not run awk at all when no PIDs were given
xargs -r0 awk -v ticks="$(getconf CLK_TCK)" 'NR==1 { now=$1; next }
# comm (field 2) is parenthesised and may itself contain spaces or
# parentheses, so split the record at the LAST ")" rather than trusting
# whitespace-based $2 and $22.
match($0, /.*\)/) {
    lp = index($0, "(")
    comm = substr($0, lp, RLENGTH - lp + 1)
    # Fields after comm: f[1] is field 3 (state), hence f[20] is field 22.
    split(substr($0, RLENGTH + 2), f, " ")
    # Keep pid and comm out of the strftime format so a "%" in comm stays literal.
    print strftime("%c", systime() - (now - (f[20] / ticks))) " " $1 " " comm
}
' /proc/uptime