Skip to content

Instantly share code, notes, and snippets.

View Calculate Weekly Percentiles Per Country
# calculates per country weekly percentiles
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import matplotlib.dates as md
import csv
import datetime
from scipy import stats
View gist:9af8c6edbf44becbc6ac
# Reads a file like <count>, raw browser string
# and processes it to output:
# percentage, normalized browser string
import sys
import md5
from ua_parser import user_agent_parser
# beautify ua
nuria / UaCalculator
Last active August 29, 2015 14:14
Calculates user agent percentages
View UaCalculator
# read file
# File format is:
# {"browser_major":"1","os_family":"Android","os_major":"1","device_family":"Opus One","browser_family":"Android","os_minor":"5"} 5
# {"browser_major":"1","os_family":"Android","os_major":"4","device_family":"icube 900","browser_family":"Baidu Explorer","os_minor":"2"} 1
# hash and store values
# loop over values and
nuria / cat_file_to_tcp_endpoint
Last active August 29, 2015 14:15
Cats a file to a tcp endpoint line by line using streams and zeromq
View cat_file_to_tcp_endpoint
import zmq
import io
import time
import sys
import re
# reads line by line a file and sends it
# to a tcp endpoint using zeromq
# handy to cat big files to a listener
nuria / gist:2f5e33902122870e44e0
Created March 25, 2015 17:36
View gist:2f5e33902122870e44e0
node 'limn1.eqiad.wmflabs' {
include webserver::apache
# make sure /var/log/apache2 is readable by wikidevs for debugging.
# This won't make the actual log files readable, only the directory.
# Individual log files can be created and made readable by
# classes that manage individual sites.
file { '/var/log/apache2':
ensure => 'directory',
owner => 'root',
nuria / gist:e0b0d4a702cdd45f6d37
Last active August 29, 2015 14:17
VCL cookie setting, time manipulation
View gist:e0b0d4a702cdd45f6d37
#include <time.h>
#include <string.h>
#define vcl_string char
char* get_expiration() {
struct tm str_time;
time_t time_of_day;
char expiration[100];
nuria / testing_udf.hql
Last active September 16, 2015 18:29
Testing a udf
View testing_udf.hql
-- Smoke-test the refinery isPageview Hive UDF against one hour of webrequest data.
-- Register the locally built refinery jars so Hive can resolve the UDF class.
-- NOTE(review): assumes isPageview is already registered / resolvable by name
-- in this Hive session — confirm (no CREATE TEMPORARY FUNCTION appears here).
add jar /home/nuria/workplace/refinery/source/refinery-core/target/refinery-core-0.0.19-SNAPSHOT.jar;
add jar /home/nuria/workplace/refinery/source/refinery-hive/target/refinery-hive-0.0.19-SNAPSHOT.jar;
use wmf;
-- The year/month/day/hour predicates restrict the query to a single hour of data.
select isPageview(uri_host, uri_path, uri_query, http_status, content_type, user_agent) from webrequest where year=2015 and month=09 and day=04 and hour=01;
-- Invoke hive like this, leaving hive.aux.jars.path empty so only the jars
-- added above are on the aux path:
>hive --hiveconf hive.aux.jars.path= -f test-udf.hql
View find_ips_with_less_than_10_ocurrences
-- Pull the city for client IPs that appear infrequently in one hour of webrequest data.
-- geocoded data on webrequest is like:
-- {"city":"Unknown","country_code":"--","longitude":"-1","postal_code":"Unknown","timezone":"Unknown","subdivision":"Unknown","continent":"Unknown","latitude":"-1","country":"Unknown"}
-- find records where, by city, we have fewer than 10 unique IPs
-- NOTE(review): the comment above says "by city", but the subquery keeps client
-- IPs with fewer than 10 request ROWS overall (count(*) grouped by client_ip),
-- independent of city — confirm which behavior was intended.
-- NOTE(review): no day= predicate appears in either query — confirm the
-- partition filter is complete for the intended scan.
use wmf;
select wr1.client_ip, geocoded_data["city"] from webrequest as wr1 where year=2015 and month=09 and hour=01
and wr1.client_ip in (select wr2.client_ip from webrequest wr2 where year=2015 and month=09 and hour=01 group by wr2.client_ip having count(*) <10);
nuria / gist:833fef6a74574125a3fc
Last active September 30, 2015 20:59
Add a third party lib to map reduce job
View gist:833fef6a74574125a3fc
# Run a Camus map-reduce job with third-party jars available on the classpath.
# LIBJARS (comma-separated) is handed to -libjars so the jars ship with the job;
# HADOOP_CLASSPATH (colon-separated) makes the same jars visible to the client JVM.
export LIBJARS=/home/nuria/avro-kafka/camus-example-0.1.0-wmf6.jar,/home/nuria/avro-kafka/camus-wmf-0.1.0-wmf6.jar
export HADOOP_CLASSPATH=/home/nuria/avro-kafka/camus-example-0.1.0-wmf6.jar:/home/nuria/avro-kafka/camus-wmf-0.1.0-wmf6.jar
# Fix: the token "nuria_testing_avro" was split across two lines by a transcription
# wrap ("avr" / "o"); rejoined here so the command is a single valid shell line.
# NOTE(review): "nuria_testing_avro" sits flush against ${LIBJARS} with no
# separating space or option name — a flag it belonged to may have been lost
# in transcription; confirm against the original invocation.
/usr/bin/hadoop jar /home/nuria/avro-kafka/camus-wmf-0.1.0-wmf6.jar com.linkedin.camus.etl.kafka.CamusJob -libjars ${LIBJARS}"nuria_testing_avro" -P /home/nuria/avro-kafka/ >> ./log_camus_avro_test.txt 2>&1
nuria / gist:01fef56a8a69528fee93
Created October 26, 2015 22:47
consume from kafka
View gist:01fef56a8a69528fee93
from kafka import KafkaConsumer
import avro.schema
import io
# To consume messages
consumer = KafkaConsumer('mediawiki_CirrusSearchRequestSet',