View Calculate Weekly Percentiles Per Country
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
# calculates per country weekly percentiles | |
# | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import matplotlib.mlab as mlab | |
import matplotlib.dates as md | |
import csv | |
import datetime | |
from scipy import stats |
View gist:9af8c6edbf44becbc6ac
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
# Reads a file like <count>, raw browser string | |
# and processes it to output: | |
# percentage, normalized browser string | |
import sys | |
import md5 | |
from ua_parser import user_agent_parser | |
# beautify ua |
View UaCalculator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/lib/python | |
# read file | |
# File format is: | |
# {"browser_major":"1","os_family":"Android","os_major":"1","device_family":"Opus One","browser_family":"Android","os_minor":"5"} 5 | |
# {"browser_major":"1","os_family":"Android","os_major":"4","device_family":"icube 900","browser_family":"Baidu Explorer","os_minor":"2"} 1 | |
# hash and store values | |
# loop over values and |
View cat_file_to_tcp_endpoint
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin | |
import zmq | |
import io | |
import time | |
import sys | |
import re | |
# reads a file line by line and sends it | |
# to a tcp endpoint using zeromq | |
# handy to cat big files to a listener |
View gist:2f5e33902122870e44e0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
node 'limn1.eqiad.wmflabs' { | |
include webserver::apache | |
# make sure /var/log/apache2 is readable by wikidevs for debugging. | |
# This won't make the actual log files readable, only the directory. | |
# Individual log files can be created and made readable by | |
# classes that manage individual sites. | |
file { '/var/log/apache2': | |
ensure => 'directory', | |
owner => 'root', |
View gist:e0b0d4a702cdd45f6d37
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
C{ | |
#include<stdio.h> | |
#include <time.h> | |
#include <string.h> | |
#define vcl_string char | |
char* get_expiration() { | |
struct tm str_time; | |
time_t time_of_day; | |
char expiration[100]; |
View testing_udf.hql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
add jar /home/nuria/workplace/refinery/source/refinery-core/target/refinery-core-0.0.19-SNAPSHOT.jar; | |
add jar /home/nuria/workplace/refinery/source/refinery-hive/target/refinery-hive-0.0.19-SNAPSHOT.jar; | |
CREATE TEMPORARY FUNCTION isPageview as 'org.wikimedia.analytics.refinery.hive.IsPageviewUDF'; | |
CREATE TEMPORARY FUNCTION isAppPageview as 'org.wikimedia.analytics.refinery.hive.IsAppPageviewUDF'; | |
use wmf; | |
select isPageview(uri_host, uri_path, uri_query, http_status, content_type, user_agent) from webrequest where year=2015 and month=09 and day=04 and hour=01; | |
-- call hive like this, leaving hive.aux.jars.path empty | |
>hive --hiveconf hive.aux.jars.path= -f test-udf.hql |
View find_ips_with_less_than_10_ocurrences
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- geocoded data on webrequest is like: | |
-- {"city":"Unknown","country_code":"--","longitude":"-1","postal_code":"Unknown","timezone":"Unknown","subdivision":"Unknown","continent":"Unknown","latitude":"-1","country":"Unknown"} | |
-- find records (client IP and city) whose client IP has fewer than 10 occurrences | |
use wmf; | |
select wr1.client_ip, geocoded_data["city"] from webrequest as wr1 where year=2015 and month=09 and hour=01 | |
and wr1.client_ip in (select wr2.client_ip from webrequest wr2 where year=2015 and month=09 and hour=01 group by wr2.client_ip having count(*) <10); |
View gist:833fef6a74574125a3fc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
export LIBJARS=/home/nuria/avro-kafka/camus-example-0.1.0-wmf6.jar,/home/nuria/avro-kafka/camus-wmf-0.1.0-wmf6.jar | |
export HADOOP_CLASSPATH=/home/nuria/avro-kafka/camus-example-0.1.0-wmf6.jar:/home/nuria/avro-kafka/camus-wmf-0.1.0-wmf6.jar | |
/usr/bin/hadoop jar /home/nuria/avro-kafka/camus-wmf-0.1.0-wmf6.jar com.linkedin.camus.etl.kafka.CamusJob -libjars ${LIBJARS} -Dcamus.job.name="nuria_testing_avr | |
o" -P /home/nuria/avro-kafka/camus.avrotest.properties >> ./log_camus_avro_test.txt 2>&1 |
View gist:01fef56a8a69528fee93
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from kafka import KafkaConsumer | |
import avro.schema | |
import avro.io | |
import io | |
# To consume messages | |
consumer = KafkaConsumer('mediawiki_CirrusSearchRequestSet', | |
group_id='my_group', | |
metadata_broker_list=['kafka1012:9092']) |
OlderNewer