This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// spark2-shell --jars /srv/deployment/analytics/refinery/artifacts/refinery-job.jar | |
/** | |
* Use RefineTarget.find to find all Refine targets for an input (camus job) in the last N hours. | |
* Then filter for any for which the _REFINED_FAILED flag exists. | |
*/ | |
// Hadoop Path for HDFS flag checks; Joda/nscala-time for the "last N hours" window.
// (Fixed: original line had a duplicated `import` keyword, a syntax error.)
import org.apache.hadoop.fs.Path
import org.joda.time.format.DateTimeFormatter
import com.github.nscala_time.time.Imports._
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This is the EventStreams RecentChange stream endpoint | |
var url = 'https://stream.wikimedia.org/v2/stream/recentchange'; | |
// Use EventSource (available in most browsers, or as an | |
// npm module: https://www.npmjs.com/package/eventsource) | |
// to subscribe to the stream. | |
var recentChangeStream = new EventSource(url); | |
// Print each event to the console | |
recentChangeStream.onmessage = function(message) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ask the local Druid broker (port 8082) for segment metadata of the
# wmf_netflow datasource over September 2019.
# The interval is Druid's half-open ISO form: 2019-09-01 inclusive
# through 2019-10-01 exclusive.
curl -X POST 'http://localhost:8082/druid/v2/?pretty' \
    -H 'Content-Type:application/json' \
    -H 'Accept:application/json' \
    -d '{
  "queryType":"segmentMetadata",
  "dataSource":"wmf_netflow",
  "intervals":["2019-09-01/2019-10-01"]
}'
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# From stat1004: | |
# pyspark2 --jars ~otto/spark-sql-kafka-0-10_2.11-2.3.1.jar,~otto/kafka-clients-1.1.0.jar | |
# Need spark-sql-kafka for DataStream source and kafka-clients for Kafka serdes. | |
from pyspark.sql.functions import * | |
from pyspark.sql.types import * | |
# Declare a Spark schema that matches the JSONData. | |
# In a future MEP world this would be automatically loaded | |
# from a JSONSchema. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Daily count of ExternalGuidance 'init' events for June 2019,
-- excluding bot traffic (per the useragent.is_bot flag).
-- The date string is rebuilt from the Hive partition columns
-- (year/month/day) with zero-padding so it sorts as YYYY-MM-DD.
SELECT
    CONCAT(year, '-', LPAD(month, 2, '0'), '-', LPAD(day, 2, '0')) AS date,
    COUNT(1) AS n_events
FROM event.externalguidance
WHERE year = 2019
    AND month = 6
    AND NOT useragent.is_bot
    AND event.action = 'init'
GROUP BY year, month, day
ORDER BY date
LIMIT 1000000
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import sys | |
import math | |
f = sys.argv[1] | |
_file = open(f) | |
data = {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
set number        " show line numbers
syntax enable     " turn on syntax highlighting
set cursorline    " highlight the line the cursor is on
set showcmd       " show partial commands in the status line
" Show invisible characters.
" (Fixed: Vim script comments start with a double quote; the original used a
" bare single-quoted string, which is not valid Vim script.)
set listchars=tab:→\ ,space:·,nbsp:␣,trail:•,eol:¶,precedes:«,extends:»
set list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- make the session dataset smaller to be able to try things out fast
--create table session_tryouts as select * from classifier_data_sorted a where a.sessionId in (select distinct s.sessionId from classifier_data_sorted s limit 100); | |
drop table if exists classifier_data_label; | |
create table | |
classifier_data_label | |
as | |
select | |
sessionId, | |
(unix_timestamp(max(ts)) - unix_timestamp( min(ts))) as length, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Launch the AppSessionMetrics Spark job on YARN.
# Trailing args: <output base path> <year> <month> <day>
# (Fixed: the jar filename was split across two lines by the scrape;
# rejoined into refinery-job-0.0.10-SNAPSHOT.jar.)
spark-submit \
    --class org.wikimedia.analytics.refinery.job.AppSessionMetrics \
    --master yarn \
    --num-executors=6 \
    --executor-cores=2 \
    --executor-memory=2g \
    /mnt/hdfs/tmp/nuria/jars/refinery-job-0.0.10-SNAPSHOT.jar \
    hdfs://analytics-hadoop/tmp/mobile-apps-sessions 2015 03 10