Skip to content

Instantly share code, notes, and snippets.

Avatar

Andrew Otto ottomata

View GitHub Profile
View gist:2e9933dfac6c9f05ea314eb9eb86e454
# From the build directory
$ ls anaconda-wmf/lib/libpython3.* | cat
anaconda-wmf/lib/libpython3.7m.a
anaconda-wmf/lib/libpython3.7m.nolto.a
anaconda-wmf/lib/libpython3.7m.so
anaconda-wmf/lib/libpython3.7m.so.1.0
anaconda-wmf/lib/libpython3.so
# From the built .deb
$ dpkg-deb -c /var/cache/pbuilder/result/buster-amd64/anaconda-wmf_2020.02~wmf1_amd64.deb | grep lib/libpython3
View validation.error.event.json
{
"meta": {
"id": "dc5507a1-ac79-464d-ae60-cd327e5570c3",
"dt": "2020-07-06T18:11:19.252Z",
"uri": "unknown",
"domain": "www.wikidata.org",
"request_id": "4338bff5-cad5-4fe6-ab67-69121ebc78b1",
"stream": "eventgate-analytics-external.error.validation"
},
"emitter_id": "eventgate-analytics-external-production",
View gist:8b5c65a9d6b0f3500c436ae2dd6aeaa2
# Get EventLoggingSchemas config for TemplateWizard in beta
curl -s 'https://deployment.wikimedia.beta.wmflabs.org/w/load.php?debug=true&lang=en&modules=ext.centralNotice.geoIP%7Cext.centralauth.centralautologin%7Cext.dismissableSiteNotice%2CeventLogging%2CnavigationTiming%2Cpopups%2CwikimediaEvents%7Cext.uls.common%2Ccompactlinks%2Cinit%2Cinterface%2Cpreferences%2Cwebfonts%7Cext.urlShortener.toolbar%7Cjquery%2Csite%7Cjquery.client%2Ccookie%2CtextSelection%7Cjquery.uls.data%7Cmediawiki.String%2CTitle%2CUri%2Capi%2Cbase%2Ccldr%2Ccookie%2Cexperiments%2CjqueryMsg%2Clanguage%2Cstorage%2Cuser%2Cutil%7Cmediawiki.editfont.styles%7Cmediawiki.libs.pluralruleparser%7Cmediawiki.page.ready%2Cstartup%7Cmediawiki.ui.button%7Cskins.vector.js%7Cuser.defaults&skin=vector&version=vzm6n' | grep -i templatewizard
"TemplateWizard": "/analytics/legacy/templatewizard/1.0.0",
"eventlogging_TemplateWizard": [],
# Get EventLoggingSchemas config for TemplateWizard in beta enwiki
curl -s 'https://en.wik
View eventlogging_legacy_schema_convert.js
#!/usr/bin/env node
'use strict';
const fetch = require('node-fetch');
const jsTools = require('@wikimedia/jsonschema-tools');
/**
* Recurses through schema converting Draft 4 JSONSchema style
* required to Draft 7.
View backfill_searchsatisfaction_0.scala
import scala.collection.JavaConverters._
import scala.collection.immutable.ListMap
import org.wikimedia.analytics.refinery.spark.sql.JsonSchemaConverter
import org.wikimedia.analytics.refinery.core.jsonschema.EventSchemaLoader
import org.apache.spark.sql.functions.from_json
import org.apache.spark.sql.DataFrame
import org.wikimedia.analytics.refinery.job.refine._
import org.wikimedia.analytics.refinery.spark.connectors.DataFrameToHive
import org.wikimedia.analytics.refinery.spark.sql.PartitionedDataFrame
View page.py
def get_all_schedule_b_results(
committee_id=['C00618389', 'C00637512'],
sort='-disbursement_date',
two_year_transaction_period=[2018, 2020]
):
"""
Gets all paginated results of schedule b results for the given parameters.
The returned value will be the list of all result object records.
"""
View gist:828f08a145a2744612b0ba44b38dbc73
# schedule b keys.txt
committee_id
money_for_baths
# OR
keys = ['committee_id', 'money_for_baths']
View gist:ba6564fd616dfe2eb73681bab513c37d
15:32:04 [@logstash1010:/srv/kafka/data/udp_localhost-info-2] $ stat /srv/kafka/data/udp_localhost-info-2/00000000004911836348.log
File: /srv/kafka/data/udp_localhost-info-2/00000000004911836348.log
Size: 1073741181 Blocks: 2097152 IO Block: 4096 regular file
Device: 901h/2305d Inode: 10741850117 Links: 1
Access: (0644/-rw-r--r--) Uid: ( 499/ kafka) Gid: ( 499/ kafka)
Access: 2020-04-16 15:26:56.963215263 +0000
Modify: 2020-01-01 05:07:39.234657596 +0000
Change: 2020-01-01 05:07:39.234657596 +0000
Birth: -
View geocode_data.scala
// spark2-shell --driver-java-options='-Drefinery.log.level=DEBUG' --jars /srv/deployment/analytics/refinery/artifacts/refinery-hive.jar
sc.setLogLevel("DEBUG")
spark.sql("CREATE TEMPORARY FUNCTION geocode_data as 'org.wikimedia.analytics.refinery.hive.GetGeoDataUDF'")
case class Ip(ip: String) {}
val data = Seq(Ip("81.2.69.160"), Ip("81.2.69.160"), Ip("81.2.69.160"), Ip("81.2.69.160"))
val df = spark.createDataFrame(data).repartition(2)
df.selectExpr("geocode_data(ip)").show
// java.lang.NullPointerException
View eventlogging_to_mep_schema.js
'use strict';
const got = require('got');
const jsTools = require('@wikimedia/jsonschema-tools');
const yaml = require('js-yaml');
/**
* Tests if obj is an Object (not an array).
* @param {*} obj value to test
* @return {Boolean} true if obj is an Object and not an array
You can’t perform that action at this time.