Skip to content

Instantly share code, notes, and snippets.

View mrk-andreev's full-sized avatar

Mark Andreev mrk-andreev

View GitHub Profile
// Checks that a "left_semi" join keeps only the rows of `main` whose `index`
// value also appears in `dict`, i.e. the join acts as an `IN`-style filter.
test("in") {
  val main = createDataDataset(Seq(Data("a", "Berlin"), Data("b", "Madrid"), Data("c", "Rome")))
  val dict = createDictDataset(Seq(Dict("a"), Dict("b")))
  // Sort the joined rows too: `expected` is ordered by `index`, and the row
  // order of a join result is not guaranteed, so an unsorted comparison
  // could fail spuriously.
  val result = main.join(dict, Seq("index"), "left_semi").orderBy("index")
  val expected = createDataDataset(Seq(
    Data("a", "Berlin"), Data("b", "Madrid"))).orderBy("index")
  assert(result.schema === expected.schema)
  assert(result.collect() === expected.toDF().collect())
}
/** Join-based membership filters for DataFrames. */
object PlaygroundUtil {

  /**
   * Filters `main` with a "left_semi" join against `dict`.
   *
   * @param main       DataFrame to filter
   * @param dict       DataFrame supplying the values to match against
   * @param joinColumn name of the column used as the join key on both sides
   * @return the result of the "left_semi" join of `main` with `dict`
   */
  def in(main: DataFrame, dict: DataFrame, joinColumn: String): DataFrame =
    filterByJoin(main, dict, joinColumn, "left_semi")

  /**
   * Filters `main` with a "left_anti" join against `dict`.
   *
   * @param main       DataFrame to filter
   * @param dict       DataFrame supplying the values to exclude
   * @param joinColumn name of the column used as the join key on both sides
   * @return the result of the "left_anti" join of `main` with `dict`
   */
  def notIn(main: DataFrame, dict: DataFrame, joinColumn: String): DataFrame =
    filterByJoin(main, dict, joinColumn, "left_anti")

  /** Joins `main` with `dict` on the single column `joinColumn` using `joinType`. */
  private def filterByJoin(
      main: DataFrame,
      dict: DataFrame,
      joinColumn: String,
      joinType: String): DataFrame = {
    val keys = Seq(joinColumn)
    main.join(dict, keys, joinType)
  }
}
-- Per-table storage summary built from system.parts.
-- Only rows where `active` is true are counted; the aggregates are computed
-- per (database, table) group.
SELECT
    concat(database, '.', table)                         AS table,                -- "<database>.<table>"
    formatReadableSize(sum(bytes))                       AS size,                 -- summed bytes, formatted
    sum(rows)                                            AS rows,
    max(modification_time)                               AS latest_modification,
    sum(bytes)                                           AS bytes_size,           -- raw summed bytes
    any(engine)                                          AS engine,
    formatReadableSize(sum(primary_key_bytes_in_memory)) AS primary_keys_size
FROM system.parts
WHERE active
GROUP BY
    database,
    table
// ==UserScript==
// @name JupyterUserscript
// @version 0.1
// @author Mark Andreev
// ==/UserScript==
(function() {
'use strict';
var YOUR_NAME = 'Mark,';
// ==UserScript==
// @name JupyterExt
// @namespace http://tampermonkey.net/
// @version 0.1
// @description
// @author mrk-andreev
// @match http://localhost:8888/notebooks/*
// @grant none
// ==/UserScript==
@mrk-andreev
mrk-andreev / ipython_notebook_in_git.md
Created May 5, 2017 11:11 — forked from pbugnion/ipython_notebook_in_git.md
Keeping IPython notebooks under Git version control

This gist lets you keep IPython notebooks in git repositories. It tells git to ignore prompt numbers and program outputs when checking that a file has changed.

To use the script, follow the instructions given in the script's docstring.

For further details, read this blog post.

The procedure outlined here is inspired by this answer on Stack Overflow.

@mrk-andreev
mrk-andreev / ipython_notebook_in_git.md
Created May 5, 2017 11:11 — forked from pbugnion/ipython_notebook_in_git.md
Keeping IPython notebooks under Git version control

This gist lets you keep IPython notebooks in git repositories. It tells git to ignore prompt numbers and program outputs when checking that a file has changed.

To use the script, follow the instructions given in the script's docstring.

For further details, read this blog post.

The procedure outlined here is inspired by this answer on Stack Overflow.