This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from functools import reduce  # not a builtin on Python 3; this import also works on Python 2.6+

# Walk a nested dict by following a sequence of keys, one nesting level per key.
a = {'a': {'b': {'c': 1}}}
l = ['a', 'b', 'c']  # key path to follow, outermost key first
# reduce starts from `a` and indexes the current value with each key in turn.
reduce(lambda d, key: d[key], l, a)
# >>> 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load each vbox* kernel module with modprobe, one after another.
# sudo is used because loading kernel modules requires root privileges.
for module in vboxnetadp vboxdrv vboxnetflt; do
    sudo modprobe "$module"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type ParsedDoc = Map[String, Any] | |
type PartialDoc = (UID, ParsedDoc) | |
/** | |
* Recursively merge two parsed json documents | |
*/ | |
private def merge(map1 : ParsedDoc, map2 : ParsedDoc): ParsedDoc = { | |
def mergeValues(o1 : Option[Any], o2 : Option[Any]) = | |
(o1, o2) match { | |
case (Some(v1 : ParsedDoc), Some(v2 : ParsedDoc)) => merge(v1, v2) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Adapted from: https://github.com/jcrobak/avro-examples | |
import org.apache.spark.SparkContext | |
import org.apache.spark.SparkContext._ | |
import org.apache.avro.generic.GenericRecord | |
import org.apache.avro.mapred.AvroKey | |
import org.apache.avro.mapreduce.AvroKeyInputFormat | |
import org.apache.hadoop.io.NullWritable | |
import org.apache.commons.lang.StringEscapeUtils.escapeCsv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def copy[D <: RawData](inputPath: String, outputPath: String, | |
splitsNum: Int = 128, replication: Int = 2) | |
(implicit sc: SparkContext, ct: ClassTag[D]): Unit = { | |
// use reflection to | |
// - get the ctor | |
// - get the canonical name tagged in annotation | |
val cls = ct.runtimeClass | |
val ctor = (args: Array[String]) => | |
cls.getConstructor(classOf[Array[String]]).newInstance(args) | |
val canonicalName = cls.getAnnotation(classOf[CanonicalName]).name() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dpark

# Word count over ./bible.txt, read in 4 splits; surrounding whitespace is
# stripped from every line before tokenizing.
lines = dpark.textFile('./bible.txt', numSplits=4).map(lambda raw: raw.strip())

# Emit one (word, 1) pair per whitespace-separated token.
words = lines.flatMap(lambda text: [(token, 1) for token in text.split()])

# Sum the 1s per distinct word to get its occurrence count.
wc = words.reduceByKey(lambda left, right: left + right)

# Write the (word, count) rows as compressed, tab-delimited CSV.
wc.saveAsCSVFile('/tmp/dpark_result/', dialect='excel-tab', compress=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from cdf.utils.kvstore import LevelDB | |
import struct | |
class LevelDBExternalSort(object): | |
SEP = '\0' | |
FMT = '>i' | |
def __init__(self, tmp_dir=None, **configs): | |
if tmp_dir is None: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dpark | |
import logging | |
import os | |
import re | |
logging.basicConfig(level=logging.DEBUG) | |
def list_files(dirpath, full_path=True, regexp=None): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.lang.reflect.ParameterizedType | |
import com.google.common.reflect.TypeToken | |
import scala.reflect.ClassTag | |
import scala.reflect.runtime.universe._ | |
class KV[I, O] |
Older | Newer