This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######################################################################## | |
# Source interfaces | |
######################################################################## | |
@dataclass
class Source(metaclass=ABCMeta):
    """Abstract base class for sources that can be polled.

    Declared with ``metaclass=ABCMeta`` (the Python 3 spelling) so that
    ``@abstractmethod`` is actually enforced: ``Source`` itself cannot be
    instantiated, and subclasses must override :meth:`poll`.
    """

    @abstractmethod
    def poll(self) -> List[Tuple[str, str]]:
        """Return the polled items as a list of ``(str, str)`` pairs.

        The meaning of the two strings is defined by the concrete source
        (not visible here -- TODO confirm against the implementations).

        Raises:
            NotImplementedError: if a subclass delegates to this base method.
        """
        # NotImplemented is a comparison sentinel, not a callable; the
        # conventional signal for an unimplemented method is this exception.
        raise NotImplementedError
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Rebinds `runMain` in the Compile configuration to the input task that
// Defaults.runMainTask builds from Compile's full classpath and the runner
// scoped to (Compile, run).
runMain in Compile := Defaults
  .runMainTask(fullClasspath in Compile, runner in (Compile, run))
  .evaluated
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// `runMain` (Compile scope) is redefined as the result of Defaults.runMainTask,
// fed with the Compile full classpath and the (Compile, run)-scoped runner.
runMain in Compile := Defaults
  .runMainTask(
    fullClasspath in Compile,
    runner in (Compile, run)
  )
  .evaluated
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def min_max_hashes(text, window=60):
    """Return ``[lowest, highest]`` murmurhash over every ``window``-long slice of ``text``.

    Args:
        text: the string to fingerprint.
        window: slice (shingle) length in characters.

    Returns:
        A two-element list ``[min_hash, max_hash]``.

    A text shorter than ``window`` is hashed as a single slice (the whole
    text) instead of yielding no hashes, which previously made ``min``/``max``
    raise ``ValueError`` on an empty list.
    """
    # Clamp so the slice starting at 0 is always produced, even for short input.
    last_start = max(len(text) - window, 0)
    hashes = [murmurhash(text[i:i + window]) for i in range(last_start + 1)]
    return [min(hashes), max(hashes)]
def shingleprints(text): | |
min1, max1 = min_max_hashes(text[0:len(text)/2]) | |
min2, max2 = min_max_hashes(text[len(text)/2:]) | |
# combine pairs, using your favorite hash-value combiner | |
return [hash_combine(min1, min2), | |
hash_combine(min1, max2), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def minhash(text, window=25):
    """Return the set of the 20 smallest murmurhash values over ``window``-long slices of ``text``.

    The caller is expected to pass a text longer than 50 characters.
    Duplicate hash values collapse, so the set may hold fewer than 20 items.
    """
    slice_count = len(text) - window + 1
    ordered = sorted(
        murmurhash(text[start:start + window]) for start in range(slice_count)
    )
    return set(ordered[:20])
def similarity(text1, text2):
    """Return the fraction of ``text1``'s minhash values that also appear for ``text2``.

    The score is a float between 0.0 and 1.0. It is normalised by the number
    of hashes of ``text1`` only, so swapping the arguments can change it.

    Returns 0.0 when ``text1`` produces no hashes (instead of dividing by zero).
    """
    hashes1 = minhash(text1)
    hashes2 = minhash(text2)
    if not hashes1:
        return 0.0
    # float() forces true division on Python 2 as well, where int / int
    # would truncate the score to 0 or 1.
    return float(len(hashes1 & hashes2)) / len(hashes1)
A = "one two three four five six seven eight nine ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// sourced from http://users.eecs.northwestern.edu/~cji970/pub/cjinBigDataService2015.pdf | |
JavaRDD<String> subGraphIdRDD = sc.textFile(idFileLoc,numGraphs); | |
JavaPairRDD<Integer, Edge> subMSTs = subGraphIdRDD.flatMapToPair(new LocalMST(filesLoc, numSplits)); | |
numGraphs = numSplits * numSplits / 2; | |
numGraphs = (numGraphs + (K - 1)) / K; | |
JavaPairRDD<Integer, Iterable<Edge>> mstToBeMerged = subMSTs.combineByKey( new CreateCombiner(), new Merger(),new KruskalReducer(numPoints),numGraphs); | |
while (numGraphs > 1) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Block until `netstat -an` shows an entry for port 1234.
# The check runs before the message and the sleep, so an already-running
# service is not reported as unavailable and detection is not followed by a
# pointless 3-second pause.
# NOTE(review): there is no timeout; this loops forever if the service never starts.
until [ -n "$APP_UP" ]; do
    # -w matches 1234 only as a whole word, so e.g. 12345 or 51234 do not count.
    APP_UP=$(netstat -an | grep -w 1234)
    if [ -z "$APP_UP" ]; then
        echo 'service not available yet'
        sleep 3
    fi
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://stackoverflow.com/questions/5564690/tell-sbt-to-collect-all-my-dependencies-together | |
// Destination for the collected jars: the "lib" directory under outputPath.
val libraryJarPath = outputPath / "lib"

// Hands the main dependency libraries plus the Scala jars to
// FileUtilities.copyFlat, with libraryJarPath as the destination, and
// returns whatever copyFlat returns.
def collectJarsTask =
  FileUtilities.copyFlat(
    (mainDependencies.libraries +++ mainDependencies.scalaJars).get,
    libraryJarPath,
    log
  )
// Task that runs collectJarsTask after `compile`.
// An sbt 0.7 task body returns Option[String], where Some(message) marks failure;
// the copy's error is now propagated instead of being discarded with a constant None.
// NOTE(review): assumes FileUtilities.copyFlat returns Either[String, _] as in sbt 0.7.x -- confirm.
lazy val collectJars = task { collectJarsTask.left.toOption } dependsOn(compile)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// HTTP client built from an OkHttpClient.Builder with no options set.
val client: OkHttpClient = new OkHttpClient.Builder().build()

// Media type for UTF-8 plain-text payloads.
val TEXT: MediaType = MediaType.get("text/plain; charset=utf-8")
def asyncRequest( text : String ) : Future[ String ] = { | |
val body = RequestBody.create( text, TEXT ) | |
val request = new Request.Builder().url( "http://michael.com" ).post( body ).build() | |
val promise : Promise[ String ] = Promise[ String ]() | |
client.newCall( httpRequest ).enqueue( new Callback { | |
override def onFailure( call : Call, e : IOException ) : Unit = promise.failure( e ) |
NewerOlder