import org.json4s._
import org.json4s.jackson.JsonMethods._
import java.io._

// Fetch each user's friend list from the VK API and keep the raw JSON response.
val resps = ids.map{id => (id, scala.io.Source.fromURL(s"https://api.vk.com/method/friends.get?user_id=$id&v=5.75&access_token=f84c59d4f84c59d4f84c59d4dcf81b0b18ff84cf84c59d4a2b2c591eeadd073c87a8122").mkString)}.toMap
// Extract the friend ids (all JInt values under "response") from each reply.
val friends = resps.map{case (id, resp) => (id, parse(resp) \\ "response" \\ classOf[JInt])}
// Keep only friends that are themselves in the original id set.
val ff = friends.map{case (id, fl) => (id, fl.filter(f => ids.contains(f.toInt)))}

// Write the friendship graph in GraphViz (DOT) format.
val pw = new PrintWriter(new File("bmen_filtered.gv"))
pw.write("graph buddahs_men_all {\n")
ids.foreach(id => pw.write(s"$id;\n"))
ff.foreach{case (id, fl) => fl.foreach(fid => pw.write(s"$id -- $fid;\n"))}
pw.write("}")
pw.flush()
pw.close()
DePizzottri / mixins_inheritance.cpp (created March 16, 2018 09:33)
many mixins can be base classes
#include <iostream>
#include <vector>
using namespace std;
class Base {
public:
    Base(int a) : data(a) {}
    virtual ~Base() = default;
//protected:
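
The mixin gist is cut off above. As a rough, illustrative sketch of the idea in its description ("many mixins can be base classes"), a variadic host type can inherit from an arbitrary pack of mixin classes; Printable, Countable, and Host below are made-up names for illustration, not code from the gist:

#include <iostream>

// Two independent mixins, each contributing one capability.
struct Printable {
    void print() const { std::cout << "printable\n"; }
};

struct Countable {
    int count = 0;
    void tick() { ++count; }
};

// A host type that takes an arbitrary pack of mixins: every mixin in the pack
// becomes a base class of Host, so many mixins can be base classes at once.
template <class... Mixins>
struct Host : Mixins... {
    explicit Host(int a) : data(a) {}
    int data;
};

int main() {
    Host<Printable, Countable> h(42);
    h.print();
    h.tick();
    std::cout << h.data << " " << h.count << "\n";
}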
import org.apache.spark.sql.functions.{array, lit, map, struct}
import java.net.URLEncoder
import org.apache.spark.sql.types._
//val schema = StructType(Array(StructField("location",ArrayType(StructType(Array(StructField("lat",DoubleType,true), StructField("lng",DoubleType,true))),true),true), StructField("name",StringType,false)))
case class City(lat: Double, lng: Double, name: String)

// Read the raw city names (one per line) and URL-encode the first one for use in a request URL.
val cities = scala.io.Source.fromFile("buddahs_cities_raw.txt")(scala.io.Codec.UTF8).getLines.map(_.trim).toList
val city = URLEncoder.encode(cities.head, "UTF-8")
import org.apache.spark.sql.Row
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
type Summarizer = MultivariateOnlineSummarizer
// Typed aggregator that folds the vectors in column `f` into a MultivariateOnlineSummarizer.
case class VectorSumarizer(f: String) extends Aggregator[Row, Summarizer, org.apache.spark.mllib.linalg.Vector] with Serializable {
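  // [sketch] The gist preview is truncated here; the members below are a hedged
  // completion of a typical MultivariateOnlineSummarizer-based Aggregator, not
  // the author's original code. Column `f` is assumed to hold ml Vectors.
  def zero: Summarizer = new MultivariateOnlineSummarizer
  def reduce(buf: Summarizer, row: Row): Summarizer =
    buf.add(org.apache.spark.mllib.linalg.Vectors.fromML(row.getAs[Vector](f)))
  def merge(b1: Summarizer, b2: Summarizer): Summarizer = b1.merge(b2)
  def finish(buf: Summarizer): org.apache.spark.mllib.linalg.Vector = buf.mean
  def bufferEncoder: Encoder[Summarizer] = Encoders.kryo[Summarizer]
  def outputEncoder: Encoder[org.apache.spark.mllib.linalg.Vector] = ExpressionEncoder()
}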
import com.mongodb.casbah.Imports._
// Connect to the replica set and prefer secondaries for reads.
val addresses = List(new ServerAddress("meows1"))
val mongoClient = MongoClient(addresses)
mongoClient.setReadPreference(ReadPreference.SecondaryPreferred)
val vk_all = mongoClient("vk_all")

// Load the target user ids; per-user results are collected into a mutable map.
val uids = scala.io.Source.fromFile("buddahs_uids.txt").getLines.map(_.toLong).toSet
var m = scala.collection.mutable.HashMap.empty[String, List[String]]
for (uid <- uids) {
// Read the friendship graph edges ("u -- v" per line) and load them into a DataFrame.
val gl = scala.io.Source.fromFile("buddahs_only.gv").getLines.map { l =>
  val Array(u, v) = l.split("--").map(_.trim.toLong)
  (u, v)
}
val g = spark.sparkContext.parallelize(gl.toSeq).toDF("v", "u")

// Cluster assignments (uid -> cluster index) produced by the Word2Vec + k-means pipeline.
val clusters = spark.read.json("buddahs_50W2V_20means_clusters.json")
val rclusters = clusters.select("uid", "cidx")
val clusters_a = clusters.alias("clusters")
../bin/spark-shell --jars jars/elasticsearch-spark_2.10-2.4.4.jar --conf spark.es.nodes=90.188.38.166 --conf spark.es.nodes.discovery=false --conf spark.es.nodes.wan.only=true --conf spark.es.scroll.size=500000
import org.elasticsearch.spark._
import org.apache.spark.ml.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.ml.feature.{HashingTF, IDF, Tokenizer}
import org.apache.spark.ml.feature.Word2Vec
import sqlContext.implicits._
../bin/spark-shell --jars jars/elasticsearch-spark_2.10-2.4.4.jar --conf spark.es.nodes=192.168.1.4 --conf spark.es.nodes.discovery=false --conf spark.es.nodes.wan.only=true
curl -XPUT "http://localhost:9200/vk_test_ui/_settings" -d'
{
"index.search.slowlog.threshold.query.debug": "0s"
}'
import org.elasticsearch.spark._
val esquery = scala.io.Source.fromFile("query.txt").getLines.next()
#include "stdafx.h"
using namespace caf;
#include "CRDTClustering.hpp"
#include "AWORSetActor.hpp"
#include "Replicator.hpp"
behavior cluster_client(event_based_actor* self, actor awors1) {
#pragma once
#include <caf/all.hpp>
#include <caf/io/all.hpp>
#include "AWORSet.hpp"
using namespace caf;
using namespace std;
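
The CAF snippets above are truncated at the gist preview boundary. For orientation, here is a minimal, self-contained sketch of what a behavior-returning function such as cluster_client looks like in CAF; echo_server, cluster_client_sketch, and the plain string messages are illustrative assumptions, not the gist's actual AWORSet/Replicator protocol:

#include <string>
#include <iostream>
#include <caf/all.hpp>

using namespace caf;
using namespace std;

// A simple echo server: the handler's return value is delivered to the sender as the reply.
behavior echo_server(event_based_actor*) {
  return {
    [](const string& msg) {
      return "echo: " + msg;
    }
  };
}

// A client shaped like cluster_client: it gets a handle to another actor,
// sends it a message, and handles the reply in its returned behavior.
behavior cluster_client_sketch(event_based_actor* self, actor server) {
  self->send(server, string("hello"));
  return {
    [=](const string& reply) {
      aout(self) << "got reply: " << reply << endl;
      // Done: shut down the server, then stop this actor.
      self->send_exit(server, exit_reason::user_shutdown);
      self->quit();
    }
  };
}

int main() {
  actor_system_config cfg;
  actor_system sys{cfg};
  auto srv = sys.spawn(echo_server);
  sys.spawn(cluster_client_sketch, srv);
  // The actor_system destructor blocks until all actors have terminated.
}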