Skip to content

Instantly share code, notes, and snippets.

@chilang
chilang / gist:728223
Created December 4, 2010 14:38
Tiny DSL for describing flows
package graphflow
import org.specs.Specification
// Simple mutable graph container: a set of nodes and a set of connections (edges).
// NOTE(review): class body appears truncated in this excerpt — the closing brace
// is not visible here.
class Graph {
// Mutable HashSets; duplicate nodes/edges are collapsed by Set semantics.
val nodes:scala.collection.mutable.Set[flow.Node] =
new scala.collection.mutable.HashSet[flow.Node]()
val edges:scala.collection.mutable.Set[flow.Connection] =
new scala.collection.mutable.HashSet[flow.Connection]()
// Debug-friendly rendering: nodes on one line, edges on the next.
override def toString = "nodes:"+nodes.toString + "\nedges:"+edges.toString
@chilang
chilang / geohash.scala
Created December 27, 2011 13:11
geohash decoding
scala> def mid(pair:(Double,Double)) = (pair._1+pair._2) / 2
mid: (pair: (Double, Double))Double
scala> def decode(range:(Double,Double), bin:String) = bin.map(_.toInt - '0'.toInt).foldLeft(range)( (acc,bit) => if (bit == 0) (acc._1, mid(acc)) else (mid(acc), acc._2) )
decode: (range: (Double, Double), bin: String)(Double, Double)
scala> def decodeLat(bin:String) = mid(decode((-90.0,90.0), bin))
decodeLat: (bin: String)Double
scala> def decodeLon(bin:String) = mid(decode((-180.0,180.0), bin))
@chilang
chilang / geohash.scala
Created December 27, 2011 15:46
geohash decoding
//http://en.wikipedia.org/wiki/Geohash
// Geohash decoder. NOTE(review): object body appears truncated in this excerpt —
// the closing brace and the code following `import base32._` are not visible here.
object geohash {
// World-coordinate bounds used as the initial search interval for decoding.
val LAT_RANGE = (-90.0, 90.0)
val LON_RANGE = (-180.0, 180.0)
// Midpoint of an interval; each decoded bit halves the interval toward one side.
private def mid(pair:(Double,Double)) = (pair._1+pair._2) / 2
// Fold over the bit string "0101...": bit 0 keeps the lower half of `range`,
// bit 1 keeps the upper half, progressively narrowing the interval.
private def decodePart(range:(Double,Double), bin:String) = bin.map(_.toInt - '0'.toInt).foldLeft(range)( (acc,bit) => if (bit == 0) (acc._1, mid(acc)) else (mid(acc), acc._2) )
import base32._
@chilang
chilang / iOSNotesTo1Password
Last active August 29, 2015 14:04
Import iOS Notes into 1Password
1. Backup iPad/iPhone to Mac (without encryption)
2. Locate backup folder and run https://code.google.com/p/iphone-dataprotection/:
python python_scripts/backup_tool.py ~/Library/Application\ Support/MobileSync/Backup/XXXXXXX outdir
3. Open the sqlite db using SquirrelSQL:
outdir/HomeDomain/Library/Notes/notes.sqlite
# Lazily initialise the global model parameters on first call to fit: a single
# zero intercept and one zero coefficient row with one column per feature of X.
# NOTE(review): method body appears truncated in this excerpt — the training
# loop is not contiguous with this fragment. Indentation was stripped by the
# page extraction; code is reproduced byte-identically.
def fit(self, X, y):
if self.intercept_ is None or self.coef_ is None:
features = X.shape[1]
self.intercept_ = np.zeros(1)
self.coef_ = np.zeros((1, features))
def _collect_models(self, runners, N):
r_intercepts, r_coefs, r_weights = [], [], []
self.models = []
for runner in random.sample(runners, k=self.sample_size):
r_model = runner.optimise(self.intercept_, self.coef_, self.runner_hyperparams)
self.models.append(r_model)
r_intercepts.append(r_model.intercept_)
r_coefs.append(r_model.coef_)
r_weights.append(runner.dataset_size()/N if self.combine == 'weighted' else 1/self.sample_size)
return r_intercepts, r_coefs, r_weights
# Federated-averaging training loop (non-contiguous continuation of fit in this
# excerpt): each round samples runner models and averages their parameters.
for _ in range(self.rounds):
r_intercepts, r_coefs, r_weights = self._collect_models(runners, N)
# np.average with per-runner weights implements the FedAvg aggregation step.
self.intercept_ = np.average(r_intercepts, axis=0, weights=r_weights)
self.coef_ = np.average(r_coefs, axis=0, weights=r_weights)
# Install the averaged parameters into a model; classes are taken from y.
# NOTE(review): set_weights is defined elsewhere in the file — not visible here.
self.global_model = set_weights(self.intercept_, self.coef_, np.unique(y))
return self
# Partition (x, y) across `partitions` runners with a skewed label distribution.
# NOTE(review): function body appears truncated in this excerpt — the actual
# partitioning and return are not visible here. Indentation was stripped by the
# page extraction; code is reproduced byte-identically.
def label_distribution_skew(x, y, partitions, skew=1):
def runner_split(N_labels, N_runners):
"""number of labels to assign to n runners"""
# Guarantee at least one label per runner and at least one runner per label.
runner_labels = round(max(1, N_labels / N_runners))
runner_split = round(max(1, N_runners / N_labels))
return runner_labels, runner_split
runner_data = []
# Number of distinct classes present in y.
N_labels = np.unique(y).shape[0]
n_labels, n_runners = runner_split(N_labels, partitions)
# Evaluate FedAvg over `rounds` repetitions, collecting weighted F1 scores.
# NOTE(review): relies on X_train/y_train/X_test/y_test from an enclosing scope,
# and the body appears truncated in this excerpt (local-model scoring not
# visible). Indentation was stripped by the page extraction; code is reproduced
# byte-identically.
def compare_f1(hyperparams, rounds=1):
local_mean_f1_scores = []
local_max_f1_scores = []
fed_avg_f1_scores = []
for i in range(0, rounds):
fed_avg = FedAvg(**hyperparams)
fed_avg.fit(X_train, y_train)
preds = fed_avg.predict(X_test)
# Weighted average accounts for class imbalance in the test labels.
fed_avg_f1_scores.append(f1_score(y_test, preds, average='weighted'))
tmp = []
# Sweep label-skew levels and evaluate each configuration.
# Skew levels to try (a set literal, so iteration order is unspecified).
skews = {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99}
scores = {}
for skew in skews:
print("eval", skew)
# Hyperparameters for this run.
# NOTE(review): the dict literal is truncated in this excerpt — the
# 'partition_params' value and anything after it are not visible here.
hyperparams = {
'n_runners': 10,
'sample_size': 10,
'rounds': 1,
'combine': 'weighted', # or 'mean'
'partition_params': {