miku/LSH.md

## kdtree.go
package main

import (
	"fmt"
	"math"
	"math/rand"
	"sort"
	"time"
)

type point []float64

func (p point) sqd(q point) float64 {
	var sum float64
    for dim, pCoord := range p {
    	d := pCoord - q[dim]
    	sum += d * d
    }
    return sum
}

type kdNode struct {
	domElt point
	split int
	left, right *kdNode
}

type kdTree struct {
	n *kdNode
	bounds hyperRect
}

type hyperRect struct {
	min, max point
}

func (hr hyperRect) copy() hyperRect {
	return hyperRect{append(point{}, hr.min...), append{point{}, hr.max...}}
}

func newKd(pts []point, bounds hyperRect) kdTree {
	var nk2 func([]point, int) *kdTree
	nk2 = func(exset []point, split int) *kdNode {
		if len(exset) == 0 {
			return nil
		}
		sort.Sort(part{exset, split})
		m := len(exset) / 2
		d := exset[m]
		for m + 1 < len(exset) && exset[m+1][split] == d[split] {
			m++
		}

		s2 := split + 1
		if s2 == len(d) {
			s2 = 0
		}

		return &kdNode{d, split, nk2(exset[:m], s2), nk2(exset[m+1:], s2)}
	}
	return kdTree{nk2(pts, 0), bounds}
}


type part struct {
	pts []point
	dPart int
}

func (p part) Len() int  {return len(p.pts)}
func (p part) Less(i, j int) bool {
	return p.pts[i][p.dPart] < p.pts[j][p.dPart]
}

func (p part) Sqap(i, j int) { p.pts[i], p.pts[j] = p.pts[j], p.pts[i] }

func (t kdTree) nearest(p point) (best point, bestSqd float64, nv int) {
	return nn(t.n, p, t.bounds, math.Inf(1))
}

func nn(kd *kdNode, target point, hr hyperRect, maxDistSqd float64) (nearest point, distSqd float64, nodesVisited int) {
	if kd == nil {
		return nil, math.Inf(1), 0
	}

	nodesVisited++
	s := kd.split
	pivot := kd.domElt

}

## LSH.md

      
    Raw
  

              LSH.md
            
          
    Notes

Basic problems: NN and ANN.
Locality sensitive hashing is a family of algorithms for NN.

Given a collection of n points, build a data structure which, given any
query point, reports the data point, that is closest to the query.

An important case is where the points live in a d-dimensional space under
some (Euclidean) distance function.
Objects of interest (documents, images, ...) are represented as a point
in R^d. d ranges from tens to millions.
For low dimensions, there are traditional data structures, such as
k-d-trees (k-dimensional tree, Bentley, 1975).

A k-d tree is a binary tree, in which every node is a k-dimensional point.

More on k-d trees:

http://www.autonlab.org/autonweb/14665/version/2/part/5/data/moore-tutorial.pdf
https://www.cs.umd.edu/class/spring2008/cmsc420/L19.kd-trees.pdf

Space partitioning data structure.
Cool viz:

http://ubilabs.github.io/kd-tree-javascript/examples/basic/
	package main

	import (
	"fmt"
	"math"
	"math/rand"
	"sort"
	"time"
	)

	type point []float64

	func (p point) sqd(q point) float64 {
	var sum float64
	for dim, pCoord := range p {
	d := pCoord - q[dim]
	sum += d * d
	}
	return sum
	}

	type kdNode struct {
	domElt point
	split int
	left, right *kdNode
	}

	type kdTree struct {
	n *kdNode
	bounds hyperRect
	}

	type hyperRect struct {
	min, max point
	}

	func (hr hyperRect) copy() hyperRect {
	return hyperRect{append(point{}, hr.min...), append{point{}, hr.max...}}
	}

	func newKd(pts []point, bounds hyperRect) kdTree {
	var nk2 func([]point, int) *kdTree
	nk2 = func(exset []point, split int) *kdNode {
	if len(exset) == 0 {
	return nil
	}
	sort.Sort(part{exset, split})
	m := len(exset) / 2
	d := exset[m]
	for m + 1 < len(exset) && exset[m+1][split] == d[split] {
	m++
	}

	s2 := split + 1
	if s2 == len(d) {
	s2 = 0
	}

	return &kdNode{d, split, nk2(exset[:m], s2), nk2(exset[m+1:], s2)}
	}
	return kdTree{nk2(pts, 0), bounds}
	}


	type part struct {
	pts []point
	dPart int
	}

	func (p part) Len() int {return len(p.pts)}
	func (p part) Less(i, j int) bool {
	return p.pts[i][p.dPart] < p.pts[j][p.dPart]
	}

	func (p part) Sqap(i, j int) { p.pts[i], p.pts[j] = p.pts[j], p.pts[i] }

	func (t kdTree) nearest(p point) (best point, bestSqd float64, nv int) {
	return nn(t.n, p, t.bounds, math.Inf(1))
	}

	func nn(kd *kdNode, target point, hr hyperRect, maxDistSqd float64) (nearest point, distSqd float64, nodesVisited int) {
	if kd == nil {
	return nil, math.Inf(1), 0
	}

	nodesVisited++
	s := kd.split
	pivot := kd.domElt

	}