Skip to content

Instantly share code, notes, and snippets.

@ficapy
Last active March 4, 2018 02:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ficapy/ed909648b184bc2fc1486ce90175d356 to your computer and use it in GitHub Desktop.
Save ficapy/ed909648b184bc2fc1486ce90175d356 to your computer and use it in GitHub Desktop.
BK树和VP树
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: ficapy
# Create: '03/03/2018'
import random
from string import ascii_lowercase
def distance(str1: str, str2: str):
return sum(str1[i] != str2[i] for i in range(len(str1)))
class Node:
def __init__(self, content: str, child: dict):
self.content = content
self.child = child
class BKTree:
def __init__(self, init_content: str):
self.root = Node(init_content, {})
def insert(self, content: str):
d = distance(content, self.root.content)
node = Node(content, {})
if d not in self.root.child:
self.root.child[d] = node
return
else:
origin = self.root
self.root = self.root.child[d]
self.insert(content)
self.root = origin
def search(self, target, k, result=[]):
# 距离小于等于K的
x = distance(target, self.root.content)
if x <= k:
result.append(self.root.content)
for i, _ in self.root.child.items():
if i <= x + k:
origin = self.root
self.root = self.root.child[i]
self.search(target, k)
self.root = origin
return result
ori = list(set([''.join(random.choice(ascii_lowercase) for _ in range(4)) for _ in range(100)]))
bk = BKTree("abcd")
for i in ori:
bk.insert(i)
bk_result = bk.search(ori[0], 2)
general_ret = []
for i in ori:
if distance(i, ori[0]) <= 2:
general_ret.append(i)
print(bk_result)
print(general_ret)
http://www.matrix67.com/blog/archives/333
https://fribbels.github.io/vptree/writeup
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: ficapy
# Create: '03/03/2018'
import random
from string import ascii_lowercase
def distance(str1: str, str2: str):
return sum(str1[i] != str2[i] for i in range(len(str1)))
class Node:
def __init__(self, content: str, left: str, right: str):
self.content = content
self.left = left
self.right = right
self.radius = int(len(content) / 2)
class VPTree:
def __init__(self, init_content: str):
self.root = Node(init_content, None, None)
def insert(self, content: str):
d = distance(self.root.content, content)
node = Node(content, None, None)
if d <= self.root.radius:
if self.root.left is None:
self.root.left = node
else:
origin = self.root
self.root = self.root.left
self.insert(content)
self.root = origin
else:
if self.root.right is None:
self.root.right = node
else:
origin = self.root
self.root = self.root.right
self.insert(content)
self.root = origin
def search(self, target, k, result=[]):
# 距离小于等于K的
x = distance(target, self.root.content)
if x <= k:
result.append(self.root.content)
if self.root.radius > (x + k) and (self.root.left is not None):
origin = self.root
self.root = self.root.left
self.search(target, k)
self.root = origin
else:
origin = self.root
self.root = self.root.left
if self.root is not None:
self.search(target, k)
self.root = origin
self.root = self.root.right
if self.root is not None:
self.search(target, k)
self.root = origin
return result
ori = list(set([''.join(random.choice(ascii_lowercase) for _ in range(4)) for _ in range(100)]))
vp = VPTree("abcd")
for i in ori:
vp.insert(i)
vp_result = vp.search(ori[0], 2)
general_ret = []
for i in ori:
if distance(i, ori[0]) <= 2:
general_ret.append(i)
print(vp_result)
print(general_ret)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment