Last active
March 4, 2018 02:18
-
-
Save ficapy/ed909648b184bc2fc1486ce90175d356 to your computer and use it in GitHub Desktop.
BK树和VP树
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Author: ficapy | |
# Create: '03/03/2018' | |
import random | |
from string import ascii_lowercase | |
def distance(str1: str, str2: str): | |
return sum(str1[i] != str2[i] for i in range(len(str1))) | |
class Node: | |
def __init__(self, content: str, child: dict): | |
self.content = content | |
self.child = child | |
class BKTree: | |
def __init__(self, init_content: str): | |
self.root = Node(init_content, {}) | |
def insert(self, content: str): | |
d = distance(content, self.root.content) | |
node = Node(content, {}) | |
if d not in self.root.child: | |
self.root.child[d] = node | |
return | |
else: | |
origin = self.root | |
self.root = self.root.child[d] | |
self.insert(content) | |
self.root = origin | |
def search(self, target, k, result=[]): | |
# 距离小于等于K的 | |
x = distance(target, self.root.content) | |
if x <= k: | |
result.append(self.root.content) | |
for i, _ in self.root.child.items(): | |
if i <= x + k: | |
origin = self.root | |
self.root = self.root.child[i] | |
self.search(target, k) | |
self.root = origin | |
return result | |
ori = list(set([''.join(random.choice(ascii_lowercase) for _ in range(4)) for _ in range(100)])) | |
bk = BKTree("abcd") | |
for i in ori: | |
bk.insert(i) | |
bk_result = bk.search(ori[0], 2) | |
general_ret = [] | |
for i in ori: | |
if distance(i, ori[0]) <= 2: | |
general_ret.append(i) | |
print(bk_result) | |
print(general_ret) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
http://www.matrix67.com/blog/archives/333 | |
https://fribbels.github.io/vptree/writeup |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Author: ficapy | |
# Create: '03/03/2018' | |
import random | |
from string import ascii_lowercase | |
def distance(str1: str, str2: str): | |
return sum(str1[i] != str2[i] for i in range(len(str1))) | |
class Node: | |
def __init__(self, content: str, left: str, right: str): | |
self.content = content | |
self.left = left | |
self.right = right | |
self.radius = int(len(content) / 2) | |
class VPTree: | |
def __init__(self, init_content: str): | |
self.root = Node(init_content, None, None) | |
def insert(self, content: str): | |
d = distance(self.root.content, content) | |
node = Node(content, None, None) | |
if d <= self.root.radius: | |
if self.root.left is None: | |
self.root.left = node | |
else: | |
origin = self.root | |
self.root = self.root.left | |
self.insert(content) | |
self.root = origin | |
else: | |
if self.root.right is None: | |
self.root.right = node | |
else: | |
origin = self.root | |
self.root = self.root.right | |
self.insert(content) | |
self.root = origin | |
def search(self, target, k, result=[]): | |
# 距离小于等于K的 | |
x = distance(target, self.root.content) | |
if x <= k: | |
result.append(self.root.content) | |
if self.root.radius > (x + k) and (self.root.left is not None): | |
origin = self.root | |
self.root = self.root.left | |
self.search(target, k) | |
self.root = origin | |
else: | |
origin = self.root | |
self.root = self.root.left | |
if self.root is not None: | |
self.search(target, k) | |
self.root = origin | |
self.root = self.root.right | |
if self.root is not None: | |
self.search(target, k) | |
self.root = origin | |
return result | |
ori = list(set([''.join(random.choice(ascii_lowercase) for _ in range(4)) for _ in range(100)])) | |
vp = VPTree("abcd") | |
for i in ori: | |
vp.insert(i) | |
vp_result = vp.search(ori[0], 2) | |
general_ret = [] | |
for i in ori: | |
if distance(i, ori[0]) <= 2: | |
general_ret.append(i) | |
print(vp_result) | |
print(general_ret) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment