Last active
June 23, 2021 01:15
-
-
Save asukaminato0721/c25aa4a4b1b0a98b24a0a858be1f060d to your computer and use it in GitHub Desktop.
k-means without numpy, 无第三方库的实现
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from doctest import testmod
from math import dist as 距离
from pathlib import Path
from random import sample
from statistics import fmean
from typing import Dict, List, Tuple
# A point is a tuple of float coordinates of any dimension.
Point = Tuple[float, ...]


def 计算中间点(点集: List[Point]) -> Point:
    """Return the centroid (coordinate-wise arithmetic mean) of the points.

    Args:
        点集: Points to average.  Coordinates are paired up with ``zip``,
            so if the points differ in length the extra trailing
            coordinates of the longer ones are ignored.

    Returns:
        A tuple with one float per coordinate.  An empty ``点集`` gives
        an empty tuple.

    >>> 计算中间点([(6,8),(-5,8)])
    (0.5, 8.0)
    """
    # zip(*点集) transposes the points into one column per coordinate axis.
    return tuple(map(fmean, zip(*点集)))
def kmeans(点集: "List[Point]", k: int) -> "Dict[int, List[Point]]":
    """Cluster points into ``k`` groups with one sequential k-means pass.

    ``k`` distinct positions of ``点集`` are drawn at random and the points
    found there become the initial cluster means.  Every other point is
    then visited once, in input order: it is appended to the cluster whose
    current mean is nearest (Euclidean distance) and that cluster's mean is
    updated immediately.

    Args:
        点集: Points to cluster.  All points must have the same number of
            coordinates, otherwise ``math.dist`` raises ``ValueError``.
        k: Number of clusters.

    Returns:
        A dict mapping the cluster number (``0`` .. ``k - 1``) to the list
        of points assigned to it.  Each list starts with that cluster's
        seed point, and every input point (duplicates included) appears in
        exactly one list.

    Raises:
        ValueError: If ``k`` is negative or larger than ``len(点集)``
            (raised by ``random.sample``), or if ``k`` is 0 while ``点集``
            is not empty (there is no cluster to assign to).
    """
    # Sample positions, not values: removing the seeds by value (e.g. via
    # set difference) would also discard every duplicate of a point.
    种子下标 = sample(range(len(点集)), k)
    种子位置 = set(种子下标)

    结果字典 = {编号: [点集[下标]] for 编号, 下标 in enumerate(种子下标)}
    均值字典 = {编号: tuple(成员[0]) for 编号, 成员 in 结果字典.items()}

    for 下标, 点 in enumerate(点集):
        if 下标 in 种子位置:
            continue  # seed points are already placed in their own cluster

        最近的均值编号 = min(range(k), key=lambda i: 距离(均值字典[i], 点))
        成员 = 结果字典[最近的均值编号]
        成员.append(点)

        # Running-mean update: new = old + (x - old) / n.  Equivalent to
        # re-averaging the whole cluster, without rescanning it each time.
        数量 = len(成员)
        均值字典[最近的均值编号] = tuple(
            均值 + (坐标 - 均值) / 数量
            for 均值, 坐标 in zip(均值字典[最近的均值编号], 点)
        )

    return 结果字典
if __name__ == "__main__":
    # Run the doctests embedded in this module before the demo.
    testmod(verbose=True)

    def 读取文件():
        """Yield one point per non-blank line of ``data.txt``.

        The file is looked up in the directory containing this script.
        Each line is split on whitespace and every field is converted
        with ``float``, so a non-numeric field raises ``ValueError``.
        """
        数据路径 = Path(__file__).parent.absolute() / "data.txt"
        with open(数据路径, encoding="utf-8") as 文件:
            for 每一行 in 文件:
                坐标 = tuple(map(float, 每一行.split()))
                # A blank line would yield an empty point, whose dimension
                # cannot match the real points when distances are computed.
                if 坐标:
                    yield 坐标

    print(kmeans(list(读取文件()), 5))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment