Skip to content

Instantly share code, notes, and snippets.

@asukaminato0721
Last active June 23, 2021 01:15
Show Gist options
  • Save asukaminato0721/c25aa4a4b1b0a98b24a0a858be1f060d to your computer and use it in GitHub Desktop.
Save asukaminato0721/c25aa4a4b1b0a98b24a0a858be1f060d to your computer and use it in GitHub Desktop.
k-means without numpy, 无第三方库的实现
from doctest import testmod
from math import dist as 距离
from pathlib import Path
from random import sample
from statistics import fmean
from typing import List, Tuple
Point = Tuple[float, ...]
def 计算中间点(点集: "List[Point]") -> "Point":
    """Return the centroid (coordinate-wise arithmetic mean) of the points.

    Args:
        点集: Points to average; every point must have the same number of
            coordinates.

    Returns:
        A tuple of floats with one mean per coordinate. An empty input
        yields the empty tuple ``()``.

    Raises:
        ValueError: If the points do not all have the same dimension.

    >>> 计算中间点([(6,8),(-5,8)])
    (0.5, 8.0)
    """
    points = list(点集)
    # zip() stops at the shortest point, so ragged input would silently lose
    # coordinates and produce a wrong centroid; reject it explicitly instead.
    if len({len(point) for point in points}) > 1:
        raise ValueError("all points must have the same number of dimensions")
    # zip(*points) transposes the points into one column per coordinate.
    return tuple(map(fmean, zip(*points)))
def kmeans(点集: "List[Point]", k: int):
    """Cluster points into ``k`` groups with a single-pass k-means.

    ``k`` input positions are drawn at random as the initial centroids.
    Every remaining point is then visited once, in input order, appended to
    the cluster whose current centroid is nearest (Euclidean distance), and
    that centroid is immediately moved to the mean of its cluster.

    Args:
        点集: Sequence of points to cluster, all of the same dimension.
        k: Number of clusters; must satisfy ``1 <= k <= len(点集)``.

    Returns:
        A dict mapping each cluster number ``0 .. k-1`` to the list of points
        assigned to it. The first element of each list is that cluster's
        initial seed. Every input point appears exactly once, duplicates
        included.

    Raises:
        ValueError: If ``k`` is less than 1 or greater than the number of
            points (the latter is raised by ``random.sample``).
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    # Sample positions rather than values: subtracting value sets would merge
    # duplicate points and drop every copy equal to a seed.
    seed_indices = sample(range(len(点集)), k)
    seed_positions = set(seed_indices)
    clusters = {i: [点集[j]] for i, j in enumerate(seed_indices)}
    centroids = {i: 点集[j] for i, j in enumerate(seed_indices)}

    for position, point in enumerate(点集):
        if position in seed_positions:
            continue
        nearest = min(range(k), key=lambda i: 距离(centroids[i], point))
        members = clusters[nearest]
        members.append(point)
        # Running-mean update: mathematically the same as re-averaging the
        # whole cluster, but costs O(dimension) instead of O(cluster size).
        size = len(members)
        centroids[nearest] = tuple(
            mean + (coord - mean) / size
            for mean, coord in zip(centroids[nearest], point)
        )
    return clusters
if __name__ == "__main__":
    # Run the doctests embedded in this module before the demo.
    testmod(verbose=True)

    def 读取文件():
        """Yield one point per non-blank line of ``data.txt``.

        The file is looked up in the directory containing this script. Each
        line is split on whitespace and every field is converted to float.
        """
        with open(Path(__file__).parent.absolute() / "data.txt") as 文件:
            for 每一行 in 文件:
                fields = 每一行.split()
                # A blank line would otherwise become an empty tuple, i.e. a
                # 0-dimensional point that breaks the distance computation.
                if fields:
                    yield tuple(map(float, fields))

    # kmeans needs a sequence, so the generator is materialised first.
    print(kmeans(list(读取文件()), 5))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment