Skip to content

Instantly share code, notes, and snippets.

# coding=utf-8
from selenium import webdriver
from bs4 import BeautifulSoup
import time
# Import the exception by name: ``from selenium import webdriver`` does not
# bind the name ``selenium``, so spelling the exception as
# ``selenium.common.exceptions.TimeoutException`` in the except clause would
# raise NameError at the moment a timeout actually occurs.
from selenium.common.exceptions import TimeoutException

driver = webdriver.Chrome()
# Give up on any page load that takes longer than 10 seconds.
driver.set_page_load_timeout(10)
try:
    driver.get("http://music.163.com/#/song?id=409060868")
except TimeoutException:
    # Best-effort: report the timeout and carry on with the open session.
    print("time out of 10 s")
# coding=utf-8
from numpy import *
import operator
# Classifier: inx is the data to predict, dataset is the training data set,
# labels holds the labels of the training data set.
def classify(inx,dataset,labels,k):
    """Compute the distance from ``inx`` to every row of ``dataset``.

    Only the distance step is visible in this listing.
    NOTE(review): ``k`` is presumably the number of nearest neighbours to
    consult -- confirm against the full function.
    """
    # Compute the distance between the data to predict and the training data.
    datasize=dataset.shape[0]  # number of rows in dataset
    d=tile(inx,(datasize,1))-dataset  # repeat inx once per row, then subtract element-wise
    dd=d**2
    distance=(dd.sum(axis=1))**0.5  # square root of each row's sum of squares
    # NOTE(review): the listing stops here -- ``labels`` and ``k`` are not
    # used in the lines shown and nothing is returned yet.
# coding=utf-8
import urllib2
from bs4 import BeautifulSoup
class BDTB:
    """Container for the settings and running state set up in ``__init__``.

    NOTE(review): presumably a forum-thread scraper (the surrounding file
    imports urllib2 and BeautifulSoup) -- confirm against the full class;
    only the constructor is shown here.
    """

    def __init__(self, baseurl, seeLZ, floorTag):
        """Store the constructor arguments and initialise the state fields.

        Args:
            baseurl: stored unchanged as ``self.baseurl``.
            seeLZ: converted with ``str()`` and appended to ``'?see_lz='``;
                the resulting fragment is stored as ``self.seeLZ``.
            floorTag: stored unchanged as ``self.floorTag``.
        """
        self.baseurl = baseurl
        # Pre-built query-string fragment, e.g. '?see_lz=1'.
        self.seeLZ = '?see_lz=' + str(seeLZ)
        # No file is attached at construction time.
        self.file = None
        # Counter starts at 1.
        self.floor = 1
        self.floorTag = floorTag
# coding=utf-8
# Test data set
def sampletrain(cl):
    """Feed a fixed set of five labelled sample sentences to ``cl``.

    Args:
        cl: any object exposing ``train(item, category)``.

    Each sample is passed as ``cl.train(text, label)`` in the order listed
    below; nothing is returned.
    """
    samples = (
        ('Nobody owns the water.', 'good'),
        ('the quick rabbit jumps fences', 'good'),
        ('buy pharmaceuticals now', 'bad'),
        ('make quick money at the online casino', 'bad'),
        ('the quick brown fox jumps', 'good'),
    )
    for text, label in samples:
        cl.train(text, label)
# Use a regular expression to extract features from text
import re