Skip to content

Instantly share code, notes, and snippets.

@dengshilong
dengshilong / sphinxclient.c
Last active August 29, 2015 14:03
发现sphinxclient.c的API竟然没有把索引flush到硬盘的功能,于是写了一个增加了flush功能的sphinxclient.c
//
// $Id: sphinxclient.c 4097 2013-08-20 09:28:24Z kevg $
//
//
// Copyright (c) 2001-2013, Andrew Aksyonoff
// Copyright (c) 2008-2013, Sphinx Technologies Inc
// All rights reserved
//
// This program is free software; you can redistribute it and/or modify
@dengshilong
dengshilong / crawl_buzz.py
Created November 21, 2013 09:27
抓取百度风云榜的热词。在urls.txt中写入需要抓取的二级分类链接,例如 http://top.baidu.com/buzz?b=18
# -*- encoding: UTF-8 -*-
import urllib2
import re
from datetime import date
def get_page(url):
"""得到一个网页的内容"""
try:
print "crawling %s" % url
headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5",\
"Referer": 'http://www.baidu.com'}
import math
from operator import itemgetter, attrgetter
from collections import defaultdict
def UserSimilarity(train):
#build inverse table for item_users
print "build inverse table for item_users"
item_users = defaultdict(set)
for u,items in train.iteritems():
for i in items.keys():
item_users[i].add(u)
@dengshilong
dengshilong / send_file.py
Last active August 2, 2016 02:55
使用smtplib的例子
#--*-- encoding:utf-8 --*--
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
from datetime import date
def sendMail(user,pwd,to,subject,filename):
outer = MIMEMultipart()
outer['From'] = user
outer['To'] = to
@dengshilong
dengshilong / count_program_line.py
Created December 3, 2012 07:02
统计指定目录下的源文件行数
# -*- coding: utf-8 -*-
"""统计指定目录下,指定源文件类型的总行数"""
from collections import defaultdict
import os
d = defaultdict(int)
path = r"D:/vc++workspaces" #路径
f = ['.c','.cpp'] #需要统计源文件的类型
for dirpath,dirnames,filenames in os.walk(path):
for filename in filenames:
path = os.path.join(dirpath, filename)