admin c93614

## xapianmlt.php
<?php

// Builds a Xapian expand set (suggested terms) seeded from a single document;
// presumably the start of a "more like this" lookup, judging by the file name.
// NOTE(review): $database and $search_id are defined outside this excerpt.

// Open the posting list for the term held in $search_id.
// Presumably this is a unique per-document term -- TODO confirm.
$posting = $database->postlist_begin( $search_id );
$enquire = new XapianEnquire( $database );
// Relevance set containing only the document the posting iterator points at.
$rset = new XapianRset();
$rset->add_document( $posting->get_docid() );
// Request at most 20 expansion terms derived from the relevance set.
$eset = $enquire->get_eset(20, $rset);

// Iterator positioned at the start of the expand set, plus an empty array
// that presumably collects the terms further down -- TODO confirm.
$i = $eset->begin();
$terms = array();

## jquery.ba-tinypubsub.js
/*!
 * jQuery Tiny Pub/Sub - v0.6 - 1/10/2011
 * http://benalman.com/
 *
 * Copyright (c) 2010 "Cowboy" Ben Alman
 * Dual licensed under the MIT and GPL licenses.
 * http://benalman.com/about/license/
 */

(function($){

## wawammseg.py
# -*- coding:utf-8 -*-

'写了一个简单的支持中文的正向最大匹配的机械分词,其它不用解释了，就几十行代码'
'搜狗词库下载地址：http://vdisk.weibo.com/s/7RlE5'

import string
__dict = {}

def load_dict(dict_file='words.dic'):
    '加载词库，把词库加载成一个key为首字符，value为相关词的列表的字典'

## daemon.py
#! /usr/bin/env python2.7
#encoding:utf-8
#@description:一个python守护进程的例子
#@tags:python,daemon
import sys
import os
import time
import atexit
from signal import SIGTERM

## gist:3187794
<?php

// Script setup: fix the default timezone and raise the memory limit to 1024M
// before preparing an HTTP request.
date_default_timezone_set( 'Europe/Moscow' );
ini_set( 'memory_limit', '1024M' );

// Prepare a cURL handle for a POST to the us-east-1 "email" AWS endpoint
// (presumably Amazon SES -- confirm against the rest of the script).
$ch = curl_init();
curl_setopt( $ch, CURLOPT_URL, 'https://email.us-east-1.amazonaws.com' );
curl_setopt( $ch, CURLOPT_POST, true );
// Return the response from curl_exec() as a string instead of printing it.
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
// Include the response headers in the returned output.
curl_setopt( $ch, CURLOPT_HEADER, true );

## gist:3848993
// requires
var utils = require('utils');

// Create exactly one Casper instance, configured for verbose debug logging.
// (The original also created a default-options instance first and then
// immediately overwrote it; that redundant create() call is removed.)
var casper = require('casper').create({
    verbose: true,
    logLevel: "debug"
});

// setup globals
// The address comes from the `--email` CLI option; the placeholder is used
// when the option is absent or empty.
var email = casper.cli.options['email'] || 'REPLACE THIS EMAIL';

## git_guidline.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                c93614
                / git_guidline.md
            
            
              Created
              October 16, 2012 06:26
                — forked from onlytiancai/git_guidline.md
            
              
                git分支使用规范
              
          
    分支管理

最少有三个长期分支

master: 用于生产环境部署
testing: 用于测试环境测试
dev: 用于日常开发

有一些临时分支

  
## workflow_demo.go
// callA simulates a slow call: it blocks for 300ms and then returns its
// fixed greeting.
func callA() string {
	const delay = 300 * time.Millisecond
	time.Sleep(delay)

	greeting := "Hello A"
	return greeting
}

// callB simulates a faster call: it blocks for 100ms and then returns its
// fixed greeting.
func callB() string {
	const delay = 100 * time.Millisecond
	time.Sleep(delay)

	greeting := "Hello B"
	return greeting
}

## tf-idf.php
These weights are often combined into a tf-idf value, simply by multiplying them together. The best scoring words under tf-idf are uncommon ones which are repeated many times in the text, which led early web search engines to be vulnerable to pages being stuffed with repeated terms to trick the search engines into ranking them highly for those keywords. For that reason, more complex weighting schemes are generally used, but tf-idf is still a good first step, especially for systems where no one is trying to game the system.

There are a lot of variations on the basic tf-idf idea, but a straightforward implementation might look like:

    <?php
    $tfidf = $term_frequency *  // tf
        log( $total_document_count / $documents_with_term, 2); // idf
    ?>

It's worth repeating that the IDF is the total document count over the count of the ones containing the term. So, if there were 50 documents in the collection, and two of them contained the term in question, the IDF would be 50/2 = 25. To be accurate, we s

## WordsDetector.py
# -*- coding=utf-8 -*-
# 修改了原gist里space tab混乱的情况
import feedparser
import re
import collections
import math

def info_entropy(words):
    result = 0
    total = sum([val for _, val in words.iteritems()])
	<?php

	// Builds a Xapian expand set (suggested terms) seeded from a single document.
	// NOTE(review): $database and $search_id are defined outside this excerpt.

	// Open the posting list for the term held in $search_id.
	// Presumably this is a unique per-document term -- TODO confirm.
	$posting = $database->postlist_begin( $search_id );
	$enquire = new XapianEnquire( $database );
	// Relevance set containing only the document the posting iterator points at.
	$rset = new XapianRset();
	$rset->add_document( $posting->get_docid() );
	// Request at most 20 expansion terms derived from the relevance set.
	$eset = $enquire->get_eset(20, $rset);

	// Iterator positioned at the start of the expand set, plus an empty array
	// that presumably collects the terms further down -- TODO confirm.
	$i = $eset->begin();
	$terms = array();
	/*!
	* jQuery Tiny Pub/Sub - v0.6 - 1/10/2011
	* http://benalman.com/
	*
	* Copyright (c) 2010 "Cowboy" Ben Alman
	* Dual licensed under the MIT and GPL licenses.
	* http://benalman.com/about/license/
	*/

	(function($){
	# -- coding:utf-8 --

	'写了一个简单的支持中文的正向最大匹配的机械分词,其它不用解释了，就几十行代码'
	'搜狗词库下载地址：http://vdisk.weibo.com/s/7RlE5'

	import string
	__dict = {}

	def load_dict(dict_file='words.dic'):
	'加载词库，把词库加载成一个key为首字符，value为相关词的列表的字典'
	#! /usr/bin/env python2.7
	#encoding:utf-8
	#@description:一个python守护进程的例子
	#@tags:python,daemon
	import sys
	import os
	import time
	import atexit
	from signal import SIGTERM
	<?php

	// Script setup: fix the default timezone and raise the memory limit to 1024M
	// before preparing an HTTP request.
	date_default_timezone_set( 'Europe/Moscow' );
	ini_set( 'memory_limit', '1024M' );

	// Prepare a cURL handle for a POST to the us-east-1 "email" AWS endpoint
	// (presumably Amazon SES -- confirm against the rest of the script).
	$ch = curl_init();
	curl_setopt( $ch, CURLOPT_URL, 'https://email.us-east-1.amazonaws.com' );
	curl_setopt( $ch, CURLOPT_POST, true );
	// Return the response from curl_exec() as a string instead of printing it.
	curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 );
	// Include the response headers in the returned output.
	curl_setopt( $ch, CURLOPT_HEADER, true );
	// requires
	var utils = require('utils');

	// Create exactly one Casper instance, configured for verbose debug logging.
	// (The original also created a default-options instance first and then
	// immediately overwrote it; that redundant create() call is removed.)
	var casper = require('casper').create({
		verbose: true,
		logLevel: "debug"
	});

	// setup globals
	// The address comes from the `--email` CLI option; the placeholder is used
	// when the option is absent or empty. The escaped `\|\|` in the original
	// was an extraction artifact and is restored to the `||` operator.
	var email = casper.cli.options['email'] || 'REPLACE THIS EMAIL';
	// callA simulates a slow call: it blocks for 300ms and then returns its
	// fixed greeting.
	func callA() string {
		const delay = 300 * time.Millisecond
		time.Sleep(delay)

		greeting := "Hello A"
		return greeting
	}

	// callB simulates a faster call: it blocks for 100ms and then returns its
	// fixed greeting.
	func callB() string {
		const delay = 100 * time.Millisecond
		time.Sleep(delay)

		greeting := "Hello B"
		return greeting
	}
	These weights are often combined into a tf-idf value, simply by multiplying them together. The best scoring words under tf-idf are uncommon ones which are repeated many times in the text, which led early web search engines to be vulnerable to pages being stuffed with repeated terms to trick the search engines into ranking them highly for those keywords. For that reason, more complex weighting schemes are generally used, but tf-idf is still a good first step, especially for systems where no one is trying to game the system.

	There are a lot of variations on the basic tf-idf idea, but a straightforward implementation might look like:

	<?php
	$tfidf = $term_frequency * // tf
	log( $total_document_count / $documents_with_term, 2); // idf
	?>

	It's worth repeating that the IDF is the total document count over the count of the ones containing the term. So, if there were 50 documents in the collection, and two of them contained the term in question, the IDF would be 50/2 = 25. To be accurate, we s
	# -- coding=utf-8 --
	# 修改了原gist里space tab混乱的情况
	import feedparser
	import re
	import collections
	import math

	def info_entropy(words):
	result = 0
	total = sum([val for _, val in words.iteritems()])