Skip to content

Instantly share code, notes, and snippets.

@ansjsun
ansjsun / tantivy
Created August 18, 2020 04:45
tantivy analyzer
use super::BoxTokenStream;
use super::{Token, TokenStream, Tokenizer};
use std::str::Chars;
/// Tokenize the text by natural division.
///
/// Declared as a field-less unit struct that derives `Clone`.
#[derive(Clone)]
pub struct StandardTokenizer;
pub struct StandardTokenStream<'a> {
text: &'a str,
@ansjsun
ansjsun / chain
Created December 8, 2018 15:10
go rpc chan
````
package domain
import "github.com/tiglabs/baudengine/util/log"
// Handler declares a single method, Execute, which receives a *Request and a
// *Response and returns an error.
type Handler interface {
Execute(request *Request, response *Response) error
}
type Chain struct {
@ansjsun
ansjsun / chain
Created December 8, 2018 15:10
go rpc chan
````
package domain
import "github.com/tiglabs/baudengine/util/log"
// Handler declares a single method, Execute, which receives a *Request and a
// *Response and returns an error.
type Handler interface {
Execute(request *Request, response *Response) error
}
type Chain struct {
@ansjsun
ansjsun / gist:c7252bbb4d6c100f62c6
Last active August 29, 2015 14:01
从当前系统变量中找到某个文件或者文件夹.
package org.nlpchina.web.util;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
/**
* 从系统各个环境中找文件
@ansjsun
ansjsun / gist:ca155b7495350ae57f92
Created May 12, 2014 11:57
logistic_regression
import math ;
import matplotlib.pyplot as plt
import re;
data = [] ;
class instance:
def __init__(self,array):
self.id = array[0]
@ansjsun
ansjsun / gist:b4d5e80cca99b05d1903
Created May 10, 2014 12:00
python 梯度线性回归
import math ;
import matplotlib.pyplot as plt
x_list = [2.0658746, 2.3684087, 2.5399929, 2.5420804, 2.549079, 2.7866882, 2.9116825, 3.035627, 3.1146696, 3.1582389, 3.3275944, 3.3793165, 3.4122006, 3.4215823, 3.5315732, 3.6393002, 3.6732537, 3.9256462, 4.0498646, 4.2483348, 4.3440052, 4.3826531, 4.4230602, 4.6102443, 4.6881183, 4.9777333, 5.0359967, 5.0684536, 5.4161491, 5.4395623, 5.4563207, 5.5698458, 5.6015729, 5.6877617, 5.7215602, 5.8538914, 6.1978026, 6.3510941, 6.4797033, 6.7383791, 6.8637686, 7.0223387, 7.0782373, 7.1514232, 7.4664023, 7.5973874, 7.7440717, 7.7729662, 7.8264514, 7.9306356]
y_list = [0.77918926, 0.91596757, 0.90538354, 0.90566138, 0.9389889, 0.9668474, 0.96436824, 0.91445939, 0.93933944, 0.96074971, 0.89837094, 0.91209739, 0.94238499, 0.96624578, 1.05265, 1.0143791, 0.95969426, 0.96853716, 1.0766065, 1.1454978, 1.0340625, 1.0070009, 0.96683648, 1.0895919, 1.0634462, 1.1237239, 1.0323374, 1.0874452, 1.0702988, 1.1606493, 1.0778037, 1.1069758, 1.0971875, 1.1648603, 1.1411796, 1.0844156,
@ansjsun
ansjsun / gist:10817327
Last active August 29, 2015 13:59
知识表述

wordnet

Wordnet 的基本设计原理是它的“词汇矩阵模型”。一个词汇矩阵从理论上可以用单词与其同义词集合之间的映射来表示。当某个词有多个同义词时,通常同义词集合足以满足差异性的要求。当然,同义词是词形之间的一种词汇关系,但由于这种关系在Wordnet中被赋予了中心角色,因此同义关系的词被放在{}中,与其他被放进[]中的词汇关系的词区别开来。Wordnet是按语义关系组织的,由于语义关系是多个词义之间的关系,而词义用同义词集合来表示,因此很自然地把语义关系看作同义词集合之间的一些指针。

entity news epg event org actor tag

  • 1.每一个实体名词都可以用这份tag词表进行描述
  • 2.每一个文章都可以用这份tag词表进行描述
  • 3.每一个epg都可以用这份tag词表进行描述
  • 5.tag词表中的词可以重复。由文章的上下文来确定具体对应的实体
  • 6.所对应的实体,有自身的属性,这个属性具有归一化的特点。需要处理别名操作(属性目前的作用也许不是那么重要)
@ansjsun
ansjsun / l-bfgs.py
Created January 15, 2014 13:22 — forked from yuyay/l-bfgs.py
import numpy as np
from scipy.optimize import fmin_bfgs
def rosen(x):
    """Return the Rosenbrock function value at *x*.

    Evaluates ``sum(100 * (x[i+1] - x[i]**2)**2 + (1 - x[i])**2)`` over
    every consecutive pair of coordinates.

    Args:
        x: 1-D NumPy array of coordinates; it is sliced and used in
            elementwise arithmetic.

    Returns:
        The scalar function value. An input with fewer than two elements
        yields 0 because there are no consecutive pairs to sum over.
    """
    # Both terms must range over x[:-1]. Slicing with x[:1] would broadcast
    # the first coordinate into every term instead of using each x[i].
    return sum(100.0 * (x[1:] - x[:-1] ** 2.0) ** 2.0 + (1 - x[:-1]) ** 2.0)
def rosen_der(x):
xm = x[1:-1]
xm_m1 = x[:-2]
xm_p1 = x[2:]
@ansjsun
ansjsun / gist:7444020
Last active December 28, 2015 04:39
感知机-Perceptron
package org.ansj.ml;
import java.util.Arrays;
public class Perceptron {
public static void main(String[] args) {
//三个点
int[][] T = { { 3, 3 }, { 4, 3 }, { 1, 1 } };
//定义应三个点类别
@ansjsun
ansjsun / crf.py
Created November 8, 2013 03:56 — forked from neubig/crf.py
#!/usr/bin/python
# crf.py (by Graham Neubig)
# This script trains conditional random fields (CRFs)
# stdin: A corpus of WORD_POS WORD_POS WORD_POS sentences
# stdout: Feature vectors for emission and transition properties
from collections import defaultdict
from math import log, exp
import sys