Skip to content

Instantly share code, notes, and snippets.

@ansjsun
ansjsun / BinarySearchAbsMin
Created January 29, 2013 05:31
#面试编程题#一个有序数组(从小到大排列),数组中的数据有正有负,求这个数组中的最小绝对值。
public class BinarySearchAbsMin {
/**
 * Demo entry point: runs {@code searchAbsMin} over a sample array and prints
 * the value it returns.
 *
 * <p>The exercise is stated for an array sorted in ascending order that
 * contains both negative and positive values, so the sample data below is
 * kept strictly ascending.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // Must be ascending: searchAbsMin is labelled a binary search, which
    // presupposes sorted input. Negatives come first, then positives.
    int[] ints = { -3213, -453, -232, -5, -3, -2, 3, 8, 834, 999, 2342 };
    // NOTE(review): ints.length is passed as the end bound; whether the helper
    // treats it as exclusive cannot be confirmed from here.
    int min = searchAbsMin(ints, 0, ints.length);
    System.out.println(min);
}
// 二分查找
private static int searchAbsMin(int[] ints, int start, int end) {
@ansjsun
ansjsun / gist:5290005
Created April 2, 2013 04:47
AnsjStringUtil去除html标记(非严格)
public class AnsjStringUtil {
private static int[] filter = new int[128];
private static int[] filterEnd = new int[128];
static {
filter['<'] = Integer.MAX_VALUE/2;
filterEnd['<'] = '>';
filter['&'] = 10;
@ansjsun
ansjsun / gist:5290012
Created April 2, 2013 04:48
TagWord关键字标红
import java.lang.reflect.Array;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
/**
* 文本做标记用的 tagBegin是开始标记 tagEnd 是结束标记 用了二分法查找来确定单词 content 是传入的正文 正文可以传多次
* 标记词语也可以传多次
*
@ansjsun
ansjsun / gist:5355112
Created April 10, 2013 14:27
逻辑回归代码
public class LogisticRegression {
public int N;
public int n_in;
public int n_out;
public double[][] W;
public double[] b;
public LogisticRegression(int N, int n_in, int n_out) {
this.N = N;
this.n_in = n_in;
@ansjsun
ansjsun / gist:5791394
Created June 16, 2013 08:31
字符串gzip压缩后通过字符串传输数据demo
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
public class StringZip {
public static void main(String[] args) throws Exception {
StringBuilder value = new StringBuilder() ;
@ansjsun
ansjsun / gist:6005498
Created July 16, 2013 03:21
html文章正文抽取类.计算正文
package com.kuyun.nlp;
import java.util.ArrayList;
import java.util.List;
import com.kuyun.nlp.util.PageDown;
public class HtmlExtraction {
public static void main(String[] args) {
String html = PageDown
@ansjsun
ansjsun / gist:6304960
Created August 22, 2013 09:18
word2vec java 读取 model
package com.ansj.vec;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@ansjsun
ansjsun / gist:7102015
Last active December 26, 2015 05:39
动态规划背包问题.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* 一个数组中查找使得两个数组平均化的解
*
* @author ansj
*
*/
@ansjsun
ansjsun / crf.py
Created November 8, 2013 03:56 — forked from neubig/crf.py
#!/usr/bin/python
# crf.py (by Graham Neubig)
# This script trains conditional random fields (CRFs)
# stdin: A corpus of WORD_POS WORD_POS WORD_POS sentences
# stdout: Feature vectors for emission and transition properties
from collections import defaultdict
from math import log, exp
import sys
@ansjsun
ansjsun / gist:7444020
Last active December 28, 2015 04:39
感知机-Perceptron
package org.ansj.ml;
import java.util.Arrays;
public class Perceptron {
public static void main(String[] args) {
//三个点
int[][] T = { { 3, 3 }, { 4, 3 }, { 1, 1 } };
//定义对应三个点的类别