Skip to content

Instantly share code, notes, and snippets.

@sheimi
sheimi / LimitedInstances.java
Created June 23, 2012 14:14 — forked from kavinyao/LimitedInstances.java
A solution to the limited-instances problem using the Proxy Pattern
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
/*
* A solution using Proxy Pattern to limited instances problem
* Requirement:
* class RareResource must have no more than 3 instances at any run time.
* Note:
* Black magic like reflection is out of consideration as you can never prevent it.
@sheimi
sheimi / listToDict.py
Created July 23, 2012 05:39
convert a list to a dict
# Pair each key with the value at the same position, then build a dict.
keys = ['a', 'b']
values = ['c', 'd']
dictionary = {key: value for key, value in zip(keys, values)}
@sheimi
sheimi / CVJNI.java
Created September 5, 2012 08:42
JNI Demo (invoke OpenCV ...)
import java.io.*;
public class CVJNI {
//Load jni library
static {
try {
System.loadLibrary("cvjni");
} catch (Exception e) {
e.printStackTrace();
}
@sheimi
sheimi / Iter.java
Created January 11, 2013 15:17
software-architecture-review-4: Programming to Interface (Iterator)
import java.util.*;
public class Iter {
public static void main(String[] args) {
Aggregate<Integer, Integer, Integer> ag = new DataList<Integer, Integer, Integer>();
((DataList)ag).add(new ThreeD<Integer, Integer, Integer>(1, 2, 3));
((DataList)ag).add(new ThreeD<Integer, Integer, Integer>(1, 2, 3));
((DataList)ag).add(new ThreeD<Integer, Integer, Integer>(1, 2, 3));
Iterator<Integer> ia = ag.createIteratorA();
while (ia.next() != null) {
@sheimi
sheimi / HelloJNI-modified.java
Created November 9, 2014 05:01
code in blog.sheimi.me: hello JNI
//Here is HelloJNI.java
public class HelloJNI {
static {
try {
Class c = HelloJNI.class;
URL location =
c.getProtectionDomain().getCodeSource().getLocation();
ZipFile zf = new ZipFile(location.getPath());
// libhellojni.so is put in the lib folder
@sheimi
sheimi / crawl.java
Last active August 29, 2015 14:09
code in blog.sheimi.me: 2012-05-13-hadoop-source-code-01 (1)
// code snippet in nutch
Injector injector = new Injector(getConf());
Generator generator = new Generator(getConf());
Fetcher fetcher = new Fetcher(getConf());
ParseSegment parseSegment = new ParseSegment(getConf());
CrawlDb crawlDbTool = new CrawlDb(getConf());
LinkDb linkDbTool = new LinkDb(getConf());
injector.inject(crawlDb, rootUrlDir);
int i;
@sheimi
sheimi / crawl.java
Created November 9, 2014 05:23
code in blog.sheimi.me: 2012-05-13-hadoop-source-code-01 (2)
linkDbTool.invert(linkDb, segments, true, true, false); // invert links
if (solrUrl != null) {
// index, dedup & merge
FileStatus[] fstats = fs.listStatus(segments, HadoopFSUtil.getPassDirectoriesFilter(fs));
SolrIndexer indexer = new SolrIndexer(getConf());
indexer.indexSolr(solrUrl, crawlDb, linkDb,
Arrays.asList(HadoopFSUtil.getPaths(fstats)));
SolrDeleteDuplicates dedup = new SolrDeleteDuplicates();
dedup.setConf(getConf());
@sheimi
sheimi / injector.java
Created November 9, 2014 05:27
code in blog.sheimi.me: 2012-05-17-source-code-02 (1) injector
if (LOG.isInfoEnabled()) {
LOG.info("Injector: Converting injected urls to crawl db entries.");
}
JobConf sortJob = new NutchJob(getConf());
sortJob.setJobName("inject " + urlDir);
FileInputFormat.addInputPath(sortJob, urlDir);
sortJob.setMapperClass(InjectMapper.class);
FileOutputFormat.setOutputPath(sortJob, tempDir);
sortJob.setOutputFormat(SequenceFileOutputFormat.class);
@sheimi
sheimi / injector.java
Created November 9, 2014 05:28
code in blog.sheimi.me: 2012-05-17-source-code-02 (2) injector
public static JobConf createJob(Configuration config, Path crawlDb)
throws IOException {
Path newCrawlDb = new Path(crawlDb,
Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
JobConf job = new NutchJob(config);
job.setJobName("crawldb " + crawlDb);
Path current = new Path(crawlDb, CURRENT_NAME);
if (FileSystem.get(job).exists(current)) {
@sheimi
sheimi / injector.java
Created November 9, 2014 05:29
code in blog.sheimi.me: 2012-05-17-source-code-02 (3) injector
public static void install(JobConf job, Path crawlDb) throws IOException {
Path newCrawlDb = FileOutputFormat.getOutputPath(job);
FileSystem fs = new JobClient(job).getFs();
Path old = new Path(crawlDb, "old");
Path current = new Path(crawlDb, CURRENT_NAME);
if (fs.exists(current)) {
if (fs.exists(old)) fs.delete(old, true);
fs.rename(current, old);
}
fs.mkdirs(crawlDb);