This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package default; | |
import org.apache.lucene.util.automaton.Automaton; | |
import org.apache.lucene.util.automaton.BasicAutomata; | |
import org.apache.lucene.util.automaton.RegExp; | |
public class RegexAutomatonTest { | |
public void testSSN() { | |
Automaton full = new RegExp("[0-9]{3}-[0-9]{2}-[0-9]{4}").toAutomaton(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<ivy-module version="2.0"> | |
<info organisation="demo" module="trihug-kafka-demo"/> | |
<configurations> | |
<conf name="default"/> | |
</configurations> | |
<dependencies> | |
<dependency org="org.apache.kafka" name="kafka_2.9.2" rev="0.8.0-beta1" conf="default->default"/> | |
<exclude org="com.sun.jdmk"/> | |
<exclude org="com.sun.jmx"/> | |
</dependencies> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version='1.0' encoding='UTF-8'?> | |
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0"> | |
<modelVersion>4.0.0</modelVersion> | |
<groupId>org.apache.kafka</groupId> | |
<artifactId>kafka_2.9.2</artifactId> | |
<packaging>jar</packaging> | |
<description>kafka</description> | |
<version>0.8.0-beta1</version> | |
<name>kafka</name> | |
<organization> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*Pig script to convert the user,movie,rating,timestamp data to a user-user graph for running adsorption algorithm. | |
The format of the input data is | |
1::122::5::838985046 | |
*/ | |
/*Loading the data into a table. The delimiter might be different for different inputs. */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException; | |
import java.util.Map; | |
import org.apache.pig.EvalFunc; | |
import org.apache.pig.data.DataType; | |
import org.apache.pig.data.Tuple; | |
/** | |
* Simple UDF to allow modifying an existing map[] datum | |
* | |
* Usage: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from threading import Thread, Event | |
from Queue import Queue | |
class Proc(Thread): | |
def __init__(self, in_queue): | |
Thread.__init__(self) | |
self.in_queue = in_queue | |
self.die = Event() | |
def stop(self): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
length, 29: 00 00 00 1d | |
api key: 00 07 | |
api version: 00 00 | |
correlation 42: 00 00 00 2a | |
clientId "foo": 00 03 66 6f 6f | |
group "test-group": 00 0a 74 65 73 74 2d 67 72 6f 75 70 | |
array length: 00 00 00 00 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.concurrent.atomic.AtomicLong; | |
import java.util.concurrent.BlockingQueue; | |
import java.util.concurrent.LinkedBlockingQueue; | |
import java.util.concurrent.TimeUnit; | |
import java.util.concurrent.Executors; | |
import java.util.concurrent.ExecutorService; | |
import java.io.BufferedReader; | |
import java.io.FileReader; | |
import java.io.File; | |
import java.io.IOException; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<fields> | |
<field name="id" type="string" indexed="true" stored="true" required="true"/> | |
<field name="_version_" type="long" indexed="true" stored="true"/> | |
<!-- DocValue fields --> | |
<field name="threadId_dv" type="string" indexed="false" stored="false" docValues="true" default=""/> | |
<field name="docId_dv" type="tint" indexed="false" stored="false" docValues="true" default="0"/> | |
<field name="wordId_dv" type="tint" indexed="false" stored="false" docValues="true" default="0"/> | |
<field name="word_dv" type="string" indexed="false" stored="false" docValues="true" default=""/> | |
<field name="count_dv" type="tint" indexed="false" stored="false" docValues="true" default="0"/> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public static class SolrUpdater implements Runnable { | |
private final UpdateRequest req = new UpdateRequest(); | |
private final SolrServer solr; | |
private final BlockingQueue<String> strings; | |
private final AtomicLong id; | |
private final int batchSize = 100; | |
private volatile int batchedUpdates = 0; | |
public SolrUpdater(SolrServer solr, BlockingQueue<String> strings, | |
AtomicLong id) { |