Skip to content

Instantly share code, notes, and snippets.

// Configuration carrying the two "xmlinput.*" keys set below.
// NOTE(review): presumably these are the opening/closing tags that delimit one
// record for the XML input format this configuration is handed to — confirm.
Configuration jobConf = new Configuration();
jobConf.set("xmlinput.start", "<Start>");
jobConf.set("xmlinput.end", "</Start>");
// Two separate configurations, each holding its own "mongo.output.uri" value.
// The URIs share the same host and port and differ only in the final path
// segment (db.collection vs. db.collection2).
// NOTE(review): presumably each one is used to write a different dataset to its
// own MongoDB target — confirm against the code that consumes them.
Configuration itemConf = new Configuration();
itemConf.set("mongo.output.uri", "mongodb://localhost:27017/db.collection");
Configuration itemConf2 = new Configuration();
itemConf2.set("mongo.output.uri", "mongodb://localhost:27017/db.collection2");
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoException;
import com.mongodb.hadoop.MongoOutput;
import com.mongodb.hadoop.io.BSONWritable;
import com.mongodb.hadoop.io.MongoUpdateWritable;
import com.mongodb.hadoop.output.MongoRecordWriter;
import java.io.IOException;
@dmregister
dmregister / gist:efca36dbd3abce709b08
Last active August 29, 2015 14:16
Spark Custom Input
// Read "path.xml" through XmlInputFormat; each element is a (LongWritable, Text) pair.
JavaPairRDD<LongWritable, Text> records = ctx.newAPIHadoopFile("path.xml", XmlInputFormat.class, LongWritable.class, Text.class, jobConf);

// Turn every input record into a (String, String) pair.
// Fixes relative to the previous version:
//  - mapToPair (not map) is the Java API call that yields a JavaPairRDD;
//  - the PairFunction's key/value type arguments now match the declared
//    JavaPairRDD<String, String> result;
//  - call() receives the whole Tuple2<LongWritable, Text> element, so the
//    @Override actually overrides PairFunction.call;
//  - the returned Tuple2 is parameterised instead of raw.
JavaPairRDD<String, String> words = records.mapToPair(new PairFunction<Tuple2<LongWritable, Text>, String, String>() {
    @Override
    public Tuple2<String, String> call(Tuple2<LongWritable, Text> record) {
        // _2() is the Text half of the pair, i.e. what the old code called "line".
        Text line = record._2();
        System.out.println(line);
        // Placeholder output, unchanged from the original: an empty key and value.
        return new Tuple2<String, String>("", "");
    }
});
@dmregister
dmregister / gist:ad90537feb15fd4db12d
Created December 3, 2014 22:03
Tokenizer:Keyword
GET /my_index/_analyze?analyzer=search_keyword_analyzer&text=re/max
{
"tokens": [
{
"token": "r",
"start_offset": 0,
"end_offset": 6,
"type": "word",
"position": 1
{
"id": 1,
"searchId": "63fffacb209cef6ad958a10b01f43b70",
"listingCategory": "For Sale",
"propertyType": "Any",
"neigborhood": "East Little Havana",
"city": "Miami",
"state": "FL",
"stateFull": "Florida",
"searchString": "East Little Havana, Miami, FL", //this was changed from location
{"index":{"_index":"autocompletecities","_type":"city"}}
{"city":"Adak","state":"AK","suggestCity":{"output":"Adak, AK","input":["Adak","Adak AK","Adak, AK"],"weight":0,"payload":{"lat":51.88214,"lon":-176.59993}},"location":{"lat":51.88214,"lon":-176.59993}}
@dmregister
dmregister / gist:72d4d067cdcc0035c290
Created September 11, 2014 19:18
Logstash config and sample
#config
input {
file {
type => "application-error"
path => "E:/wamp/logs/application-error.log"
tags => [ "hubdin-error", "application-error" ]
codec => json
}
file {
type => "application-info"
{
"data": {
"listings": [
{
"id": 42,
"listingId": 1212995,
"address": "2020 N BAYSHORE DR # 2110",
"city": "Miami",
"state": "FL",
"zipcode": "33137",
@dmregister
dmregister / gist:98a314904f6f3a0a6378
Created June 11, 2014 13:47
format for deleting multiple items
#area
{
"area": [
{
"id": 13
},
{
"id": 14
}
]
Array
(
[query] => Array
(
[filtered] => Array
(
[query] => Array
(
[match_all] => Array
(