Skip to content

Instantly share code, notes, and snippets.

View vatsan's full-sized avatar

Srivatsan Ramanujam vatsan

View GitHub Profile
-- (Re)create posdemo.tag_pos: a set-returning SQL function bound to the Java
-- method postagger.nlp.POSTagger.tagTweet via the PLJAVAU language.
-- It takes a single varchar argument and yields rows of type token_tag.
DROP FUNCTION IF EXISTS posdemo.tag_pos(character varying);

CREATE FUNCTION posdemo.tag_pos(character varying)
    RETURNS SETOF token_tag
    AS 'postagger.nlp.POSTagger.tagTweet'
    LANGUAGE pljavau
    IMMUTABLE;
package postagger.util;
/**
* A class to hold the {token, index, tag} triplet.
* @author Srivatsan Ramanujam<vatsan.cs@utexas.edu>
*
*/
public class TaggedResult {
private int index;
private String token;
package postagger.util;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.postgresql.pljava.ResultSetProvider;
/**
* Class implementing PL/Java’s ResultSetProvider interface, to handle the return of rows of
package postagger.nlp;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.postgresql.pljava.ResultSetProvider;
import postagger.util.TaggedResult;
import postagger.util.TaggedResultProvider;
//CMU Ark Tweet NLP package (GPL v2 license)
import cmu.arktweetnlp.*;
import cmu.arktweetnlp.Tagger.TaggedToken;
-- Run the tagger over every tweet in posdemo.training_data and expand each
-- returned composite row into its indx / token / tag fields.
-- The function call lives in the inner query; the outer query only unpacks its result.
SELECT tagged.id,
       (tagged.tag_row).indx,
       (tagged.tag_row).token,
       (tagged.tag_row).tag
FROM (
    SELECT src.id,
           posdemo.tag_pos(src.tweet_body) AS tag_row
    FROM posdemo.training_data AS src
) AS tagged;
id | indx | token | tag
------------+------+----------+-----
1467810672 | 0 | is | V
1467810672 | 1 | upset | A
1467810672 | 2 | that | P
1467810672 | 3 | he | O
1467810672 | 4 | can't | V
1467810672 | 5 | update | V
1467810672 | 6 | his | D
1467810672 | 7 | Facebook | ^
Table "posdemo.training_data"
Column | Type | Modifiers | Storage | Description
------------+-----------------------------+-----------+----------+-------------
id | bigint | | plain |
ts | timestamp without time zone | | plain |
poster | text | | extended |
tweet_body | text | | extended |
Has OIDs: no
Distributed by: (id)
vatsandb=# select id, tweet_body from posdemo.training_data limit 5;
id | tweet_body
------------+-----------------------------------------------------------------------------------------
1467820906 | @localtweeps Wow, tons of replies from you, may have to unfollow so I can see my friends' tweets, you're scrolling the feed a lot.
1467862806 | @MySteezRadio I'm goin' to follow u, since u didn't LOL GO ANGELS!
1467891880 | Argh! I was suuuper sleepy an hour ago, now I'm wide awake. Hope I don't stay up all night. :-/
1467896211 | michigan state you make me sad
1467911846 | @bananaface IM SORRY I GOT YOU SICK. lol. going to bed too. NIGHT!
public static class ImageToSequenceMapper extends Mapper<LongWritable,Text,Text, Text> {
// The input to each mapper is a set of lines from a text file on HDFS, where each line contains a path to an HDFS image file
// The job input format is NLineInputFormat and the job output format is TextOutputFormat
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// The input argument value contains the HDFS path to the image file
String imgName = value.toString();
FileSystem fs = FileSystem.get(context.getConfiguration());
FSDataInputStream in = null;
# Build canny_edge_detection.so, a position-independent shared library, from
# CannyEdgeDetectionCtypes.cpp and link it against OpenCV.
# The -l options come AFTER the source file: the linker resolves libraries left
# to right, so a library named before the object that references it may be
# skipped (static archives, or toolchains that default to --as-needed).
# Libraries are ordered from most to least dependent (highgui -> imgproc -> core).
g++ -shared -fPIC -Wl,-soname,canny_edge_detection -o canny_edge_detection.so CannyEdgeDetectionCtypes.cpp -lopencv_highgui -lopencv_imgproc -lopencv_core