Jordan Frank jwf-zz

## imdb-sentiment-vw.sh
#!/bin/bash

# Requires vw (https://github.com/JohnLangford/vowpal_wabbit/wiki/),
# the IMDB dataset (http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz),
# and the perf utility from http://osmot.cs.cornell.edu/kddcup/software.html.

#######################################
# Build the positive-class Vowpal Wabbit training file from the IMDB
# bag-of-words features.
#
# Keeps only the lines whose leading rating is 7, 8, 9 or 10, strips that
# rating and the single whitespace character after it, and writes each
# remaining feature list as:
#   1 'pos_<k> |features <features>
# where <k> numbers the kept lines starting at 0.
#
# Arguments:
#   $1 - input feature file (default: aclImdb/train/labeledBow.feat)
#   $2 - output file        (default: train.vw)
# Outputs:
#   Overwrites the output file.
# Returns:
#   Exit status of the sed | awk pipeline.
#######################################
build_positive_train() {
  local src=${1:-aclImdb/train/labeledBow.feat}
  local dst=${2:-train.vw}

  # A single sed pass both selects and strips: with -n, the p flag prints only
  # lines where the substitution happened. -E plus [[:space:]] replaces the
  # GNU-only BRE extensions \| and \s. In awk, \047 is the octal escape for a
  # single quote, which avoids breaking out of the shell's single quotes.
  sed -E -n 's/^([7-9]|10)[[:space:]]//p' < "$src" \
    | awk '{ print "1 \047pos_" (NR - 1) " |features " $0 }' > "$dst"
}

build_positive_train

## print_words.py
#!/usr/bin/env python
import sys

# Load the vocabulary file into Dict: one entry per line, in file order,
# with leading/trailing whitespace (including the newline) removed.
with open('aclImdb/imdb.vocab','r') as vocab_file:
    Dict = [entry.strip() for entry in vocab_file]

with open('audit.log','r') as f:
    f.readline()
	#!/bin/bash

	# Requires vw (https://github.com/JohnLangford/vowpal_wabbit/wiki/),
	# the IMDB dataset (http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz),
	# and the perf utility from http://osmot.cs.cornell.edu/kddcup/software.html.

	cat aclImdb/train/labeledBow.feat | \
	  sed -n 's/^\([7-9]\|10\)\s/&/p' | \
	  sed -e "s/^\([7-9]\|10\)\s//" | \
	  awk '{ print "1 '"'"'pos_" (NR-1) " |features " $0}' > train.vw
	#!/usr/bin/env python
	import sys

	Dict = []
	with open('aclImdb/imdb.vocab','r') as f:
	    for line in f:
	        Dict.append(line.strip())

	with open('audit.log','r') as f:
	    f.readline()