Ravish Chawla ravishchawla

## pcl.props
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">

  <ImportGroup Label="PropertySheets"/>

  <!-- User Macros -->
  <PropertyGroup Label="UserMacros">
    <PCL_ROOT Condition="'$(Platform)'=='Win32'">$(ProgramFiles)\PCL 1.7.2</PCL_ROOT>
    <PCL_ROOT Condition="'$(Platform)'=='x64'">$(ProgramW6432)\PCL 1.7.2</PCL_ROOT>
  </PropertyGroup>

## gist:54d78af72680550e43bb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                ravishchawla
                / gist:54d78af72680550e43bb
            
            
              Last active
              October 6, 2015 01:10
            
          
    Arrays

Search in a rotated sorted array

We do a binary search. At each iteration there are two sorted subarrays, one on the left and one on the right. It's just a matter of checking which sorted subarray our element lies in; we then iterate on that side.
int low = 0;
int high = nums.length-1;
int mid;

  
## compviz.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                ravishchawla
                / compviz.md
            
            
              Created
              October 7, 2015 12:23
            
          
    Hough Transform


Create a grid of parameter values
Each point votes for a set of parameters,
incrementing those values in grid
Find maximum or local maxima in grid

Good


Robust to outliers: each point votes separately
Fairly efficient (much faster than trying all sets of parameters)
Provides multiple good fits


## prbot.py
import time;
import datetime;
import requests;
import json;
import os;
import os.path;
import time;

'''
Bot to post Pull Request changes to a Slack channel.

## print_progress.py
import sys;

def print_progess(currentProgress, maxProgress, numberDashes, message = ""):
    """Redraw a single-line text progress bar on stdout.

    The line starts with a carriage return so that each call overwrites
    the bar drawn by the previous one. Nothing is returned.

    Args:
        currentProgress: Amount of work done so far.
        maxProgress: Total amount of work. A value of zero or less is
            drawn as a completed bar instead of raising
            ZeroDivisionError.
        numberDashes: Width of the full bar, in "=" characters.
        message: Optional label printed between "<" and ">" before the bar.
    """
    bar_width = int(numberDashes)

    if maxProgress > 0:
        # Both figures run one step ahead of the raw ratio (the "+ 1").
        filled = int(numberDashes * (currentProgress / maxProgress) + 1)
        percentage = int(100 * currentProgress / maxProgress) + 1
    else:
        # No work at all counts as finished.
        filled = bar_width
        percentage = 100

    # The "+ 1" overshoots once currentProgress reaches maxProgress; cap
    # both figures so the bar never grows past its nominal width.
    filled = min(filled, bar_width)
    percentage = min(percentage, 100)

    dashes = "=" * filled + "> "
    sys.stdout.write("\r " + "<" + message + "> " + dashes + "{0:.2f}".format(percentage) + "%")
    sys.stdout.flush()

## word_to_vec.py
# coding: utf-8

# # Training a Word2Vec Model on the Reddit Comments Dataset
#
# ### Ravish Chawla

# In[276]:

get_ipython().magic('matplotlib inline')

## medium_lstm_load_1.py
'''
Read reviews from a JSON-formatted file into an array.
'''
# Accumulator for the reviews; the code that appends to it is not visible
# in this excerpt.
lines = [];
# num_pos / num_neg start at zero — presumably per-class counters, each
# limited to num_total (TODO confirm against the rest of the script).
num_pos = 0; num_neg = 0; num_total = 75000;

with open('data/review.json', 'r') as f:
    # The file is consumed one line at a time.
    for line in f:
        # Stop reading as soon as lines holds num_total * 2 entries.
        if (len(lines) >= (num_total * 2)):
            break;

## medium_lstm_clean_2.py
'''
Clean each document by removing unnecessary characters and splitting by space.
'''
def clean_document(doco):
    # Normalise one document: hyphens and punctuation are turned into spaces.
    # Characters to blank out: all ASCII punctuation plus the newline
    # (the doubled '\n' adds nothing new — both map to a space below).
    punctuation = string.punctuation + '\n\n';
    # One space per entry in `punctuation`; str.maketrans needs both
    # strings to be the same length.
    punc_replace = ''.join([' ' for s in punctuation]);
    # Split hyphenated words apart before anything else.
    doco_clean = doco.replace('-', ' ');
    # Delete every non-word character that is followed by one or more spaces.
    # NOTE(review): the match is removed outright, not replaced by a space,
    # so "a, b" becomes "ab" — confirm that joining the words is intended.
    doco_alphas = re.sub(r'\W +', '', doco_clean)
    trans_table = str.maketrans(punctuation, punc_replace);
    # Translate each space-separated token and re-join with single spaces.
    # NOTE(review): no return statement is visible in this excerpt; the rest
    # of the function presumably follows.
    doco_clean = ' '.join([word.translate(trans_table) for word in doco_alphas.split(' ')]);

## medium_lstm_encode_3.py
# Fit a Keras Tokenizer on the corpus and encode every sentence with it.

tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
text_sequences = np.array(tokenizer.texts_to_sequences(sentences))

# word -> encoding, exactly as stored on the fitted tokenizer.
sequence_dict = tokenizer.word_index

# encoding -> word: sequence_dict with keys and values swapped.
word_dict = {encoding: word for word, encoding in sequence_dict.items()}

# sequence_dict maps word-to-encoding; word_dict is the inverse, encoding-to-word, map

## medium_lstm_pad_4.py
# Truncate and Pad reviews at a Maximum cap of 60 words.
max_cap = 60;
# truncating='post' — per the Keras docs this drops the excess from the END
# of over-long reviews; padding side is left at the library default.
X = pad_sequences(reviews_encoded, maxlen=max_cap, truncating='post')

# Obtain a One-hot Y array for each review label.
# A label containing the character '0' becomes [0, 1]; any other label
# becomes [1, 0].
# NOTE(review): `'0' in label` is a substring test, so a label such as '10'
# would also count as '0' — confirm labels are single characters.
Y = np.array([[0,1] if '0' in label else [1,0] for label in labels])

# Get a randomized sequence of positions to shuffle reviews
# Fixed seed so the ordering is reproducible from run to run.
np.random.seed(1024);
# At this point random_posits is still the identity ordering 0..len(X)-1;
# the shuffle itself is presumably applied past the end of this excerpt.
random_posits = np.arange(len(X))
	<?xml version="1.0" encoding="utf-8"?>
	<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">

	<ImportGroup Label="PropertySheets"/>

	<!-- User Macros -->
	<PropertyGroup Label="UserMacros">
	<PCL_ROOT Condition="'$(Platform)'=='Win32'">$(ProgramFiles)\PCL 1.7.2</PCL_ROOT>
	<PCL_ROOT Condition="'$(Platform)'=='x64'">$(ProgramW6432)\PCL 1.7.2</PCL_ROOT>
	</PropertyGroup>
	import time;
	import datetime;
	import requests;
	import json;
	import os;
	import os.path;
	import time;

	'''
	Bot to post Pull Request changes to a Slack channel.
	import sys;

	def print_progess(currentProgress, maxProgress, numberDashes, message = ""):
	    """Redraw a single-line text progress bar on stdout.

	    The line starts with a carriage return so that each call overwrites
	    the bar drawn by the previous one. Nothing is returned.

	    Args:
	        currentProgress: Amount of work done so far.
	        maxProgress: Total amount of work. A value of zero or less is
	            drawn as a completed bar instead of raising
	            ZeroDivisionError.
	        numberDashes: Width of the full bar, in "=" characters.
	        message: Optional label printed between "<" and ">" before the bar.
	    """
	    bar_width = int(numberDashes)

	    if maxProgress > 0:
	        # Both figures run one step ahead of the raw ratio (the "+ 1").
	        filled = int(numberDashes * (currentProgress / maxProgress) + 1)
	        percentage = int(100 * currentProgress / maxProgress) + 1
	    else:
	        # No work at all counts as finished.
	        filled = bar_width
	        percentage = 100

	    # The "+ 1" overshoots once currentProgress reaches maxProgress; cap
	    # both figures so the bar never grows past its nominal width.
	    filled = min(filled, bar_width)
	    percentage = min(percentage, 100)

	    dashes = "=" * filled + "> "
	    sys.stdout.write("\r " + "<" + message + "> " + dashes + "{0:.2f}".format(percentage) + "%")
	    sys.stdout.flush()
	# coding: utf-8

	# # Training a Word2Vec Model on the Reddit Comments Dataset
	#
	# ### Ravish Chawla

	# In[276]:

	get_ipython().magic('matplotlib inline')
	'''
	Read reviews from a JSON-formatted file into an array.
	'''
	lines = [];
	num_pos = 0; num_neg = 0; num_total = 75000;

	with open('data/review.json', 'r') as f:
	for line in f:
	if (len(lines) >= (num_total * 2)):
	break;
	'''
	Clean each document by removing unnecessary characters and splitting by space.
	'''
	def clean_document(doco):
	punctuation = string.punctuation + '\n\n';
	punc_replace = ''.join([' ' for s in punctuation]);
	doco_clean = doco.replace('-', ' ');
	doco_alphas = re.sub(r'\W +', '', doco_clean)
	trans_table = str.maketrans(punctuation, punc_replace);
	doco_clean = ' '.join([word.translate(trans_table) for word in doco_alphas.split(' ')]);
	# Use a Keras Tokenizer and fit on the sentences

	tokenizer = Tokenizer();
	tokenizer.fit_on_texts(sentences);
	text_sequences = np.array(tokenizer.texts_to_sequences(sentences));
	sequence_dict = tokenizer.word_index;
	word_dict = dict((num, val) for (val, num) in sequence_dict.items());

	# sequence_dict maps word-to-encoding; word_dict is the inverse, encoding-to-word, map
	# Truncate and Pad reviews at a Maximum cap of 60 words.
	max_cap = 60;
	X = pad_sequences(reviews_encoded, maxlen=max_cap, truncating='post')

	# Obtain a One-hot Y array for each review label.
	Y = np.array([[0,1] if '0' in label else [1,0] for label in labels])

	# Get a randomized sequence of positions to shuffle reviews
	np.random.seed(1024);
	random_posits = np.arange(len(X))