Skip to content

Instantly share code, notes, and snippets.

View saraswatmks's full-sized avatar
💭
I may be slow to respond.

Manish Saraswat saraswatmks

💭
I may be slow to respond.
View GitHub Profile
@saraswatmks
saraswatmks / gradient_descent
Last active March 3, 2017 12:30
gradient_descent
import numpy as np
import random
def gradient_descent(alpha, x, y, ep=0.0001, max_iter=10000):
converged = False
iter = 0
m = x.shape[0] # number of samples
# initial theta
@saraswatmks
saraswatmks / ML_Challenge_0.93
Last active March 26, 2017 06:39
ML_Challenge_01
path <- "/home//Desktop/MLC01/"
setwd(path)
# Load data and libraries -------------------------------------------------
library(data.table)
library(h2o)
library(caret)
library(stringr)
library(e1071)
@saraswatmks
saraswatmks / TextMining_Tutorial.R
Created April 4, 2017 11:28
Text Mining Tutorial on Kaggle DataSet
library(data.table)
library(jsonlite)
library(purrr)
library(RecordLinkage)
library(stringr)
library(tm)
traind <- fromJSON("train.json")
test <- fromJSON("test.json")
@saraswatmks
saraswatmks / readfiles.R
Created May 30, 2017 07:33
Read files in R
library(data.table)
library(jsonlite)
#### Train
train <- fromJSON("train_data.json")
train_data <- data.table(ID = unlist(names(train)))
train_data[, `:=` (genres = unlist(lapply(train, '[',1)),
titles = unlist(lapply(train, '[',2)),
@saraswatmks
saraswatmks / readfiles.py
Created May 30, 2017 09:19
Json to Py Dataframe
# coding: utf-8
import json
import pandas as pd
file_name_1 = "train_data.json"
with open(file_name_1, 'r') as jsonfile1:
data_dict_1 = json.load(jsonfile1)
file_name_2 = "test_data.json"
with open(file_name_2, 'r') as jsonfile2:
@saraswatmks
saraswatmks / between.py
Created June 25, 2017 07:45
Pandas Update a Column Based on Range
# Flag which cast-length samples of a slab fall inside each recorded defect range.
# table_1 holds one row per length sample; table_2 holds one row per defect range.
# Size the Slab_ID column from the generated lengths rather than a hard-coded 19,
# so the two columns cannot disagree if float rounding changes the arange length.
cast_lengths = np.arange(1.1, 3, 0.1)
table_1 = pd.DataFrame({'Slab_ID': np.repeat('1_1', len(cast_lengths)),
                        'ActCastLength': cast_lengths})
table_2 = pd.DataFrame({'Slab_ID': ['1_1', '1_1'],
                        'DefectStart_Y': [1.5, 2.3],
                        'DefectEnd_Y': [1.8, 2.5],
                        'Defective': [1, 1]})

# Add one 0/1 indicator column per defect row, named 'Defective<row label>'.
# Series.between includes both endpoints by default.
for c in table_2.index:
    in_range = table_1['ActCastLength'].between(table_2.loc[c, 'DefectStart_Y'],
                                                table_2.loc[c, 'DefectEnd_Y'])
    # Cast the boolean mask straight to integers instead of looping over it.
    table_1['Defective' + str(c)] = in_range.astype(int)
@saraswatmks
saraswatmks / address file
Last active July 5, 2017 06:29
Address File
library(data.table)
sdata <- fread("address_data.csv")
head(sdata)
setnames(sdata,"x","address")
# some cleaning
# `:=` is only valid inside `[.data.table`, so both updates go through sdata[, ...].
# Extract the digit runs matched by the pincode pattern from each address.
# NOTE(review): unlist() returns one element per regex match, so this assumes
# every address contains exactly one match - confirm against the data.
sdata[, pincodes := unlist(regmatches(address, gregexpr("(\\d+){6}", address)))]
# Remove the matched digits from the address text.
sdata[, address := gsub(pattern = "(\\d+){6}", replacement = "", x = address)]
@saraswatmks
saraswatmks / HirakSolution.py
Created July 6, 2017 05:19
This solution calculates the value of process grouped by count of unique step nos * 0.1
# coding: utf-8
# load library - you might need to install the xlrd library in case it throws an error while reading the file
import pandas as pd
# read file
# `sheet_name` is the current keyword; the older `sheetname` spelling was
# deprecated and later removed from pandas.
sensor = pd.read_excel("SENSOR-009.xlsx", sheet_name="Sheet1")
@saraswatmks
saraswatmks / ndcg@10
Last active July 18, 2017 13:00
Normalized discounted cumulative gain (NDCG@10) checker
#!/usr/bin/env python
import csv
import json
import sys
import numpy as np
LOCAL = True
@saraswatmks
saraswatmks / string.py
Created September 1, 2017 12:36
python string
strn = 'abcbaabbcc'
d = {}
k = 2
prev_word = strn[0]
c = 1
for s in strn[1:]:
if prev_word == s:
c+=1
print('this is c = {} in first loop'.format(c))