Manish Saraswat saraswatmks

@saraswatmks
saraswatmks / gradient_descent
Last active March 3, 2017 12:30
gradient_descent
import numpy as np
import random
def gradient_descent(alpha, x, y, ep=0.0001, max_iter=10000):
    converged = False
    iter = 0
    m = x.shape[0]  # number of samples
    # initial theta
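The preview above cuts off at the theta initialization. A minimal, self-contained sketch of how a batch gradient descent for simple linear regression might continue (the parameters t0/t1, the squared-error cost, and the usage example are assumptions, not taken from the gist):

import numpy as np

def gradient_descent_sketch(alpha, x, y, ep=0.0001, max_iter=10000):
    m = x.shape[0]  # number of samples
    t0, t1 = 0.0, 0.0  # assumed initial intercept and slope
    J = np.sum((t0 + t1 * x - y) ** 2) / (2 * m)  # initial squared-error cost
    for _ in range(max_iter):
        err = t0 + t1 * x - y
        t0 -= alpha * np.sum(err) / m        # gradient step for the intercept
        t1 -= alpha * np.sum(err * x) / m    # gradient step for the slope
        e = np.sum((t0 + t1 * x - y) ** 2) / (2 * m)
        if abs(J - e) <= ep:                 # stop when the cost barely changes
            break
        J = e
    return t0, t1

# usage: recover roughly y = 1 + 2x from noisy samples
x = np.linspace(0, 1, 50)
y = 1 + 2 * x + np.random.normal(0, 0.01, 50)
print(gradient_descent_sketch(0.5, x, y))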
@saraswatmks
saraswatmks / ML_Challenge_0.93
Last active March 26, 2017 06:39
ML_Challenge_01
path <- "/home//Desktop/MLC01/"
setwd(path)
# Load data and libraries -------------------------------------------------
library(data.table)
library(h2o)
library(caret)
library(stringr)
library(e1071)
@saraswatmks
saraswatmks / readfiles.R
Created May 30, 2017 07:33
Read files in R
library(data.table)
library(jsonlite)
#### Train
train <- fromJSON("train_data.json")
train_data <- data.table(ID = unlist(names(train)))
train_data[, `:=` (genres = unlist(lapply(train, '[', 1)),
                   titles = unlist(lapply(train, '[', 2)),
@saraswatmks
saraswatmks / readfiles.py
Created May 30, 2017 09:19
Json to Py Dataframe
# coding: utf-8
import json
import pandas as pd
file_name_1 = "train_data.json"
with open(file_name_1, 'r') as jsonfile1:
    data_dict_1 = json.load(jsonfile1)
file_name_2 = "test_data.json"
with open(file_name_2, 'r') as jsonfile2:
    data_dict_2 = json.load(jsonfile2)
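The preview stops before the dictionaries are turned into DataFrames. A minimal sketch of that last step, assuming the JSON maps IDs to records (the name train_df and the orient="index" choice are mine, not from the gist):

import json
import pandas as pd

with open("train_data.json", "r") as f:
    data_dict = json.load(f)

# each top-level key becomes one row; nested fields become the columns
train_df = pd.DataFrame.from_dict(data_dict, orient="index").reset_index()
train_df = train_df.rename(columns={"index": "ID"})
print(train_df.head())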
@saraswatmks
saraswatmks / between.py
Created June 25, 2017 07:45
Pandas Update a Column Based on Range
import numpy as np
import pandas as pd

table_1 = pd.DataFrame({'Slab_ID': np.repeat('1_1', 19), 'ActCastLength': np.arange(1.1, 3, 0.1)})
table_2 = pd.DataFrame({'Slab_ID': ['1_1', '1_1'], 'DefectStart_Y': [1.5, 2.3],
                        'DefectEnd_Y': [1.8, 2.5], 'Defective': [1, 1]})
for c in np.arange(2):
    # flag rows whose cast length falls inside the defect range, then convert the boolean to 0/1
    table_1['Defective' + str(c)] = table_1['ActCastLength'].between(table_2.loc[c, 'DefectStart_Y'],
                                                                     table_2.loc[c, 'DefectEnd_Y'])
    table_1['Defective' + str(c)] = [1 if x == True else 0 for x in table_1['Defective' + str(c)]]
@saraswatmks
saraswatmks / address file
Last active July 5, 2017 06:29
Address File
library(data.table)
sdata <- fread("address_data.csv")
head(sdata)
setnames(sdata,"x","address")
# some cleaning
sdata[, pincodes := unlist(regmatches(address, gregexpr("(\\d+){6}", address)))]
sdata[, address := gsub(pattern = "(\\d+){6}", replacement = "", x = address)]
@saraswatmks
saraswatmks / HirakSolution.py
Created July 6, 2017 05:19
This solution calculates the value of each process as its count of unique step numbers multiplied by 0.1
# coding: utf-8
# load library - you might need to install the xlrd library if reading the file throws an error
import pandas as pd
# read file
sensor = pd.read_excel("SENSOR-009.xlsx",sheetname="Sheet1")
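The preview only shows the file being read. A hedged sketch of the computation the description refers to, counting unique step numbers per process and multiplying by 0.1 (it continues from the sensor frame read above; the column names 'PROCESS' and 'STEP_NO' are assumptions, not taken from the gist):

process_value = (
    sensor.groupby('PROCESS')['STEP_NO']
          .nunique()                  # count of unique step numbers per process
          .mul(0.1)                   # scale by 0.1 as the description states
          .reset_index(name='value')
)
print(process_value.head())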
@saraswatmks
saraswatmks / ndcg@10
Last active July 18, 2017 13:00
normalized discounted cumulative gain (NDCG) checker
#!/usr/bin/env python
import csv
import json
import sys
import numpy as np
LOCAL = True
def calculate_ndcg(test_movies, predicted_movies, already_watched):
    partial_dcgs = []
    predicted_event_flags = {}
    actual_events = test_movies
    for pred_index, predicted_event in enumerate(predicted_movies):
        try:
            # find the next occurrence of this prediction in the actual events,
            # starting after the position where the same prediction was last matched
            last_occ = predicted_event_flags.get(predicted_event, -1)
            actual_index = actual_events[last_occ + 1:].index(predicted_event)
            effective_index = actual_index + last_occ + 1
            predicted_event_flags[predicted_event] = effective_index
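The preview stops inside the matching loop. For reference, a minimal self-contained sketch of an NDCG@10 check using the standard log2 discount (the exact relevance weighting the gist uses is not visible, so treat this as an assumption rather than the gist's method):

import numpy as np

def ndcg_at_k(actual, predicted, k=10):
    # relevance is 1 if a predicted item appears in the actual list, else 0
    predicted = predicted[:k]
    dcg = sum(1.0 / np.log2(i + 2) for i, p in enumerate(predicted) if p in actual)
    ideal_hits = min(len(actual), k)
    idcg = sum(1.0 / np.log2(i + 2) for i in range(ideal_hits))
    return dcg / idcg if idcg > 0 else 0.0

print(ndcg_at_k(['m1', 'm2', 'm3'], ['m2', 'm9', 'm1', 'm7']))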
#include <iostream>
#include <cstring>
using namespace std;

// Appears to split a decimal string such as "-12.34" into its integer part (first)
// and fractional digits (second); the gist preview ends inside the loop.
void parseDot(char s[100], int &first, int &second) {
    int length = strlen(s);
    int temp = 0, dotFound = 0, negative = 1;
    for (int i = 0; i < length; i++) {
        if (s[i] == '.') {