Skip to content

Instantly share code, notes, and snippets.

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from category_encoders import OneHotEncoder
@yabyzq
yabyzq / R anomaly detection
Created June 18, 2018 12:59
R anomaly detection
# Install packages
# devtools has to be attached before the installs: it is the package that
# provides install_github(), so calling it first fails in a fresh session.
library(devtools)
install_github("petermeissner/wikipediatrend")
install_github("twitter/AnomalyDetection")
# Attach the remaining packages once the GitHub installs have completed.
library(Rcpp)
library(wikipediatrend)
library(AnomalyDetection)
#load data
@yabyzq
yabyzq / Forecast methods
Last active June 3, 2018 22:24
forecast
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import time
from numpy import newaxis
import math
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
@yabyzq
yabyzq / Model Explainer
Last active May 31, 2018 13:48
R - model explainer
#xgboost explainer
#library(devtools)
#install_github("AppliedDataSciencePartners/xgboostExplainer")
library(xgboost)
library(xgboostExplainer)
# Getting data
# Fix the RNG seed so any random steps later in the script are repeatable.
set.seed(123)
# Load the agaricus.train example dataset that ships with the xgboost package.
data(agaricus.train, package='xgboost')
@yabyzq
yabyzq / LSTM
Created May 22, 2018 14:45
LSTM Time Series - Start Script
from datetime import date, timedelta
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers import LSTM
from keras import callbacks
from keras.callbacks import ModelCheckpoint
#Load data
import pandas as pd
# Both UCI wine-quality files are semicolon-delimited, hence sep=';'.
white = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv", sep=';')
red = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", sep=';')
#define Target
# 'type' is the label column: 1 marks rows from the red file, 0 the white file.
red['type'] = 1
white['type'] = 0
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same
# stacked frame (red rows first, then white) with a fresh 0..n-1 index.
wines = pd.concat([red, white], ignore_index=True)
@yabyzq
yabyzq / doc_prediction
Created April 4, 2018 07:48
document prediction
import PyPDF2
def read_pdfs(pdf_file_name):
pdf = PyPDF2.PdfFileReader(open(pdf_file_name,'rb'))
num_pages = pdf.numPages
count = 0
@yabyzq
yabyzq / Data.table
Created August 17, 2017 13:41
Data.table
library(data.table)
# Download data: fread() reads the CSV directly from the URL into `flights`.
flights <- fread("https://raw.githubusercontent.com/wiki/arunsrinivasan/flights/NYCflights14/flights14.csv")
# Evaluating the bare name at top level prints the table.
flights
# Subset rows
flights[origin == "JFK" & month == 6L] # rows where origin is "JFK" and month is 6 (filter on column values)
flights[1:2] # rows selected by position: the first two
@yabyzq
yabyzq / TFIDF examples
Created July 10, 2017 10:48
TFIDF examples
library(RODBC)
library(tm)
library(wordcloud)
library(ggplot2)
library(ROracle)
# Read the feedback text from a CSV file.
# NOTE(review): the path is an absolute location on one user's desktop, so
# the script only runs on that machine as written.
a <- read.csv(file = 'C:/Users/eye1/Desktop/text.csv')
# Rename the column to 'feedback'.
# NOTE(review): this presumes the CSV has a single column - confirm.
names(a) <- 'feedback'
# Pull the feedback column out on its own for the text processing that follows.
narrative <- a$feedback
# Print the structure of the extracted column as a quick sanity check.
str(narrative)
@yabyzq
yabyzq / Ensembling
Created July 10, 2017 10:46
Ensembling
library(caret)
# Fix the RNG seed so any random steps later in the script are repeatable.
set.seed(1)
# Read the training CSV straight from the remote URL.
data<-read.csv(url('https://datahack-prod.s3.ap-south-1.amazonaws.com/train_file/train_u6lujuX_CVtuZ9i.csv'))
# Set up the preprocessing on `data`: median imputation, then centering and scaling.
preProcValues <- preProcess(data, method = c("medianImpute","center","scale"))
# NOTE(review): RANN is attached here, but none of the methods requested
# above is visibly a nearest-neighbour one - confirm whether it is needed.
library('RANN')
# Apply the preprocessing to the same data to obtain the processed frame.
data_processed <- predict(preProcValues, data)