Boyan Angelov boyanangelov

## tools.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                boyanangelov
                / tools.md
            
            
              Created
              March 15, 2019 11:50
            
              
                Optimal Tools for ML/AI
              
          
dplyr
pandas
ggplot2
plotly
seaborn
mlr
scikit-learn
yellowbrick
xgboost
keras


## tim_show_scraper.py
from urllib.request import urlopen
from bs4 import BeautifulSoup
from collections import Counter
import pickle
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from __future__ import division
from nltk import FreqDist
from tqdm import tqdm

## optimal_tools_for_ml.md

      
              1 file
            
          
              0 forks
            
          
              1 comment
            
          
              1 star
            
          
                boyanangelov
                / optimal_tools_for_ml.md
            
            
              Last active
              December 23, 2022 07:49
            
              
                Optimal Tools for ML/AI
              
          
    Optimal Tools for ML and AI

Languages


R
Python
Julia

R packages


dplyr: cleaning / transforming
plotly: visualization


## .gitignore
# R stuff
.Rproj.user
.Rhistory
.RData
.Ruserdata

# datatypes
*csv
*tsv
*xls

## nnet_plot_update.r
plot.nnet<-function(mod.in,nid=T,all.out=T,all.in=T,bias=T,wts.only=F,rel.rsc=5,
                    circle.cex=5,node.labs=T,var.labs=T,x.lab=NULL,y.lab=NULL,
                    line.stag=NULL,struct=NULL,cex.val=1,alpha.val=1,
                    circle.col='lightblue',pos.col='black',neg.col='grey',
                    bord.col='lightblue', max.sp = F,...){

  require(scales)

  #sanity checks
  if('mlp' %in% class(mod.in)) warning('Bias layer not applicable for rsnns object')

## Spark+ipython_on_MacOS.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                boyanangelov
                / Spark+ipython_on_MacOS.md
            
            
              Created
              February 29, 2016 20:38
                — forked from ololobus/Spark+ipython_on_MacOS.md
            
              
                Apache Spark installation + ipython notebook integration guide for Mac OS X
              
          
    Apache Spark installation + ipython notebook integration guide for Mac OS X

Tested with Apache Spark 1.3.1, Python 2.7.9 and Java 1.8.0_45 + workaround for Spark 1.4.x from @enahwe.
Install Java Development Kit

Download and install it from oracle.com

  
## svmflag.py
import numpy as np
import pylab as pl
import pandas as pd

from sklearn import svm
from sklearn import linear_model
from sklearn import tree

from sklearn.metrics import confusion_matrix

## inference.r
function(y, x = NULL,
                      est = c("mean", "median", "proportion"),
                      success = NULL, order = NULL,
                      method = c("theoretical","simulation"),
                      type = c("ci","ht"),
                      alternative = c("less","greater","twosided"),
                      null = NULL,
                      boot_method = c("perc","se"),
                      conflevel = 0.95, siglevel = 0.05,
                      nsim = 10000, simdist = FALSE, seed = NULL,

## maps-ipython.py
%pylab inline
from pylab import *

pylab.rcParams['figure.figsize'] = (8.0, 6.4)

from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np
map = Basemap(projection='ortho', lat_0=50, lon_0=-100,
              resolution='l', area_thresh=1000.0)

## ds-training.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                boyanangelov
                / ds-training.md
            
            
              Last active
              August 29, 2015 14:27
                — forked from hadley/ds-training.md
            
              
                My advice on what you need to do to become a data scientist...
              
          
If you were to give recommendations to your "little brother/sister" on things
that they need to do to become a data scientist, what would those things be?

I think the "Data Science Venn Diagram" (http://drewconway.com/zia/2013/3/26/the-data-science-venn-diagram) is a great place to start. You need three things to be a good data scientist:

Statistical knowledge
Programming/hacking skills
Domain expertise

Statistical knowledge
	from urllib.request import urlopen
	from bs4 import BeautifulSoup
	from collections import Counter
	import pickle
	import nltk
	from nltk.corpus import stopwords
	from nltk.tokenize import word_tokenize
	from __future__ import division
	from nltk import FreqDist
	from tqdm import tqdm
	# R stuff
	.Rproj.user
	.Rhistory
	.RData
	.Ruserdata

	# datatypes
	*csv
	*tsv
	*xls
	plot.nnet<-function(mod.in,nid=T,all.out=T,all.in=T,bias=T,wts.only=F,rel.rsc=5,
	circle.cex=5,node.labs=T,var.labs=T,x.lab=NULL,y.lab=NULL,
	line.stag=NULL,struct=NULL,cex.val=1,alpha.val=1,
	circle.col='lightblue',pos.col='black',neg.col='grey',
	bord.col='lightblue', max.sp = F,...){

	require(scales)

	#sanity checks
	if('mlp' %in% class(mod.in)) warning('Bias layer not applicable for rsnns object')
	import numpy as np
	import pylab as pl
	import pandas as pd

	from sklearn import svm
	from sklearn import linear_model
	from sklearn import tree

	from sklearn.metrics import confusion_matrix
	function(y, x = NULL,
	est = c("mean", "median", "proportion"),
	success = NULL, order = NULL,
	method = c("theoretical","simulation"),
	type = c("ci","ht"),
	alternative = c("less","greater","twosided"),
	null = NULL,
	boot_method = c("perc","se"),
	conflevel = 0.95, siglevel = 0.05,
	nsim = 10000, simdist = FALSE, seed = NULL,
	%pylab inline
	from pylab import *

	pylab.rcParams['figure.figsize'] = (8.0, 6.4)

	from mpl_toolkits.basemap import Basemap
	import matplotlib.pyplot as plt
	import numpy as np
	map = Basemap(projection='ortho', lat_0=50, lon_0=-100,
	resolution='l', area_thresh=1000.0)