Wil wilmeragsgh

## ner_training_usage.py
# Assumes training_data is a list of items of the form [raw_text, [(init_char, end_char, "LABEL")]], where the inner list can contain multiple labels per raw_text

import spacy
from spacy.tokens import DocBin

nlp = spacy.blank(lang) # lang refers to a spacy model language, ex (en, es, ...)
db = DocBin()
errors = []
for text, annotations in training_data[:train_size]:
    doc = nlp(text)

## test_pyspark_dep.py
# Maybe required:
# import os
# os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
# os.environ["SPARK_HOME"] = "/opt/spark"
# Test installation
# import findspark
# findspark.init()

import pyspark
import random

## spark_standalone_dep.sh
# Install a standalone Apache Spark release under /opt and point SPARK_HOME/JAVA_HOME at it.
# The commands call apt-get and write to /opt, so they presumably need to run as root.
# -y answers the apt confirmation prompt so the script can run unattended
apt-get install -y openjdk-8-jdk-headless
SPARK_RELEASE=spark-3.0.0-preview2 # Update as required from here https://spark.apache.org/downloads.html
HADOOP_VERSION=hadoop2.7 # Update as required
SPARK_DOWNLOADER_FILENAME=$SPARK_RELEASE-bin-$HADOOP_VERSION
# The closer.lua URL serves an HTML mirror-selection page rather than the tarball,
# so download the archive itself from the Apache release archive.
wget -q "https://archive.apache.org/dist/spark/$SPARK_RELEASE/$SPARK_DOWNLOADER_FILENAME.tgz"
tar -xzf "$SPARK_DOWNLOADER_FILENAME.tgz"
mv "$SPARK_DOWNLOADER_FILENAME" "/opt/$SPARK_RELEASE"
# -sfn replaces an existing /opt/spark symlink, so re-running with a newer release works
ln -sfn "/opt/$SPARK_RELEASE" /opt/spark
export SPARK_HOME=/opt/spark
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

## software_for_data_scientists.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                wilmeragsgh
                / software_for_data_scientists.md
            
            
              Last active
              May 1, 2020 23:29
                — forked from stared/software_for_scientists.md
            
              
                Software for data scientists: Adapted list of general-purpose software I use for data science activities.
              
          
    General Purpose software for data scientists

Adapted version from Software for scientists
Some things take much less time and stress once you know the right tool.
Below is my adaptation, for data scientists, of a list of software for scientists, with notes on when to use each tool.
Text editors


## keybase.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                wilmeragsgh
                / keybase.md
            
            
              Created
              May 1, 2020 16:55
            
          
    Keybase proof

I hereby claim:

I am wilmeragsgh on github.
I am wilmerags (https://keybase.io/wilmerags) on keybase.
I have a public key ASCYTXV75-4C4NOoXkraKWvPgAxKkEfEGQehxtwDhgirZgo

To claim this, I am signing this object:

  
## .Rprofile
# Remember to put it into '~/'
# or associate a .Rproject where the dir is

## Create a new invisible environment for all the functions to go in so it doesn't clutter your workspace.
.env <- new.env()

## Returns a logical vector TRUE for elements of X not in Y
.env$"%nin%" <- function(x, y) !(x %in% y)

.env$lib <- function(pkg,with.devtools = F,devtools.source = 'github', ...){

## robomongo_dep.sh
# Download the Robomongo 0.9.0 Linux build and unpack it into /usr/local/bin/robomongo.
ROBOMONGO_ARCHIVE=robomongo-0.9.0-linux-x86_64-0786489
wget "https://download.robomongo.org/0.9.0/linux/$ROBOMONGO_ARCHIVE.tar.gz"
tar -xvzf "$ROBOMONGO_ARCHIVE.tar.gz"
# -p keeps a re-run from failing when the directory already exists
mkdir -p /usr/local/bin/robomongo
mv "$ROBOMONGO_ARCHIVE"/* /usr/local/bin/robomongo
cd /usr/local/bin/robomongo/bin
#sudo chmod +x robomongo ## run this command only if robomongo isn't an executable file
#./robomongo
# ref: http://askubuntu.com/questions/739297/how-to-install-robomongo-ubuntu-system-please-let-me-know/781793

## mongo_deb.sh
# Register the MongoDB 3.2 apt repository for Ubuntu xenial and install mongodb-org.
MONGO_KEY_ID=EA312927
MONGO_SOURCES_LIST=/etc/apt/sources.list.d/mongodb-org-3.2.list
apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv "$MONGO_KEY_ID"
# tee writes the repository entry to the sources list and also echoes it to stdout
echo "deb http://repo.mongodb.org/apt/ubuntu xenial/mongodb-org/3.2 multiverse" | tee "$MONGO_SOURCES_LIST"
apt-get update
apt-get install -y mongodb-org

echo "[Unit]
Description=High-performance, schema-free document-oriented database
After=network.target
[Service]
User=mongodb

## texmaker_dep.sh
# Install Texmaker from the trusty-backports PPA.
# -y answers the confirmation prompts so the script can run unattended
add-apt-repository -y ppa:tsvetko.tsvetkov/trusty-backports
apt-get update
apt-get install -y texmaker

## jupyter_dep.sh
# Install pip from apt, upgrade it, then install Jupyter; each step runs only if the previous one succeeded.
# -y answers the apt confirmation prompt so the chain does not stall waiting for input
apt-get install -y python-pip && pip install --upgrade pip && pip install jupyter
	# assuming training_data as an array of the form [raw_text,[(init_char, end_char, "LABEL")]] where the internal array can contain multiple labels per raw_text

	import spacy
	from spacy.tokens import DocBin

	nlp = spacy.blank(lang) # lang refers to a spacy model language, ex (en, es, ...)
	db = DocBin()
	errors = []
	for text, annotations in training_data[:train_size]:
	doc = nlp(text)
	# Maybe required:
	# import os
	# os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
	# os.environ["SPARK_HOME"] = "/opt/spark"
	# Test installation
	# import findspark
	# findspark.init()

	import pyspark
	import random
	apt-get install openjdk-8-jdk-headless
	SPARK_RELEASE=spark-3.0.0-preview2 # Update as required from here https://spark.apache.org/downloads.html
	HADOOP_VERSION=hadoop2.7 # Update as required
	SPARK_DOWNLOADER_FILENAME=$SPARK_RELEASE-bin-$HADOOP_VERSION
	wget -q https://www.apache.org/dyn/closer.lua/spark/$SPARK_RELEASE/$SPARK_DOWNLOADER_FILENAME.tgz
	tar -xzf $SPARK_DOWNLOADER_FILENAME.tgz
	mv $SPARK_DOWNLOADER_FILENAME /opt/$SPARK_RELEASE
	ln -s /opt/$SPARK_RELEASE /opt/spark
	export SPARK_HOME=/opt/spark
	export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
	# Remember to put it into '~/'
	# or associate a .Rproject where the dir is

	## Create a new invisible environment for all the functions to go in so it doesn't clutter your workspace.
	.env <- new.env()

	## Returns a logical vector TRUE for elements of X not in Y
	.env$"%nin%" <- function(x, y) !(x %in% y)

	.env$lib <- function(pkg,with.devtools = F,devtools.source = 'github', ...){
	wget https://download.robomongo.org/0.9.0/linux/robomongo-0.9.0-linux-x86_64-0786489.tar.gz
	tar -xvzf robomongo-0.9.0-linux-x86_64-0786489.tar.gz
	mkdir /usr/local/bin/robomongo
	mv robomongo-0.9.0-linux-x86_64-0786489/* /usr/local/bin/robomongo
	cd /usr/local/bin/robomongo/bin
	#sudo chmod +x robomongo ## run command only if robomongo isn't an executable file
	#./robomongo
	# ref: http://askubuntu.com/questions/739297/how-to-install-robomongo-ubuntu-system-please-let-me-know/781793
	apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv EA312927
	echo "deb http://repo.mongodb.org/apt/ubuntu xenial/mongodb-org/3.2 multiverse" | tee /etc/apt/sources.list.d/mongodb-org-3.2.list
	apt-get update
	apt-get install -y mongodb-org

	echo "[Unit]
	Description=High-performance, schema-free document-oriented database
	After=network.target
	[Service]
	User=mongodb
	add-apt-repository ppa:tsvetko.tsvetkov/trusty-backports
	apt-get update
	apt-get install texmaker