Created
September 22, 2016 03:51
-
-
Save muziejus/bf0222ff5ac1a24bbee5e5eac9622c00 to your computer and use it in GitHub Desktop.
Getting Stanford NER up and running on DHBox
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# This is a barebones script by Moacir P. de Sá Pereira (@muziejus on GitHub)
# for installing part of the Stanford NLP library to a DHBox
# (http://dhbox.org).
#
# What it does, in order:
#   1. Downloads an Oracle JDK 1.8 tarball, unpacks it under $JDK_ROOT and
#      registers java/javac with update-alternatives.
#   2. Downloads the Stanford NER zip into the current directory and unpacks
#      it to ./stanford-ner.
#   3. Installs nltk for Python 3 and fetches its 'punkt' tokenizer.
#
# Every setting below can be overridden from the environment; the defaults
# are the versions this script was originally written against.
# Newer versions of java *are* available, and the Stanford NER release below
# is not necessarily the newest one either.

# Abort on the first failing command or unset variable, so that a failed
# download never leads to tar/rm/update-alternatives running on missing files.
set -eu

JDK_ROOT=${JDK_ROOT:-/opt/jdk}
JDK_TARBALL=${JDK_TARBALL:-jdk-8u71-linux-x64.tar.gz}
JDK_DIR=${JDK_DIR:-jdk1.8.0_71}
JDK_URL=${JDK_URL:-http://download.oracle.com/otn-pub/java/jdk/8u71-b15/${JDK_TARBALL}}

NER_ZIP=${NER_ZIP:-stanford-ner-2015-12-09.zip}
NER_URL=${NER_URL:-http://www-nlp.stanford.edu/software/${NER_ZIP}}
# NOTE(review): assumes the zip unpacks into a single top-level directory
# named after the archive (zip name minus ".zip") -- confirm if NER_ZIP is
# changed to another release.
NER_UNPACKED=${NER_UNPACKED:-${NER_ZIP%.zip}}
NER_TARGET=stanford-ner

# Print a message to stderr and stop the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# First, we need to upgrade Java
# from: http://stackoverflow.com/questions/30177455/moving-from-jdk-1-7-to-jdk-1-8-on-ubuntu
echo "#### Installing Java 1.8 in /opt/jdk . . ."
wget --header "Cookie: oraclelicense=accept-securebackup-cookie" "$JDK_URL" \
  || die "#### Could not download ${JDK_URL}"
# -p: do not fail when the directory is left over from an earlier run.
mkdir -p -- "$JDK_ROOT"
tar -zxf "$JDK_TARBALL" -C "$JDK_ROOT/"
rm -- "$JDK_TARBALL"
update-alternatives --install /usr/bin/java java "$JDK_ROOT/$JDK_DIR/bin/java" 1
update-alternatives --install /usr/bin/javac javac "$JDK_ROOT/$JDK_DIR/bin/javac" 1
echo "#### Below, choose the java 1.8 installation in /opt/jdk (typing '1' should do it)"
# These two prompts are interactive.
update-alternatives --config java
update-alternatives --config javac
echo "#### The lines below should mention Java 1.8."
java -version
javac -version

# Next, we download the appropriate Stanford NLP software. I'm interested in
# the Named Entity Recognition tagger.
echo "#### Now installing the Stanford NER tagger"
# Refuse to continue if the target exists: mv would otherwise nest the new
# directory inside the old one instead of replacing it.
[ ! -e "$NER_TARGET" ] \
  || die "#### $(pwd)/${NER_TARGET} already exists; remove it and re-run."
wget "$NER_URL" || die "#### Could not download ${NER_URL}"
# Operate on the exact archive name rather than 'stanford*' / '*.zip' globs,
# so unrelated files in the current directory are never unpacked or deleted.
unzip "$NER_ZIP"
rm -- "$NER_ZIP"
mv -- "$NER_UNPACKED" "$NER_TARGET"
echo "#### The stanford files are now available in: $(pwd)/stanford-ner"
echo "####"
echo "#### Now installing nltk for Python 3. Ignore the errors."
# Deliberately best-effort: the original instructs the user to ignore pip's
# errors, so a non-zero exit here must not abort the script.
pip3 install nltk || echo "#### pip3 reported errors; continuing." >&2
echo "####"
echo "#### Now installing the 'punkt' tokenizer for nltk."
python3 -c "import nltk; nltk.download('punkt')"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment