>>> from nltk import tokenize
>>> para = "Hello. My name is Jacob. Today you'll be learning NLTK."
>>> sents = tokenize.sent_tokenize(para)
>>> sents
['Hello.', 'My name is Jacob.', "Today you'll be learning NLTK."]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
deb http://kali.cs.nctu.edu.tw/ /kali main contrib non-free | |
deb http://kali.cs.nctu.edu.tw/ /wheezy main contrib non-free | |
deb http://kali.cs.nctu.edu.tw/kali kali-dev main contrib non-free | |
deb http://kali.cs.nctu.edu.tw/kali kali-dev main/debian-installer | |
deb-src http://kali.cs.nctu.edu.tw/kali kali-dev main contrib non-free | |
deb http://kali.cs.nctu.edu.tw/kali kali main contrib non-free | |
deb http://kali.cs.nctu.edu.tw/kali kali main/debian-installer | |
deb-src http://kali.cs.nctu.edu.tw/kali kali main contrib non-free | |
deb http://kali.cs.nctu.edu.tw/kali-security kali/updates main contrib non-free | |
deb-src http://kali.cs.nctu.edu.tw/kali-security kali/updates main contrib non-free |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the Treasurer page for one parcel from co.kane.il.us and define the
# regular expressions used to pull the address, tax history and tax rate out
# of the returned HTML.
import re
import sys

try:
    from string import join  # Python 2 only; kept for code that calls join()
except ImportError:
    def join(words, sep=" "):
        """Python 3 stand-in for the removed ``string.join(words, sep)``."""
        return sep.join(words)

import requests

# The parcel number is the script's only command-line argument; fail with a
# usage message instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: %s PARCEL_NUMBER" % sys.argv[0])

# Let requests build (and escape) the query string. The timeout keeps an
# unresponsive server from hanging the script forever, and raise_for_status()
# stops the regexes below from being run against an HTTP error page.
r = requests.get(
    "http://www.co.kane.il.us/TaxAssessment/Treasurer.aspx",
    params={"parcelnumber": sys.argv[1]},
    timeout=30,
)
r.raise_for_status()
text = r.text

# Raw strings so that \d reaches the regex engine untouched. In ADDRESS the
# hyphen is moved to the end of the class so it is read as a literal "-"
# rather than as the range ",-." (the matched character set is unchanged).
ADDRESS = r"""<span id="lblPropertyAddress[0-9]" style="display:inline-block;">([A-Za-z0-9 ,.-]*)</span><br />"""
TAXES = r"""TaxYear=(\d\d\d\d)">(\d\d\d\d|Current Year)</a></td><td>([0-9.]*)</td>"""
RATE = r"""<span id="lblTaxRate" style="display:inline-block;"><font face="Arial">([0-9.]*)</font></span><br />"""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clear the workspace, including hidden (dot-prefixed) objects.
# `all.names` is spelled out instead of relying on partial matching of `all`.
rm(list = ls(all.names = TRUE))
library(quantmod)

# Scrape data from the website
library(XML)
# readHTMLTable() returns one element per HTML table found on the page.
rawPMI <- readHTMLTable('http://www.ism.ws/ISMReport/content.cfm?ItemNumber=10752')
# Keep the first table and label its first column.
PMI <- data.frame(rawPMI[[1]])
names(PMI)[1] <- 'Year'
# Reshape
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################################### | |
## ## | |
## Setup ## | |
## ## | |
############################################################################################### | |
# install.packages("Rfacebook") # from CRAN | |
# install.packages("Rook") # from CRAN | |
# install.packages("igraph") # from CRAN |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download data set via: | |
# http://archive.ics.uci.edu/ml/datasets/Statlog+%28German+Credit+Data%29 | |
# | |
# Note, the example below uses the pre-processed data that is used in the book 'Machine Learning with R' by Brett Lantz | |
library(C50)

# Load the pre-processed credit data; character columns become factors.
df <- read.csv("credit.csv", stringsAsFactors=TRUE)

# Shuffle the rows reproducibly: draw one uniform random number per row and
# sort the rows by it. Using nrow(df) instead of a hard-coded 1000 gives the
# same order for a 1000-row file and still works if the row count changes.
set.seed(12345)
df_rand <- df[order(runif(nrow(df))),]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk

# Read the whole input document into memory.
with open('sample.txt', 'r') as f:
    sample = f.read()

# Pipeline: split into sentences -> tokenize each sentence -> POS-tag each
# token list -> named-entity chunk the tagged sentences.
sentences = nltk.sent_tokenize(sample)
tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
# binary=True is passed through to NLTK's chunker unchanged.
chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Script tags POS and NER[Named Entity Recognition] for a supplied text file. | |
#Date: Nov 2 2012 | |
#Author: Hota Sobhan | |
import nltk

# Raw string: the backslashes in the Windows path are literal separators, not
# escape sequences. The `with` block closes the file once the lines are read.
with open(r'C:\Python27\Test_File.txt') as f:
    data = f.readlines()
#Parse the text file for NER with POS Tagging |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def convertCSVtoJSON(input, fieldnames=("field1", "field2", "field3")):  # pass the name of the input csv file
    """Convert a CSV file to newline-delimited JSON in ``.tempJSON``.

    Every row of the CSV (including the first one -- no header row is
    assumed) is mapped onto ``fieldnames`` and written as one JSON object per
    line to the file ``.tempJSON`` in the current working directory. That
    file is overwritten on every call.

    Args:
        input: Path of the CSV file to read. (The name shadows the builtin
            but is kept so existing keyword callers keep working.)
        fieldnames: Sequence of column names used as the JSON keys, in
            column order. Defaults to ``field1``, ``field2``, ``field3``.

    Returns:
        None.
    """
    # Imported locally so the function does not depend on module-level
    # imports being present.
    import csv
    import json

    # The names must be a sequence of strings: a single comma-separated
    # string would make DictReader use its individual characters as keys.
    # The `with` statement closes both files even if a row fails to convert.
    with open(input, 'r') as f, open('.tempJSON', 'w') as j:
        for row in csv.DictReader(f, list(fieldnames)):
            json.dump(row, j)
            j.write('\n')
http://www.neo4j.org/develop/linked_data http://neo4j.com/blog/and-now-for-something-completely-different-using-owl-with-neo4j/?_ga=1.82889430.1907739431.1411998329 http://losangelesindustries.tumblr.com/post/41701508265/loading-dbpedia-into-neo4j-with-clojure http://michaelbloggs.blogspot.de/2013/05/importing-ttl-turtle-ontologies-in-neo4j.html http://www.neo4j.org/develop/clojure
OlderNewer