Skip to content

Instantly share code, notes, and snippets.

View miguelmalvarez's full-sized avatar

Miguel Martinez-Alvarez miguelmalvarez

View GitHub Profile
(ns clojure-elasticsearch.core
(:require [clojurewerkz.elastisch.rest :as esr]
[clojurewerkz.elastisch.rest.index :as idx]
[clojurewerkz.elastisch.rest.document :as doc]
[clojurewerkz.elastisch.query :as q]))
;; Constants for this example.
;; NOTE(review): presumably the Elasticsearch index name, document type and
;; search term used with the elastisch namespaces required above — confirm
;; against the code that consumes them.
(def index-name "test")
;; NOTE: this var is named `type`, which shadows clojure.core/type inside
;; this namespace.
(def type "articles")
(def query "third")
(ns ibmwatson.text
(:require [clj-http.client :as client]))
(defn- text-call
  "Send a GET request to the AlchemyAPI text endpoint named by `endpoint`,
  passing `params` as the query parameters, and return the clj-http response.

  NOTE(review): `text` is accepted but not used in this body; presumably
  callers place the text inside `params` — confirm."
  [text endpoint params]
  (let [base "http://access.alchemyapi.com/calls/text/"
        url  (str base endpoint)]
    (client/get url {:query-params params})))
(defn entities
[text api_key]
@miguelmalvarez
miguelmalvarez / run.py
Created March 20, 2015 09:32
Represent Reuters21578
from nltk import word_tokenize
from nltk.corpus import reuters
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem.porter import PorterStemmer
import re
from nltk.corpus import stopwords
# Load NLTK's English stop-word list once, at module import time.
# NOTE(review): presumably reused by tokenize() so the corpus is not re-read
# on every call — confirm against the full function body.
cachedStopWords = stopwords.words("english")
def tokenize(text):
@miguelmalvarez
miguelmalvarez / kaggle_digits_23-02-2015.py
Last active September 11, 2018 07:01
kaggle_digits.py
import pandas as pd
import numpy as np
import logging
import time
import datetime
from sklearn.ensemble import RandomForestClassifier
from sklearn import cross_validation
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import VarianceThreshold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
import pandas as pd
import logging
import time
import datetime
import numpy as np
def log_info(message):
def read_csv(file_path, has_header=True):
    """Read a comma-separated file of numbers into a list of float rows.

    Args:
        file_path: Path of the CSV file to read.
        has_header: When true, the first line is discarded as a header row.

    Returns:
        A list containing one list of floats per non-blank data line.

    Raises:
        ValueError: If a field on a non-blank line cannot be parsed as a float.
    """
    with open(file_path) as f:
        if has_header:
            f.readline()
        rows = []
        for line in f:
            line = line.strip()
            # A blank line (typically a trailing newline at the end of the
            # file) would otherwise reach float("") and raise ValueError.
            if not line:
                continue
            rows.append([float(x) for x in line.split(",")])
    return rows
def write_csv(file_path, data):
from sklearn.ensemble import RandomForestClassifier
import csv_io
def main():
#Read in the training data and train the model
train_data = csv_io.read_csv("data/train.csv")
#the first column of the training set will be the judgements
judgements = [str(int (x[0])) for x in train_data]
train_instances = [x[1:] for x in train_data]