Skip to content

Instantly share code, notes, and snippets.

View arademaker's full-sized avatar
🎯
Focusing

Alexandre Rademaker arademaker

🎯
Focusing
View GitHub Profile
@arademaker
arademaker / output
Last active September 28, 2015 17:24
$ awk '{print $2, $2, $4, "#"}' train.pt | /media/data/src/freeling/src/utilities/train-tagger/bin/change-corpus-dict pt /usr/local/share/freeling/
O O DET # o DA0MS0 0.946534 o PD0MS000 0.0337339 o PP3MSA00 0.0197186 o NCMS000 1.38766e-05
objetivo objetivo NOUN # objetivo AQ0MS0 0.554567 objetivo NCMS000 0.390889 objetivo NP00000 0.0545441
dos dos ADP # de SPS00 1
principais principais ADJ # principal AQ0CP0 0.99061 principais NCMP000 0.00469484 principal NCMP000 0.00469484
hotéis hotéis NOUN # hotel NCMP000 1
da da ADP # de SPS00 1
cidade cidade NOUN # cidade NCFS000 1
é é VERB # ser VMIP3S0 1
que que CONJ # que PR0CN000 0.586684 que CS 0.39617 que PT0CN000 0.0167849 que PE0CN000 0.000361319
@arademaker
arademaker / monty-hall.R
Created March 17, 2012 21:53
Monty Hall Problem in R
portas <- function (t){
teste <- 0
amostra <- 0
while (teste < t){
# dist é a distribuição dos conteúdos das portas, 1 marca a porta
# premiada
dist <- c(1,0,0)
dist <- dist[sample(3)]
a <- sample(3,1) # a será a porta que você escolhe
@arademaker
arademaker / chapter-3-ci.R
Created May 22, 2012 11:42
Script Gustavo L. A. chapter 3 of Collective Intelligence using R
# Apply getWords (defined elsewhere in this script) to every document.
corpus2 <- lapply(corpus, getWords)

# Keep only documents that produced at least one entry; zero-length results
# are treated as failures. lengths() always returns an integer vector, whereas
# sapply(x, length) returns a list when x is empty.
tira.erro <- lengths(corpus2) > 0
corpus2 <- corpus2[tira.erro]

# Tabulate how often each element name occurs across the kept documents.
# NOTE(review): presumably names are unique within a document, so this is a
# per-word document count -- confirm against getWords.
apcount <- table(unlist(lapply(corpus2, names)))
apcount.df <- as.data.frame(apcount, stringsAsFactors = FALSE)

# Relative frequency: count divided by the number of kept documents.
apcount.df$f <- apcount.df$Freq / length(corpus2)
@arademaker
arademaker / crawler.R
Created June 2, 2012 12:47
A simple crawler and indexer based on Collective Intelligence chapter 4
library(RCurl)
library(XML)
joinURL <- function(base, u){
sbase <- parseURI(base)
su <- parseURI(u)
if( su$scheme != "")
return(u)
su$scheme <- sbase$scheme
@arademaker
arademaker / sentences.py
Created July 19, 2012 23:59
count of sentences in a set of markdown files
# A simple script to calculate the number of sentences per markdown
# file in a given directory.
#
# Reference:
# - http://nltk.googlecode.com/svn/trunk/doc/howto/portuguese_en.html
# Author: Alexandre Rademaker
import os
import glob
import re
@arademaker
arademaker / statistics.py
Created July 26, 2012 14:36
Simple statistics for a Markdown corpus
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import glob
import re
import codecs
import nltk
from random import *
from nltk.probability import *
@arademaker
arademaker / test.lisp
Created November 3, 2015 10:14
reading CPDOC headers in CL
CL-USER> (pushnew "/opt/local/lib/" cffi:*foreign-library-directories* :test #'equal)
("/opt/local/lib/" (CFFI::EXPLODE-PATH-ENVIRONMENT-VARIABLE "LD_LIBRARY_PATH")
(CFFI::EXPLODE-PATH-ENVIRONMENT-VARIABLE "DYLD_LIBRARY_PATH") (UIOP/OS:GETCWD)
(CFFI::DARWIN-FALLBACK-LIBRARY-PATH))
CL-USER> (ql:quickload :cl-yaml)
To load "cl-yaml":
Load 1 ASDF system:
cl-yaml
; Loading "cl-yaml"
@arademaker
arademaker / erc.el
Created November 26, 2015 10:12
erc config in .emacs
;; ERC (Emacs IRC client) configuration.

;; Enable automatic joining of the channels listed in
;; `erc-autojoin-channels-alist' below.
(erc-autojoin-mode t)

;; Each entry is (SERVER CHANNEL...).
(setq erc-autojoin-channels-alist
      '(("freenode.net" "#lisp")
        ("wnpt.brlcloud.com" "#brl")))

;; Logging: write logs under ~/.erc/logs/ after every sent and inserted
;; message, so nothing extra is saved on part or quit.
(setq erc-log-channels-directory "~/.erc/logs/")
(setq erc-log-write-after-send t)
(setq erc-log-write-after-insert t)
(setq erc-save-buffer-on-part nil)
(setq erc-save-queries-on-quit nil)

;; Echo notices in the minibuffer.
(setq erc-echo-notices-in-minibuffer-flag t)
@arademaker
arademaker / glossas.lisp
Last active January 19, 2016 21:14
Glosses and examples
(defun group-by (alist n &optional res)
  "Split the list ALIST into consecutive sublists of N elements each.

RES is an accumulator of already-built groups in reverse order; callers
normally omit it.  Returns a fresh list of fresh sublists, with any groups
passed in RES first.

If the length of ALIST is not a multiple of N, the last group holds the
remaining elements (fewer than N).  The previous SUBSEQ-based version
signalled an error in that case.  N must be a positive integer whenever
ALIST is non-empty; N = 0 used to recurse without end."
  (when alist
    (check-type n (integer 1)))
  ;; Iterate instead of recursing so long lists cannot exhaust the stack,
  ;; and pop elements one by one so a short final group is handled.
  (loop with remaining = alist
        while remaining
        do (push (loop repeat n
                       while remaining
                       collect (pop remaining))
                 res)
        finally (return (reverse res))))
(defun split-gloss (gloss)
(let* ((re "[ ]*;[ ]*\"[^\"]+\"")
(pos (cl-ppcre:all-matches re gloss))
(bw '(#\; #\" #\Space)))
(if pos