This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ awk '{print $2, $2, $4, "#"}' train.pt | /media/data/src/freeling/src/utilities/train-tagger/bin/change-corpus-dict pt /usr/local/share/freeling/ | |
O O DET # o DA0MS0 0.946534 o PD0MS000 0.0337339 o PP3MSA00 0.0197186 o NCMS000 1.38766e-05 | |
objetivo objetivo NOUN # objetivo AQ0MS0 0.554567 objetivo NCMS000 0.390889 objetivo NP00000 0.0545441 | |
dos dos ADP # de SPS00 1 | |
principais principais ADJ # principal AQ0CP0 0.99061 principais NCMP000 0.00469484 principal NCMP000 0.00469484 | |
hotéis hotéis NOUN # hotel NCMP000 1 | |
da da ADP # de SPS00 1 | |
cidade cidade NOUN # cidade NCFS000 1 | |
é é VERB # ser VMIP3S0 1 | |
que que CONJ # que PR0CN000 0.586684 que CS 0.39617 que PT0CN000 0.0167849 que PE0CN000 0.000361319 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
portas <- function (t){ | |
teste <- 0 | |
amostra <- 0 | |
while (teste < t){ | |
# dist é a distribuição dos conteúdos das portas, 1 marca a porta | |
# premiada | |
dist <- c(1,0,0) | |
dist <- dist[sample(3)] | |
a <- sample(3,1) # a será a porta que você escolhe |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run getWords (defined elsewhere) over every element of corpus.
corpus2 <- lapply(corpus, getWords)

# Keep only the entries for which getWords returned a non-empty result.
tira.erro <- lengths(corpus2) > 0
corpus2 <- corpus2[tira.erro]

# Tabulate the element names across all remaining entries
# (presumably the words found in each document -- confirm against getWords).
apcount <- table(unlist(lapply(corpus2, names)))
apcount.df <- as.data.frame(apcount, stringsAsFactors = FALSE)

# f: occurrences of each name divided by the number of remaining entries.
apcount.df[["f"]] <- apcount.df[["Freq"]] / length(corpus2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(RCurl) | |
library(XML) | |
joinURL <- function(base, u){ | |
sbase <- parseURI(base) | |
su <- parseURI(u) | |
if( su$scheme != "") | |
return(u) | |
su$scheme <- sbase$scheme |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A simple script to calculate the number of sentences per markdown | |
# file in a given directory. | |
# | |
# Reference: | |
# - http://nltk.googlecode.com/svn/trunk/doc/howto/portuguese_en.html | |
# Author: Alexandre Rademaker | |
import os | |
import glob | |
import re |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import os | |
import glob | |
import re | |
import codecs | |
import nltk | |
from random import * | |
from nltk.probability import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> tmp.1 <- data[grep("person",data$lex..file2,invert=TRUE),] | |
> subset(tmp.1, rel == "nomlex:agent", c(1,3,4,5,6,7,8,9)) | |
word1 word2 synset1 synset2 lex..file1 lex..file2 suffix class | |
77 adaptar adaptador 00299580-v 02678897-n verb.change noun.artifact or REGULAR | |
103 fabricar fabricante 01653442-v 08060446-n verb.creation noun.group nte REGULAR | |
237 tripular tripulação 01089614-v 08273167-n verb.competition noun.group ção REGULAR | |
254 imigrar imigração 00413432-v 08413834-n verb.change noun.group ção REGULAR | |
273 transferir transferência 02220461-v 01107932-n verb.possession noun.act cia REGULAR | |
294 servir serviço 02541251-v 08186047-n verb.social noun.group o REGRESSIVE | |
357 roer roedor 01445597-v 02329401-n verb.contact noun.animal or REGULAR |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CL-USER> (pushnew "/opt/local/lib/" cffi:*foreign-library-directories* :test #'equal) | |
("/opt/local/lib/" (CFFI::EXPLODE-PATH-ENVIRONMENT-VARIABLE "LD_LIBRARY_PATH") | |
(CFFI::EXPLODE-PATH-ENVIRONMENT-VARIABLE "DYLD_LIBRARY_PATH") (UIOP/OS:GETCWD) | |
(CFFI::DARWIN-FALLBACK-LIBRARY-PATH)) | |
CL-USER> (ql:quickload :cl-yaml) | |
To load "cl-yaml": | |
Load 1 ASDF system: | |
cl-yaml | |
; Loading "cl-yaml" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; ERC (Emacs IRC client) configuration.

;; Enable the autojoin module; it uses `erc-autojoin-channels-alist'
;; set below.
(erc-autojoin-mode t)

;; Channels to join, one (SERVER CHANNEL...) entry per server.
(setq erc-autojoin-channels-alist
      '(("freenode.net" "#lisp")
        ("wnpt.brlcloud.com" "#brl")))

;; Logging: logs live under ~/.erc/logs/ and the write-after-send /
;; write-after-insert flags are on, while the save-on-part and
;; save-on-quit options are switched off.
(setq erc-log-channels-directory "~/.erc/logs/")
(setq erc-log-write-after-send t)
(setq erc-log-write-after-insert t)
(setq erc-save-buffer-on-part nil)
(setq erc-save-queries-on-quit nil)

;; Notices are echoed in the minibuffer.
(setq erc-echo-notices-in-minibuffer-flag t)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defun group-by (alist n &optional res)
  "Split the list ALIST into consecutive sublists of N elements each.

Returns a fresh list of fresh sublists, in the original order.  When the
length of ALIST is not a multiple of N, the final sublist holds the
remaining (fewer than N) elements instead of signalling an error.

RES is an accumulator of already-built groups in reverse order; any
groups passed in appear (reversed) ahead of the new ones in the result.
Callers normally omit it.

Signals a TYPE-ERROR unless N is a positive integer (N = 0 would
otherwise never make progress)."
  (check-type n (integer 1))
  (do ((tail alist (nthcdr n tail))
       (groups res))
      ((null tail) (reverse groups))
    ;; Collect at most N elements; stops early on a short final chunk.
    (push (loop for item in tail
                repeat n
                collect item)
          groups)))
(defun split-gloss (gloss) | |
(let* ((re "[ ]*;[ ]*\"[^\"]+\"") | |
(pos (cl-ppcre:all-matches re gloss)) | |
(bw '(#\; #\" #\Space))) | |
(if pos |