This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from __future__ import unicode_literals | |
from nltk.tokenize import RegexpTokenizer | |
import argparse | |
import os | |
""" | |
Script for tokenizing Portuguese text according to the Universal Dependencies | |
(UD) tokenization standards. This script was not created by the UD team; it was |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
max(-x,-y) = -min(x,y)
min(-x,-y) = -max(x,y)
abs(x) = abs(-x)
abs(x) = max(x,-x) = -min(x,-x)
abs(x*a) =  abs(x)*a   if a >= 0
         = -abs(x)*a   if a <  0
// equivalently: abs(x*a) = abs(x)*abs(a)
// the identity below holds for any commutative operation in place of +,
// because min(x,y) and max(x,y) are just x and y in some order
min(x,y) + max(x,y) = x + y