Skip to content

Instantly share code, notes, and snippets.

@brunoadacosta
Created March 6, 2012 21:39
Show Gist options
  • Save brunoadacosta/1989104 to your computer and use it in GitHub Desktop.
Save brunoadacosta/1989104 to your computer and use it in GitHub Desktop.
CommentsNormalizer
# Masks blacklisted words inside free-text comments.
#
# Each whitespace-separated word is first "de-leeted" through SPECIAL_CHARS
# (e.g. "4rm@" becomes "arma") and then compared, ignoring case, against
# BLACK_LIST. Stretched spellings such as "caaarrooo" are detected as well.
# Matching words are replaced by MASK; every other word is returned exactly
# as it was written.
class CommentsNormalizer
  # Words to mask. Deliberately left unfrozen: the list is looked up on every
  # call, so callers may swap or edit its contents at runtime.
  BLACK_LIST = ["arma", "carro"]

  # Look-alike characters and the letter each one stands for. "." maps to the
  # empty string so that dotted spellings ("a.r.m.a") collapse to the word.
  SPECIAL_CHARS = {
    "@" => "a",
    "4" => "a",
    "0" => "o",
    "3" => "e",
    "1" => "i",
    "." => "",
    "5" => "s"
  }.freeze

  # Replacement text for a blacklisted word.
  MASK = "xxxx"

  # Masks every blacklisted word of +comment+.
  #
  # Words are split on whitespace and re-joined with single spaces, so runs
  # of whitespace are collapsed in the result.
  #
  # @param comment [String, nil] text to clean; nil is treated as ""
  # @return [String] the comment with blacklisted words replaced by MASK
  def self.normalize(comment)
    # Built per call because BLACK_LIST may change between calls.
    patterns = BLACK_LIST.map { |entry| stretched_pattern(entry) }

    cleaned = comment.to_s.split(" ").map do |word|
      plain = remove_special_chars(word)
      patterns.any? { |pattern| plain =~ pattern } ? MASK : word
    end
    cleaned.join(" ")
  end

  # Replaces every SPECIAL_CHARS key found in +word+ by its mapped letter.
  #
  # A single pass is enough: no replacement value is itself a key, so the
  # result equals applying the substitutions one after another.
  #
  # @param word [String]
  # @return [String] a new string with the substitutions applied
  def self.remove_special_chars(word)
    word.gsub(Regexp.union(SPECIAL_CHARS.keys), SPECIAL_CHARS)
  end

  # Tells whether +word+, downcased, is literally present in BLACK_LIST.
  #
  # @param word [String]
  # @return [Boolean]
  def self.need_to_omitted?(word)
    BLACK_LIST.include?(word.downcase)
  end

  # Returns MASK when +word+ is literally blacklisted, otherwise +word+.
  #
  # @param word [String]
  # @return [String]
  def self.omit_blacklisted_words(word)
    need_to_omitted?(word) ? MASK : word
  end

  # Builds a case-insensitive, fully anchored pattern that matches +entry+
  # and any spelling of it with repeated letters. Every run of identical
  # characters in the entry must appear at least as many times in the
  # candidate, so "carro" matches "caaarrooo" but not "caro". Comparing
  # String#squeeze output cannot do this: squeezing turns "carro" itself
  # into "caro", which is never in the list.
  #
  # @param entry [String] a blacklist word
  # @return [Regexp]
  def self.stretched_pattern(entry)
    source = entry.downcase.gsub(/(.)\1*/m) do |run|
      format("%s{%d,}", Regexp.escape(run[0, 1]), run.length)
    end
    /\A#{source}\z/i
  end
  private_class_method :stretched_pattern
end
@brunoadacosta
Copy link
Author

#encoding: UTF-8
require 'spec_helper'

# Specs for CommentsNormalizer: special-character removal, single-word
# masking and whole-comment normalization.
describe CommentsNormalizer do
  # Examples install their own blacklist through #black_list, which mutates
  # the existing array instead of reassigning the constant. That avoids
  # Ruby's "already initialized constant" warning, and the hooks below put
  # the original contents back so no example leaks its list into the next.
  before(:each) do
    @original_black_list = CommentsNormalizer::BLACK_LIST.dup
  end

  after(:each) do
    CommentsNormalizer::BLACK_LIST.replace(@original_black_list)
  end

  # Replaces the blacklist contents for the current example only.
  def black_list(*words)
    CommentsNormalizer::BLACK_LIST.replace(words)
  end

  describe "Caracteres especiais" do
    it "deve remover caracteres especiais" do
      CommentsNormalizer.remove_special_chars("4rm@").should eql "arma"
    end
  end

  describe "Omitir palavras" do
    it "deve omitir caso esteja na black list" do
      black_list "arma"

      CommentsNormalizer.omit_blacklisted_words("arma").should eql "xxxx"
    end

    it "não deve omitir caso não esteja na black list" do
      black_list "carro"

      CommentsNormalizer.omit_blacklisted_words("arma").should eql "arma"
    end
  end

  describe "Comentários" do
    it "deve subistituir por 'xxxx' palavras da black list" do
      black_list "porra", "caralho"

      CommentsNormalizer.normalize("Essa porra do caralho funciona").should eql "Essa xxxx do xxxx funciona"
    end

    it "deve subistituir por 'xxxx' palavras da black list independente se maíuscula" do
      black_list "porra", "caralho"

      CommentsNormalizer.normalize("Essa PORRA do caralho funciona").should eql "Essa xxxx do xxxx funciona"
    end

    # Disabled example: a blacklist word that itself contains a doubled
    # letter ("porra") written with extra repeated characters. Enable it only
    # when CommentsNormalizer.normalize is known to handle that case.
    #
    # it "deve subistituir por 'xxxx' palavras da black list se contiver caracteres duplicados" do
    #   black_list "porra", "caralho"
    #
    #   CommentsNormalizer.normalize("Essa POORRAA do caralho funciona").should eql "Essa xxxx do xxxx funciona"
    # end

    it "deve subistituir por 'xxxx' palavras da black list mesmo com caracteres repetidos" do
      black_list "porra", "caralho"

      CommentsNormalizer.normalize("Essa PORRA do caraaalhooo funciona").should eql "Essa xxxx do xxxx funciona"
    end

    it "deve subistituir por 'xxxx' palavras com caracteres especiais mas que estejam na black list" do
      black_list "porra", "caralho"

      CommentsNormalizer.normalize("Essa p0rr@ do c4ralh0 funciona").should eql "Essa xxxx do xxxx funciona"
    end

    it "não deve alterar palavras com caracteres especiais caso não esteja na black list" do
      black_list "caralho"

      CommentsNormalizer.normalize("Essa p0rr4 do caralho funciona").should eql "Essa p0rr4 do xxxx funciona"
    end

    # Runs against the class's default blacklist (restored by the hooks).
    it "não deve alterar e-mail no meio do comentário" do
      CommentsNormalizer.normalize("teste@testando.com").should eql "teste@testando.com"
    end
  end
end

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment