Skip to content

Instantly share code, notes, and snippets.

@clintongormley
Created April 18, 2014 09:38
Show Gist options
  • Save clintongormley/11034437 to your computer and use it in GitHub Desktop.
Save clintongormley/11034437 to your computer and use it in GitHub Desktop.
Emoticon analyzer
e_smiley :-) :) :o) :] :3 :c) :> =] 8) =) :} :^) :っ) :-)) :-))) :-)))) :)) :))) :))))
e_laugh :-D :D 8-D 8D x-D xD X-D XD =-D =D =-3 =3 B^D
e_sad >:[ :-( :( :-c :c :-< :っC :< :-[ :[ :{
e_wink_frown ;(
e_angry :-|| :@ >:(
e_cry :'-( :'(
e_happy_tears :'-) :')
e_disgust D:< D: D8 D; D= DX v.v D-':
e_surprise >:O :-O :O :-o :o 8-0 O_O o-o O_o o_O o_o O-O
e_kiss :* :^* '}{'
e_wink ;-) ;) *-) *) ;-] ;] ;D ;^) :-,
e_tongue >:P :-P :P X-P x-p xp XP :-p :p =p :-Þ :Þ :þ :-þ :-b :b d:
e_skeptical >:\ >:/ :-/ :-. :/ :\ =/ =\ :L =L :S >.<
e_neutral :| :-|
e_embarrassed :$ :S
e_silent :-X :X :-# :#
e_halo O:-) 0:-3 0:3 0:-) 0:) 0;^)
e_evil >:) >;) >:-)
e_devil }:-) }:) 3:-) 3:)
e_highfive o/\o ^5 >_>^ ^<_<
e_disapprove ಠ_ಠ
e_cheer \o/
e_heart <3
e_broken_heart </3
DELETE /test
PUT /test
{
"settings": {
"analysis": {
"char_filter": {
  "e_happy_tears": {
    "type": "pattern_replace",
    "pattern": "\\:\\'(?:\\-\\)|\\))",
    "replacement": " e_happy_tears "
  },
  "e_wink_frown": {
    "type": "pattern_replace",
    "pattern": "\\;\\(",
    "replacement": " e_wink_frown "
  },
  "e_broken_heart": {
    "type": "pattern_replace",
    "pattern": "\\<\\/3",
    "replacement": " e_broken_heart "
  },
  "e_silent": {
    "type": "pattern_replace",
    "pattern": "\\:(?:\\-[\\#X]|[\\#X])",
    "replacement": " e_silent "
  },
  "e_cheer": {
    "type": "pattern_replace",
    "pattern": "\\\\o\\/",
    "replacement": " e_cheer "
  },
  "e_tongue": {
    "type": "pattern_replace",
    "pattern": "(?:\\:(?:\\-[PbpÞþ]|[PbpÞþ])|\\=p|\\>\\:P|X(?:\\-P|P)|d\\:|x(?:\\-p|p))",
    "replacement": " e_tongue "
  },
  "e_wink": {
    "type": "pattern_replace",
    "pattern": "(?:\\*(?:\\-\\)|\\))|\\:\\-\\,|\\;(?:\\-[\\)\\]]|\\^\\)|[\\)D\\]]))",
    "replacement": " e_wink "
  },
  "e_laugh": {
    "type": "pattern_replace",
    "pattern": "(?:8(?:\\-D|D)|\\:(?:\\-D|D)|\\=(?:\\-[3D]|[3D])|B\\^D|X(?:\\-D|D)|x(?:\\-D|D))",
    "replacement": " e_laugh "
  },
  "e_surprise": {
    "type": "pattern_replace",
    "pattern": "(?:8\\-0|\\:(?:\\-[Oo]|[Oo])|\\>\\:O|O(?:\\-O|_[Oo])|o(?:\\-o|_[Oo]))",
    "replacement": " e_surprise "
  },
  "e_disapprove": {
    "type": "pattern_replace",
    "pattern": "ಠ_ಠ",
    "replacement": " e_disapprove "
  },
  "e_embarrassed": {
    "type": "pattern_replace",
    "pattern": "\\:[\\$S]",
    "replacement": " e_embarrassed "
  },
  "e_kiss": {
    "type": "pattern_replace",
    "pattern": "(?:\\'\\}\\{\\'|\\:(?:\\^\\*|\\*))",
    "replacement": " e_kiss "
  },
  "e_cry": {
    "type": "pattern_replace",
    "pattern": "\\:\\'(?:\\-\\(|\\()",
    "replacement": " e_cry "
  },
  "e_devil": {
    "type": "pattern_replace",
    "pattern": "(?:3\\:(?:\\-\\)|\\))|\\}\\:(?:\\-\\)|\\)))",
    "replacement": " e_devil "
  },
  "e_angry": {
    "type": "pattern_replace",
    "pattern": "(?:\\:(?:\\-\\|\\||\\@)|\\>\\:\\()",
    "replacement": " e_angry "
  },
  "e_halo": {
    "type": "pattern_replace",
    "pattern": "(?:0(?:\\:(?:\\-[\\)3]|[\\)3])|\\;\\^\\))|O\\:\\-\\))",
    "replacement": " e_halo "
  },
  "e_disgust": {
    "type": "pattern_replace",
    "pattern": "(?:D(?:\\-\\'\\:|\\:\\<?|[8\\;\\=X])|v\\.v)",
    "replacement": " e_disgust "
  },
  "e_skeptical": {
    "type": "pattern_replace",
    "pattern": "(?:\\:(?:\\-[\\.\\/]|[\\/LS\\\\])|\\=[\\/L\\\\]|\\>(?:\\.\\<|\\:[\\/\\\\]))",
    "replacement": " e_skeptical "
  },
  "e_evil": {
    "type": "pattern_replace",
    "pattern": "\\>(?:\\:(?:\\-\\)|\\))|\\;\\))",
    "replacement": " e_evil "
  },
  "e_smiley": {
    "type": "pattern_replace",
    "pattern": "(?:8\\)|\\:(?:\\)(?:\\)(?:\\)\\)?)?)?|\\-\\)(?:\\)(?:\\)\\)?)?)?|\\^\\)|c\\)|o\\)|っ\\)|[3\\>\\]\\}])|\\=[\\)\\]])",
    "replacement": " e_smiley "
  },
  "e_highfive": {
    "type": "pattern_replace",
    "pattern": "(?:\\>_\\>\\^|\\^(?:\\<_\\<|5)|o\\/\\\\o)",
    "replacement": " e_highfive "
  },
  "e_neutral": {
    "type": "pattern_replace",
    "pattern": "\\:(?:\\-\\||\\|)",
    "replacement": " e_neutral "
  },
  "e_heart": {
    "type": "pattern_replace",
    "pattern": "\\<3",
    "replacement": " e_heart "
  },
  "e_sad": {
    "type": "pattern_replace",
    "pattern": "(?:\\:(?:\\-[\\(\\<\\[c]|っC|[\\(\\<\\[c\\{])|\\>\\:\\[)",
    "replacement": " e_sad "
  }
},
"analyzer": {
  "emoticons": {
    "filter": [
      "lowercase"
    ],
    "char_filter": [
      "e_halo",
      "e_evil",
      "e_devil",
      "e_smiley",
      "e_laugh",
      "e_angry",
      "e_disgust",
      "e_sad",
      "e_wink_frown",
      "e_cry",
      "e_happy_tears",
      "e_surprise",
      "e_kiss",
      "e_wink",
      "e_tongue",
      "e_skeptical",
      "e_neutral",
      "e_embarrassed",
      "e_silent",
      "e_highfive",
      "e_disapprove",
      "e_cheer",
      "e_heart",
      "e_broken_heart"
    ],
    "tokenizer": "standard"
  }
}
}
}
}
# "Awesome day ;) don't you think?"
GET /test/_analyze?analyzer=emoticons&pretty=1&text=Awesome+day+%3B)+don't+you+think%3F
# {
# "tokens" : [
# {
# "end_offset" : 7,
# "position" : 1,
# "start_offset" : 0,
# "type" : "<ALPHANUM>",
# "token" : "awesome"
# },
# {
# "end_offset" : 11,
# "position" : 2,
# "start_offset" : 8,
# "type" : "<ALPHANUM>",
# "token" : "day"
# },
# {
# "end_offset" : 13,
# "position" : 3,
# "start_offset" : 13,
# "type" : "<ALPHANUM>",
# "token" : "e_wink"
# },
# {
# "end_offset" : 20,
# "position" : 4,
# "start_offset" : 15,
# "type" : "<ALPHANUM>",
# "token" : "don't"
# },
# {
# "end_offset" : 24,
# "position" : 5,
# "start_offset" : 21,
# "type" : "<ALPHANUM>",
# "token" : "you"
# },
# {
# "end_offset" : 30,
# "position" : 6,
# "start_offset" : 25,
# "type" : "<ALPHANUM>",
# "token" : "think"
# }
# ]
# }
@Mpdreamz
Copy link

Reading this made me remember some nearly decade-old code I wrote!

http://www.mircscripts.org/showdoc.php?type=code&id=3033

Very interesting technique of doing replacements with pattern char filters, by the way; I would never have thought of doing that!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment