Skip to content

Instantly share code, notes, and snippets.

@xiagu
Created March 10, 2015 01:01
Show Gist options
  • Save xiagu/4194865b8a55af9c14aa to your computer and use it in GitHub Desktop.
Save xiagu/4194865b8a55af9c14aa to your computer and use it in GitHub Desktop.
A patch for pisg to improve its smiley parsing with a lot more smileys.
From aa0d9705564580ad6059e9e9e793a25764dd3a9f Mon Sep 17 00:00:00 2001
From: Andrew <me@andrew.rs>
Date: Mon, 9 Mar 2015 20:53:59 -0400
Subject: [PATCH] Updated smiley parsing code
---
pisg/modules/Pisg/Parser/Logfile.pm | 51 ++++++++++++++++++++++++++++++-----
1 file changed, 44 insertions(+), 7 deletions(-)
diff --git a/pisg/modules/Pisg/Parser/Logfile.pm b/pisg/modules/Pisg/Parser/Logfile.pm
index 63c756c..ad5d913 100644
--- a/pisg/modules/Pisg/Parser/Logfile.pm
+++ b/pisg/modules/Pisg/Parser/Logfile.pm
@@ -393,6 +393,7 @@ sub _parse_file
if ($saying !~ /[a-z]/o && $saying =~ /[A-Z]/o) {
# Ignore single smileys on a line. eg. '<user> :P'
+ # TODO: Use the same smiley check as our smiley counter below
if ($saying !~ /^[8;:=][ ^-o]?[)pPD\}\]>]$/o) {
$stats->{allcaps}{$nick}++;
push @{ $lines->{allcaplines}{$nick} }, $line;
@@ -405,18 +406,54 @@ sub _parse_file
}
# Who smiles the most?
- my $e = '[8;:=%]'; # eyes
- my $n = '[-oc*^]'; # nose
- # smileys including asian-style (^^ ^_^' ^^; \o/)
- if ($saying =~ /(>?$e'?$n[\)pPD\}\]>]|[\(\{\[<]$n'?$e<?|[;:][\)pPD\}\]\>]|\([;:]|\^[_o-]*\^[';]|\\[o.]\/)/o) {
+ my $e = '[8;:=%Xx]'; # eyes
+ my $n = '[-oc*^v]'; # nose
+ my $nm_a = '[-_o.~3]'; # eastern nose / mouth middles
+ my $a_sw = '[\'";]'; # eastern face modifiers, like sweat drops
+
+ # TODO: allow matching of lines with URLs and smileys in them
+ my $url_regex = qr/\w+:\/\//;
+
+ my $wholeword_start = '(?<![\w?&])'; # don't allow in middle of word or as query strings
+ my $wholeword_end = '(?![\w])'; # prevent matching things that start `words`: (xD) will match xD, not (x
+
+ # Mouth classes from the previous pattern, kept for reference: [\)pPD\}\]>]|[\(\{\[<]
+ my $bi_hm = '*'; # happy mouths that work either way
+ my $hm = '[DPp\)\]\}>3'.$bi_hm.']'; # Directional happy mouths (LTR -->)
+ my $rhm = '[CcL\(\[\{<'.$bi_hm.']'; # Directional happy mouths (RTL <--)
+ my $he_a = '\^'; # happy eastern eyes
+
+ # Happy faces, smileys including eastern-style (^^ ^_^' ^^; \o/)
+ # Assumed here that tears are happy tears.
+ if ($saying =~ qr/$wholeword_start(>?$e'?$n?$hm|$rhm$n?'?$e<?|($he_a)$nm_a*\g{-1}$a_sw?|\\[o.]\/)$wholeword_end/) {
$stats->{smiles}{$nick}++;
$stats->{smileys}{$1}++;
$stats->{smileynicks}{$1} = $nick;
}
- # asian frown: ;_;
- if ($saying =~ /($e'?$n[\(\[\\\/\{|]|[\)\]\\\/\}|]$n'?$e|[;:][\(\/]|[\)D]:|;_+;|T_+T|-[._]+-)/o and
- $saying !~ /\w+:\/\//o) {
+ # Ambiguous faces. Neither happy nor sad.
+ # There are no tears because tears + ambiguous = sad.
+ # Ambiguous mouths look like: :I :O :o :0 :B :F :U :u :V :v
+ my $bi_am = 'FIOo0TUuVvn'; # ambiguous mouths
+ my $am = '[BF'.$bi_am.']'; # directional ambiguous mouths (LTR -->)
+ my $ram = '['.$bi_am.']'; # directional ambiguous mouths (RTL <--)
+ my $o_eyes = '[oO0]'; # for mismatched faces
+ my $ae_a = '[.oO09]'; # ambiguous eastern eyes.
+ # 'o' is a tricky one: its meaning changes with the mouth (o_o, o3o, o~o), but that is not handled here.
+
+ if ($saying =~ qr/$wholeword_start(>?$e$n?$am|$ram$n?$e<?|($ae_a)(?!\g{-1})$nm_a+\g{-1}$a_sw?|($o_eyes)(?!\g{-1})$nm_a+$o_eyes)$wholeword_end/) {
+ $stats->{smileys}{$1}++;
+ $stats->{smileynicks}{$1} = $nick;
+ }
+
+ # Sad faces
+ # X x \ / L S s ( { [ < C c
+ my $bi_sm = 'XxSs\/|\\\\'; # sad mouths that work either way; must end in an escaped backslash (\\ in the regex) so it cannot escape the closing ']' appended below
+ my $sm = '[CcL\(\[\{<'.$bi_sm.']'; # directional sad mouths (LTR -->), as one closed bracket class
+ my $rsm = '[\)\]\}>3'.$bi_sm.']'; # directional sad mouths (RTL <--), as one closed bracket class
+ my $se_a = '[-;TQ><]'; # sad eastern eyes
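+ # Frowny faces. The >mouth< form (e.g. >_<) is hardcoded as the last alternative because its eyes mirror each other, so the \g{-1} backreference cannot match them.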
+ if ($saying =~ qr/$wholeword_start(>?$e'?$n?$sm|$rsm$n?'?$e<?|($se_a)(?!\g{-1})$nm_a+\g{-1}$a_sw?|>$nm_a+<)$wholeword_end/) {
$stats->{frowns}{$nick}++;
$stats->{smileys}{$1}++;
$stats->{smileynicks}{$1} = $nick;
--
1.7.10.4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment