Skip to content

Instantly share code, notes, and snippets.

@Timopheym
Created May 29, 2015 20:17
Show Gist options
  • Save Timopheym/b7e97424bb9485ab36bb to your computer and use it in GitHub Desktop.
Save Timopheym/b7e97424bb9485ab36bb to your computer and use it in GitHub Desktop.
## English language utilities for text processing
## Check that a word is well formed and that any apostrophe placement follows English rules
# Forward declaration: allow_word() calls allow_apostrophe() before that sub's
# definition has been parsed. Declaring the prototype here lets Perl check the
# call and avoids the "called too early to check prototype" warning when
# warnings are enabled.
sub allow_apostrophe ($);

# allow_word($word)
#
# Returns a true value when $word
#   * starts with an uppercase letter A-Z, followed by one or more characters
#     drawn from uppercase A-Z, apostrophe and hyphen (so at least two
#     characters in total), and
#   * either contains no apostrophe, or allow_apostrophe() accepts it.
# Returns a false value otherwise.
#
# The pattern is case-sensitive, so lowercase input is rejected. Because the
# pattern ends in `$` rather than `\z`, a single trailing newline is tolerated.
sub allow_word ($)
{
    my $word = shift;

    # Shape check only: permitted characters and minimum length.
    my $well_formed = $word =~ /^[A-Z][A-Z\'-]+$/;

    # The apostrophe rules are consulted only when an apostrophe is present.
    return $well_formed && (index($word, "'") < 0 || allow_apostrophe($word));
}
# allow_apostrophe($word)
#
# Decides whether the apostrophe usage in $word matches one of a fixed set of
# accepted English forms. The patterns are uppercase literals with no
# case-insensitive flag, so lowercase input never matches.
#
# Returns 1 as soon as any accepted form matches, 0 when none does.
sub allow_apostrophe ($)
{
    my ($candidate) = @_;

    # Contraction of I am = I'm (the only form compared as a whole word).
    return 1 if $candidate eq "I'M";

    # Each entry is one accepted form; a single match is sufficient.
    my @accepted_forms = (
        qr/[A-Z-]+'S$/,    # Possessive singular (e.g. baron's)
        qr/^O'/,           # Names (e.g. O'Reilly)
        qr/'LL$/,          # Contraction with will (e.g. I'll, we'll, they'll)
        qr/'VE$/,          # Contraction with have (e.g. they've, I've, you've)
        qr/S'$/,           # Possessive plural (e.g. barons')
        qr/N'$/,           # Colloquial omitted G (e.g. rainin')
        qr/N'T$/,          # Contraction with not (e.g. won't, shan't, can't)
        qr/'D$/,           # Contraction with would (e.g. I'd, he'd, they'd)
        qr/'RE$/,          # Contraction with are (e.g. we're, they're)
        qr/^D'/,           # Names (e.g. D'Oliveira)
    );

    for my $form (@accepted_forms) {
        return 1 if $candidate =~ $form;
    }

    return 0;
}
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment