Skip to content

Instantly share code, notes, and snippets.

@williamn
Created September 1, 2012 03:42
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save williamn/3563429 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
# Finds duplicate adjacent words.
use strict ;
my $DupCount = 0 ;
if (!@ARGV) {
print "usage: $0 <file> ...\n" ;
exit ;
}
while (1) {
my $FileName = shift @ARGV ;
# Exit code = number of duplicates found.
exit $DupCount if (!$FileName) ;
open FILE, $FileName or die $!;
my $LastWord = "" ;
my $LineNum = 0 ;
while (<FILE>) {
chomp ;
$LineNum ++ ;
my @words = split (/(\W+)/) ;
foreach my $word (@words) {
# Skip spaces:
next if $word =~ /^\s*$/ ;
# Skip punctuation:
if ($word =~ /^\W+$/) {
$LastWord = "" ;
next ;
}
# Found a dup?
if (lc($word) eq lc($LastWord)) {
print "$FileName:$LineNum $word\n" ;
$DupCount ++ ;
} # Thanks to Sean Cronin for tip on case.
# Mark this as the last word:
$LastWord = $word ;
}
}
close FILE ;
}
#!/bin/bash
irregulars="awoken|\
been|born|beat|\
become|begun|bent|\
beset|bet|bid|\
bidden|bound|bitten|\
bled|blown|broken|\
bred|brought|broadcast|\
built|burnt|burst|\
bought|cast|caught|\
chosen|clung|come|\
cost|crept|cut|\
dealt|dug|dived|\
done|drawn|dreamt|\
driven|drunk|eaten|fallen|\
fed|felt|fought|found|\
fit|fled|flung|flown|\
forbidden|forgotten|\
foregone|forgiven|\
forsaken|frozen|\
gotten|given|gone|\
ground|grown|hung|\
heard|hidden|hit|\
held|hurt|kept|knelt|\
knit|known|laid|led|\
leapt|learnt|left|\
lent|let|lain|lighted|\
lost|made|meant|met|\
misspelt|mistaken|mown|\
overcome|overdone|overtaken|\
overthrown|paid|pled|proven|\
put|quit|read|rid|ridden|\
rung|risen|run|sawn|said|\
seen|sought|sold|sent|\
set|sewn|shaken|shaven|\
shorn|shed|shone|shod|\
shot|shown|shrunk|shut|\
sung|sunk|sat|slept|\
slain|slid|slung|slit|\
smitten|sown|spoken|sped|\
spent|spilt|spun|spit|\
split|spread|sprung|stood|\
stolen|stuck|stung|stunk|\
stridden|struck|strung|\
striven|sworn|swept|\
swollen|swum|swung|taken|\
taught|torn|told|thought|\
thrived|thrown|thrust|\
trodden|understood|upheld|\
upset|woken|worn|woven|\
wed|wept|wound|won|\
withheld|withstood|wrung|\
written"
if [ "$1" = "" ]; then
echo "usage: `basename $0` <file> ..."
exit
fi
egrep -n -i --color \
"\\b(am|are|were|being|is|been|was|be)\
\\b[ ]*(\w+ed|($irregulars))\\b" $*
exit $?
#!/bin/bash
weasels="many|various|very|fairly|several|extremely\
|exceedingly|quite|remarkably|few|surprisingly\
|mostly|largely|huge|tiny|((are|is) a number)\
|excellent|interestingly|significantly\
|substantially|clearly|vast|relatively|completely"
wordfile=""
# Check for an alternate weasel file
if [ -f $HOME/etc/words/weasels ]; then
wordfile="$HOME/etc/words/weasels"
fi
if [ -f $WORDSDIR/weasels ]; then
wordfile="$WORDSDIR/weasels"
fi
if [ -f words/weasels ]; then
wordfile="words/weasels"
fi
if [ ! "$wordfile" = "" ]; then
weasels="xyzabc123";
for w in `cat $wordfile`; do
weasels="$weasels|$w"
done
fi
if [ "$1" = "" ]; then
echo "usage: `basename $0` <file> ..."
exit
fi
egrep -i -n --color "\\b($weasels)\\b" $*
exit $?
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment