Skip to content

Instantly share code, notes, and snippets.

@jgreely
Created May 15, 2017 20:27
Show Gist options
  • Save jgreely/07104eb1e798f97b523d7a381d4fc7ad to your computer and use it in GitHub Desktop.
Save jgreely/07104eb1e798f97b523d7a381d4fc7ad to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
#
# take a list of Hugo content files and add random taxonomies
# to their TOML front matter:
# find content -name '*.md' | taxonomies.pl -T 5 -t 1000
use strict;
use List::Util qw(shuffle);
use Getopt::Long qw(:config no_ignore_case bundling);
# Tunable defaults; every one of them can be overridden on the command line.
my $termfile = "/usr/share/dict/words";  # -f: file the candidate words come from
my $taxnum   = 1;                        # -T: number of taxonomies to invent
my $termnum  = 1000;                     # -t: number of terms per taxonomy
my $maxterm  = 5;                        # -m: max terms per article, per taxonomy
my $minterm  = 0;                        # -M: extra terms added on top of the random count

# Parse the switches; Getopt::Long is configured (at import time) to be
# case-sensitive and to allow bundling of single-letter options.
my $parsed_ok = GetOptions(
    'termfile|f=s' => \$termfile,
    'taxnum|T=i'   => \$taxnum,
    'termnum|t=i'  => \$termnum,
    'maxterm|m=i'  => \$maxterm,
    'minterm|M=i'  => \$minterm,
);

# Any unparsable option aborts the run with the usage summary.
die <<EOF unless $parsed_ok;
Usage: $0 [options] [file ...]
-T number of taxonomies
-t number of terms per taxonomy
-m maximum number of terms applied to an article, per taxonomy
-M add minimum number of terms/tax (-m5 -M5 = 6-10)
-f source file for taxonomy/term words (1/line, first 20000 words that
have 4+ characters consisting only of a-z; default source is
/usr/share/dict/words)
EOF

# Keep the limit zero-based (count minus one), which is the form the
# term-selection code further down works with.
$maxterm -= 1;
# Build the word pool from $termfile: one word per line, keeping only words
# made of four or more lowercase a-z letters.
my @words;
# Three-argument open with a lexical handle, so the file name is always
# treated as a plain path to read.
open(my $wordfh, '<', $termfile) or die "$0: $termfile: $!\n";
while (my $word = <$wordfh>) {
    chomp $word;
    next unless $word =~ /^[a-z]{4,}$/;
    push(@words, $word);
}
close($wordfh);

# Shuffle before truncating, so the cap keeps a random sample of the file
# rather than its first entries.  The documented limit is 20000 words, i.e.
# a last index of 19999.
@words = shuffle(@words);
$#words = 19_999 if @words > 20_000;

# The first $taxnum words become the taxonomy names; splice() removes them
# from the pool that the terms are drawn from.
my @tax = splice(@words, 0, $taxnum);

# Each taxonomy gets up to $termnum terms.  The slice is clamped to the pool
# so a short word list yields fewer terms instead of undefined entries.
my $lastterm = $termnum - 1;
$lastterm = $#words if $lastterm > $#words;

# Map each taxonomy name to a reference to its own array of terms.  Iterating
# over @tax (not 1..$taxnum) avoids creating an entry for a name that does
# not exist when fewer than $taxnum words were available.
my %terms;
foreach my $name (@tax) {
    my @pool = @words[0 .. $lastterm];
    $terms{$name} = \@pool;
    # Reshuffle so the next taxonomy draws a different set of terms.
    @words = shuffle(@words);
}
# Tell the user what to add to config.toml: one `name = "name"` line for
# every generated taxonomy.
print "Add to config.toml:\n";
foreach my $name (@tax) {
    print qq($name = "$name"\n);
}
# Read the list of content files, one path per line, via <> (the files named
# on the command line, or standard input when there are none).
chomp(my @files = <>);

foreach my $in (@files) {
    # The rewritten article goes to "<name>-" first and replaces the original
    # only after it has been written out completely.
    my $out = "$in-";

    # Three-argument open with lexical handles: paths read from the input
    # list are never interpreted as open modes or commands.
    open(my $src, '<', $in)  or die "$0: $in: $!\n";
    open(my $dst, '>', $out) or die "$0: $out: $!\n";

    my $inheader = 1;
    while (my $line = <$src>) {
        # Every input line is copied through unchanged.
        print {$dst} $line;

        # Taxonomies are inserted once, directly after the first line that
        # consists of "+++" (the start of the TOML front matter).
        next unless $inheader and $line =~ /^\+{3}$/;
        $inheader = 0;

        # Keep a random, non-empty prefix of the shuffled taxonomy list.
        # With no taxonomies at all there is nothing to add.
        my @chosen = shuffle(@tax);
        $#chosen = int(rand(scalar @chosen)) if @chosen;

        foreach my $name (@chosen) {
            # Drop undefined entries defensively so they are never emitted
            # as empty terms.
            my @pool = shuffle(grep { defined } @{ $terms{$name} || [] });

            # $maxterm holds the -m value minus one, so $maxterm + 1 is the
            # largest random count; it is capped at the pool size.
            my $limit = $maxterm + 1;
            $limit = @pool if $limit > @pool;

            # rand(0) behaves like rand(1), so a zero limit is handled here
            # instead of being handed to rand().
            my $count = $limit > 0 ? 1 + int(rand($limit)) : 0;

            # -M adds a fixed number of terms on top of the random count
            # (-m5 -M5 gives 6-10), still bounded by what the pool holds.
            $count += $minterm;
            $count = @pool if $count > @pool;
            next if $count < 1;

            print {$dst} qq($name = [ "),
                join('", "', @pool[0 .. $count - 1]),
                qq(" ]\n);
        }
    }
    close($src);

    # Buffered write errors surface at close; never replace the original
    # article with an incomplete copy.
    close($dst) or die "$0: $out: $!\n";
    rename($out, $in) or die "$0: $in: $!\n";
}
exit 0;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment