Skip to content

Instantly share code, notes, and snippets.

@aijaz
Created January 8, 2012 00:17
Show Gist options
  • Save aijaz/1576578 to your computer and use it in GitHub Desktop.
Save aijaz/1576578 to your computer and use it in GitHub Desktop.
Create tag clouds and tag files for your octopress blog
#!/usr/bin/perl
## WHAT IS THIS?
##
## PLEASE SEE http://aijazansari.com/2012/01/07/tag-clouds-with-octopress/ for more context.
##
use strict;
use warnings;
use File::Find;
use HTML::TreeBuilder;
use Getopt::Long;
my $octopress_root;
my $options_read = GetOptions("dir=s", \$octopress_root);
############################################################
unless ($options_read && $octopress_root) {
print "\n";
print "\n";
print "usage: tagify.pl --dir d\n";
print "\n";
print "where d is the root octopress directory\n";
print " - the parent of source, public, etc.\n";
print "\n";
exit 1;
}
# The tag cloud HTML gets saved into this file.
# This file is included by two others:
# a) The file used for the sidebar aside
# b) The page used to display all tags
# (accessible as /tags/index.html)
#
my $custom_file = "$octopress_root/source/_includes".
"/custom/tag_cloud.html";
# This is the data structure that contains all the tag
# data parsed by the HTML files.
# It's key is the tag name (not case-normalized).
# The value is another hash. That hash has 2 keys:
# count - number of pages with that tag
# range_num - a number from 1 - 10 indicating
# popularity (see below)
# pages - an array of hashes
#
# Each hash in the pages array has 3 keys:
# title - the HTML title of the post
# file - the full file name of the HTML file
# categories - yet another hash
#
# The categories hash has two keys:
# href - the url to the category page (as determined
# by OctoPress)
# text - the name of the category (as displayed by
# Octopress)
#
my $tag_data = { };
# This function populates the tag_data data structure
#
find(\&getTags, "$octopress_root/public");
# Find the number of times the most popular tag is used
#
my $max = 1; # start with 1, not 0 to prevent a
# divide-by-zero error later
# if none of the posts have tags
foreach my $tag (keys %$tag_data) {
$tag_data->{$tag}->{count} = scalar(@{$tag_data->{$tag}->{pages}});
if ($tag_data->{$tag}->{count} > $max) {
$max = $tag_data->{$tag}->{count};
}
}
# Assign each tag a range number from 1 - 10
# based on popularity. This range number will
# be used along with CSS to print tags with
# the appropriate size.
#
foreach my $tag (keys %$tag_data) {
$tag_data->{$tag}->{range_num} =
int(($tag_data->{$tag}->{count} / $max)
* 10
+ 0.5); # nearest whole number
if ($tag_data->{$tag}->{range_num} == 0) {
$tag_data->{$tag}->{range_num} = 1;
# we want 1-10, not 0-10
}
}
# Write the tag cloud file
#
open (O, ">$custom_file") || die;
print O "<div id='tag_cloud'>\n";
# sort by tag name, case insensitive
#
foreach my $tag (sort { lc($a) cmp lc($b)}
keys %$tag_data) {
# give each tag anchor a title,
# for screen readers and the like
#
my $plural = "y";
if ($tag_data->{$tag}->{count} > 1) {
$plural = 'ies';
}
print O qq[<a href="/tags/$tag/" ];
print O qq[title="$tag_data->{$tag}->{count} entr$plural" ];
print O qq[class="tag_$tag_data->{$tag}->{range_num}">];
print O qq[$tag];
print O qq[</a>\n];
}
print O "</div>\n";
close O;
# Now save the individual tag files
# First, clear out the directory because we're gonna
# regenerate all the files.
#
my $tag_dir = "$octopress_root/source/tags";
# If source/tags exists but is a file
#
die "source/tags is a file" if (-f $tag_dir);
# Create the directory if it doesn't exist
#
if (!-d $tag_dir) {
mkdir $tag_dir;
createTagsIndexMarkdown($tag_dir);
}
else {
# Delete all directories under source/tags.
# We don't want to delete everything because we
# need to preserve tags/index.markdown in case
# something was modified there.
#
my $dirs = `find $tag_dir/* -type d`;
my @dirs = split(/[\r\n]+/, $dirs);
foreach my $dir (@dirs) {
`/bin/rm -rf $dir`;
}
}
# Make a file for each tag.
#
foreach my $tag (keys %$tag_data) {
makeTagFile($tag);
}
## ####################################################
## Functions
## ####################################################
sub makeTagFile {
my $tag = shift;
mkdir "source/tags/$tag"
||
die "Couldn't make directory source/tags/$tag";
open (O, "> source/tags/$tag/index.markdown")
||
die "Can't open source/tags/$tag/index.markdown";
print O qq^---
layout: page
title: Tag&#58; $tag
footer: false
---
<div id="blog-archives" class="category">
^;
my $year = 0;
# Sort by file name descending
# This is the same as sorting by date descending
#
foreach my $file (
sort { $b->{file} cmp $a->{file} }
@{$tag_data->{$tag}->{pages}}) {
# Get the year month and date
#
my ($yyyy, $mm, $dd) = $file->{file} =~
m!(\d\d\d\d)/(\d\d)/(\d\d)/!;
# The HTML and associated logic here mimics
# the HTML of the category pages - print
# a H2 for every new year
#
if ($yyyy != $year) {
$year = $yyyy;
print O "<h2>$year</h2>\n";
}
# Construct the URL & date string
#
my $url = $file->{file};
$url =~ s/^$octopress_root\/public//;
my $title = $file->{title};
my @months = qw ( x Jan Feb Mar Apr May Jun
Jul Aug Sep Oct Nov Dec );
my $mon = $months[$mm * 1];
print O qq[
<article>
<h1><a href="$url">$title</a></h1>
<time datetime="$yyyy-$mm-${dd}T00:00:00-06:00" pubdate><span class='month'>$mon</span> <span class='day'>$dd</span> <span class='year'>$yyyy</span></time>
<footer>
<span class="categories">posted in
];
# Print each category, separated by commas
#
print O join(", ",
map { "<a href='$_->{href}'>$_->{text}</a>" }
@{$file->{categories}}
);
print O qq[</span>
</footer>
</article>
];
}
print O "</div>\n";
close O;
}
sub getTags {
my $file = $File::Find::name;
# Only parse files that look like posts
#
return unless $file =~ /\.html$/;
return unless $file =~
/^$octopress_root\/public\/\d{4}\/\d{2}\/\d{2}\//;
# Read the contents of the HTML file
#
open (HTML, $_) || die "Can't open $file";
my $contents = join("", <HTML>);
close HTML;
my $tree = HTML::TreeBuilder->new();
$tree->parse($contents);
# Get the title
#
my $title = $tree->look_down(_tag => "h1",
class => "entry-title");
$title = $title->as_trimmed_text();
# Get the categories
#
my $category_ent = $tree->look_down(_tag => "span",
class => "categories");
my @as = $category_ent->look_down(_tag => "a",
class => "category");
my @categories = ();
foreach my $a (@as) {
push(@categories,
{ href => $a->attr('href'),
text => $a->as_trimmed_text
});
}
# Get the tags
#
my $ul = $tree->look_down("_tag", "ul",
"id" , "tags_ul");
if ($ul) {
my @items = $ul->look_down("_tag" => "li");
foreach my $item (@items) {
my $tag = $item->as_trimmed_text();
# Finally, populate the data structure
#
push (@{$tag_data->{$tag}->{pages}},
{ title => $title,
file => $file,
categories => \@categories
} );
}
}
else {
# no tags in this document
}
$tree->delete();
}
# This function creates a default
# source/tags/index.markdown
#
sub createTagsIndexMarkdown {
my $tag_dir = shift;
open (O, ">$tag_dir/index.markdown");
print O qq[---
layout: page
title: Tags
footer: false
---
<div class="tag_page">
{% include custom/tag_cloud.html %}
</div>
];
close O;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment