dunn/phead.pl

## phead.pl
#!/usr/bin/perl

# written by Alex Dunn in 2012
# this code may be used for any purpose whatever

# phead.pl -- read one line of header text (from the files named on the
# command line, or from standard input) and print a pandoc-style header
# identifier for it, without a trailing newline.
#
# This is only an approximation and will not always produce the
# identifier pandoc would: inline markup other than _italics_ (links,
# footnotes, *emphasis*, ...) is not parsed.  Everything that should
# occur is documented at:
# http://johnmacfarlane.net/pandoc/README.html#header-identifiers-in-html-latex-and-context

use strict;
use warnings;

# Derive an identifier from header text.
#
#   $text   - the header as a string of characters (not UTF-8 octets);
#             undef is treated like an empty header
#   returns - the identifier, or 'section' when nothing usable is left
sub header_identifier {
    my ($text) = @_;
    $text = '' unless defined $text;

    # force character semantics for code points 128-255 as well, so the
    # \p{...} classes and lc below treat accented letters consistently
    utf8::upgrade($text);

    # drop surrounding whitespace (including the newline left by <>) so
    # it cannot turn into leading or trailing hyphens
    $text =~ s/\A\s+//;
    $text =~ s/\s+\z//;

    # remove every pair of matching underscores (italics), keeping the
    # text between them
    $text =~ s/_(.*?)_/$1/g;

    # strip everything but letters (with their combining marks), numbers,
    # underscores, periods, spaces, and hyphens
    $text =~ s/[^\p{L}\p{M}\p{N}_. -]//g;

    # remove en- and em-dashes (typed as -- and ---)
    $text =~ s/-{2,3}//g;

    # dashes for spaces
    $text =~ tr/ /-/;

    # lower case, accented capitals included
    $text = lc $text;

    # an identifier may not begin with a number or punctuation mark:
    # remove everything up to the first letter
    $text =~ s/\A\P{L}+//;

    return length($text) ? $text : 'section';
}

# Read the header line, convert it, and print the identifier.
sub main {
    # grab the text; with no input at all there is nothing to convert
    my $header = <>;
    return unless defined $header;

    # work on characters rather than octets; utf8::decode() returns false
    # when the input is not well-formed UTF-8, in which case the line is
    # processed undecoded and written back the same way
    my $was_utf8 = utf8::decode($header);

    my $id = header_identifier($header);
    utf8::encode($id) if $was_utf8;

    print $id;
    return;
}

# run only when executed as a script, so the file can also be loaded
# just to call header_identifier()
main() unless caller;
	#!/usr/bin/perl

	# written by Alex Dunn in 2012
	# this code may be used for any purpose whatever

	# phead.pl -- read one line of header text (from the files named on the
	# command line, or from standard input) and print a pandoc-style header
	# identifier for it, without a trailing newline.
	#
	# This is only an approximation and will not always produce the
	# identifier pandoc would: inline markup other than _italics_ (links,
	# footnotes, *emphasis*, ...) is not parsed.  Everything that should
	# occur is documented at:
	# http://johnmacfarlane.net/pandoc/README.html#header-identifiers-in-html-latex-and-context

	use strict;
	use warnings;

	# Derive an identifier from header text.
	#
	#   $text   - the header as a string of characters (not UTF-8 octets);
	#             undef is treated like an empty header
	#   returns - the identifier, or 'section' when nothing usable is left
	sub header_identifier {
	    my ($text) = @_;
	    $text = '' unless defined $text;

	    # force character semantics for code points 128-255 as well, so the
	    # \p{...} classes and lc below treat accented letters consistently
	    utf8::upgrade($text);

	    # drop surrounding whitespace (including the newline left by <>) so
	    # it cannot turn into leading or trailing hyphens
	    $text =~ s/\A\s+//;
	    $text =~ s/\s+\z//;

	    # remove every pair of matching underscores (italics), keeping the
	    # text between them
	    $text =~ s/_(.*?)_/$1/g;

	    # strip everything but letters (with their combining marks), numbers,
	    # underscores, periods, spaces, and hyphens
	    $text =~ s/[^\p{L}\p{M}\p{N}_. -]//g;

	    # remove en- and em-dashes (typed as -- and ---)
	    $text =~ s/-{2,3}//g;

	    # dashes for spaces
	    $text =~ tr/ /-/;

	    # lower case, accented capitals included
	    $text = lc $text;

	    # an identifier may not begin with a number or punctuation mark:
	    # remove everything up to the first letter
	    $text =~ s/\A\P{L}+//;

	    return length($text) ? $text : 'section';
	}

	# Read the header line, convert it, and print the identifier.
	sub main {
	    # grab the text; with no input at all there is nothing to convert
	    my $header = <>;
	    return unless defined $header;

	    # work on characters rather than octets; utf8::decode() returns false
	    # when the input is not well-formed UTF-8, in which case the line is
	    # processed undecoded and written back the same way
	    my $was_utf8 = utf8::decode($header);

	    my $id = header_identifier($header);
	    utf8::encode($id) if $was_utf8;

	    print $id;
	    return;
	}

	# run only when executed as a script, so the file can also be loaded
	# just to call header_identifier()
	main() unless caller;