Skip to content

Instantly share code, notes, and snippets.

@scottoffen
Created April 23, 2014 16:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save scottoffen/11223520 to your computer and use it in GitHub Desktop.
Save scottoffen/11223520 to your computer and use it in GitHub Desktop.
Perl script that parses and merges the Apache mime.types file and a list from StackOverflow to generate ContentType.cs (https://gist.github.com/scottoffen/11197961)
#!/usr/bin/perl
use strict;
use warnings;
#----------------------------------------------------------------------------------#
# Run from the command line. This will first parse the Apache mime.types file from #
# here (http://svn.apache.org/repos/asf/httpd/httpd/trunk/docs/conf/mime.types), #
# and then the dictionary portion only of this (http://stackoverflow.com/a/7161265)#
# StackOverflow answer (starting on the fifth line down), saved as mime.dict. All #
# files are expected to be in the same directory. #
# #
# When parsing the dictionary, if an entry is found that was already found in the #
# Apache mime.types file, you will be prompted to make a selection to use the new #
# value from the dictionary (1) or the old value from mime.types (2), or you can #
# provide your own value instead. #
# #
# Types are binary by default, and are changed to text if they meet any of the #
# following three conditions: #
# #
# 1. Does the type begin with the word "text"? #
# #
# 2. Does the type end in "+xml" or "xml"? #
# #
# 3. Does the type contain the word "json" or "javascript"? #
# #
# Extensions that start with a digit or contain a dash (-) or an equals sign (=) are ignored. #
#----------------------------------------------------------------------------------#
# Script-wide state shared by the sections below.
#   %extensions  maps an upper-cased file extension to its MIME type
#   $subtotala   count of extensions collected from the Apache mime.types file
#   $subtotals   count of entries split out of the StackOverflow dictionary
#   $identical   dictionary entries whose type agreed with the existing one
#   $conflicts   dictionary entries that had to be resolved interactively
#   $grandtotal  count of extensions once both sources have been merged
# The counters start at zero so that the statistics never print an undefined
# value (and trigger a warning) when, for example, no conflict was found.
my %extensions;
my ($subtotala, $subtotals, $identical, $conflicts, $grandtotal) = (0, 0, 0, 0, 0);
my $inputa = "./mime.types";
my $inputs = "./mime.dict";
my $output = "./content-types.txt";
# Remove any previous output file; a failure is reported but is not fatal.
if (-e $output)
{
    unlink($output) or warn "Could not remove $output: $!\n";
}
#----------------------------------------------------------------------------------#
# MIMEType Mismatch Resolver #
#----------------------------------------------------------------------------------#
# resolve($ext, $new, $old)
#
# Interactively decides which MIME type to keep for an extension that has two
# different candidate values.
#
#   $ext  the extension being resolved (only used in the prompt)
#   $new  the newly found MIME type
#   $old  the MIME type that is already recorded
#
# Prints the two candidates to STDOUT and reads one line from STDIN.
# Returns $new for an empty answer or "1", $old for "2", and otherwise the
# text that was typed, with surrounding whitespace removed.
sub resolve
{
    my ($ext, $new, $old) = @_;
    print "\nExtension Mismatch : $ext\n";
    print "(1) New : <$new>\n";
    print "(2) Old : <$old>\n";
    print "Which one would you like to use?\n";
    print "<enter> or 1 for New, 2 for Old, or type your own > ";
    my $value = <STDIN>;
    # At end of input <STDIN> yields undef; treat that like a bare <enter>.
    $value = "" unless (defined $value);
    # chomp removes only a trailing line ending. chop would delete the last
    # real character whenever the answer arrives without a newline.
    chomp($value);
    # Drop stray whitespace (including a carriage return) around the answer.
    $value =~ s/^\s+//;
    $value =~ s/\s+$//;
    return $new if ($value eq "" || $value eq "1");
    return $old if ($value eq "2");
    return $value;
}
#----------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------#
# Parse Apache Extensions #
#----------------------------------------------------------------------------------#
{
    # Read the Apache mime.types file named by $inputa and record every
    # extension it lists in %extensions (key: upper-cased extension,
    # value: MIME type). Stops with an error if the file cannot be read,
    # instead of silently continuing with no data.
    open(my $apache_fh, '<', $inputa)
        or die "Cannot open $inputa for reading: $!\n";
    while (my $line = <$apache_fh>)
    {
        # chomp removes only a line ending. chop would cut the last character
        # of the final extension when the file lacks a trailing newline.
        chomp($line);
        # Tolerate files saved with CRLF line endings.
        $line =~ s/\r$//;
        # Strip a leading comment marker so that commented-out entries are
        # parsed like active ones.
        $line =~ s/^#//;
        # The type and its extension list are separated by one or more tabs.
        my ($type, $extensions) = split(/\t+/, $line, 2);
        next unless ($extensions);
        $type =~ s/\s+$//;
        $type =~ s/^\s+//;
        # Splitting on ' ' skips leading whitespace, so no empty extension
        # is ever produced.
        foreach my $extension (split(' ', $extensions))
        {
            $extensions{uc($extension)} = $type;
        }
    }
    close($apache_fh);
    $subtotala = (scalar (keys %extensions));
}
#----------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------#
# Parse StackOverflow Extensions #
#----------------------------------------------------------------------------------#
{
    # Read the dictionary file named by $inputs, which holds entries of the
    # form {"ext", "type"}, and merge them into %extensions. An entry whose
    # extension is already present with a different type is passed to
    # resolve() so a value can be chosen. Matching entries are only counted.
    my $file;
    {
        # Slurp the whole file in one read.
        local $/;
        open(my $dict_fh, '<', $inputs)
            or die "Cannot open $inputs for reading: $!\n";
        $file = <$dict_fh>;
        close($dict_fh);
    }
    # Braces carry no information once the entries are split apart.
    $file =~ s/[\{\}]//g;
    # Entries end with a comma and a line break (LF or CRLF).
    my @inputs = split(/,\r?\n/, $file);
    foreach my $line (@inputs)
    {
        # Remove indentation and trailing whitespace so the quote stripping
        # below sees the quotes at the very edges of the entry.
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        $line =~ s/^\"//;
        $line =~ s/\"$//;
        my ($extension, $type) = split(/", ?"/, $line, 2);
        next unless ($type);
        $extension = uc($extension);
        $type =~ s/\s+$//;
        $type =~ s/^\s+//;
        if (exists $extensions{$extension})
        {
            # Compare as plain strings, ignoring case. Using the type as a
            # regex would treat "+" and "." as metacharacters (so
            # "image/svg+xml" would not match itself) and would accept mere
            # substrings as equal.
            if (lc($extensions{$extension}) ne lc($type))
            {
                $conflicts++;
                $type = resolve($extension, $type, $extensions{$extension});
                $extensions{$extension} = $type;
            }
            else
            {
                $identical++;
            }
        }
        else
        {
            $extensions{$extension} = $type;
        }
    }
    $subtotals = (scalar @inputs);
}
#----------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------#
# Display Statistics #
#----------------------------------------------------------------------------------#
{
    # Number of distinct extensions known after both sources were merged.
    $grandtotal = keys %extensions;
    # Print the summary of the run as one list of lines.
    print "\n\n",
          "Sub Total A : $subtotala\n",
          "Sub Total S : $subtotals\n",
          "Identical : $identical\n",
          "Conflicts : $conflicts\n",
          "-------------------\n",
          "Grand Total : $grandtotal\n\n";
}
#----------------------------------------------------------------------------------#
#----------------------------------------------------------------------------------#
# Publish Output #
#----------------------------------------------------------------------------------#
{
    # Write a C# enum named ContentType to the file named by $output, with
    # one member per extension in %extensions. Each member carries a Metadata
    # attribute holding its MIME type and an IsText or IsBinary flag.
    my @output;
    foreach my $extension (sort keys %extensions)
    {
        # Skip extensions that start with a digit or contain "-" or "=".
        next if ($extension =~ /^\d/);
        next if ($extension =~ /[\-\=]/);
        my $type = $extensions{$extension};
        # A type counts as text when it starts with "text", ends in "xml"
        # (which also covers "+xml"), or mentions json or javascript. All
        # three checks ignore case.
        my $is_text = ($type =~ /^text/i)
                   || ($type =~ /xml$/i)
                   || ($type =~ /(?:json|javascript)/i);
        my $flag = $is_text ? "IsText" : "IsBinary";
        push (@output, "\t[Metadata(Value=\"$type\", $flag = true)]\n\t$extension");
    }
    # Three-argument open with a lexical handle. Stop with an error rather
    # than silently printing to a handle that never opened.
    open(my $out_fh, '>', $output)
        or die "Cannot open $output for writing: $!\n";
    print {$out_fh} "public enum ContentType\n{\n";
    print {$out_fh} join(",\n\n", @output);
    print {$out_fh} "\n}\n";
    # Buffered write errors only surface when the handle is closed.
    close($out_fh)
        or die "Cannot finish writing $output: $!\n";
}
#----------------------------------------------------------------------------------#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment