doitian/sort-header.pl

## sort-header.pl
#!/usr/bin/env perl
#
# Author: Ian Yang <doit dot ian at gmail dot com>
# Updated: <2009-10-24 22:11:10>
#
# This script groups the #include lines of a source file into blocks and
# sorts the headers within each block.
#
# Usage:
#
# sort-header.pl file [file ..]
# cat file | sort-header.pl
#
# It can be easily integrated into editors that support command
# filtering, like Emacs and Vim.
#
# In Emacs:
#
#   1) Mark the region
#
#   2) C-u M-| sort-header.pl
#
#   or sort whole buffer:
#
#     C-x h C-u M-| sort-header.pl
#
# In Vim:
#
#   1) Switch to command mode.
#   2) Go to the start of the region and mark
#      m a
#   3) Go to the end of the region
#      :'a,. ! sort-header.pl
#
#   or sort whole file:
#
#      :1,$ ! sort-header.pl
#

use strict;

# Block numbers for the known header families.  System headers of one family
# are printed together as a block.  Blocks are printed in descending block
# number (GLOG first, C_STD last), after the system headers that belong to no
# family at all.
use constant C_STD         => 1;
use constant CPP_STD       => 2;
use constant C_POSIX       => 3;
use constant BOOST         => 4;
use constant TOKYO_CABINET => 5;
use constant GLOG          => 6;

# Exact header name (the text between <...>) => block number.
# Each map is parenthesised so that it only consumes its own word list.
my %order_of = (
    # C STD LIBRARY
    (map { ($_ => C_STD) } qw(
        assert.h   complex.h  ctype.h    errno.h    fenv.h     float.h
        inttypes.h iso646.h   limits.h   locale.h   math.h     setjmp.h
        signal.h   stdarg.h   stdbool.h  stddef.h   stdint.h   stdio.h
        stdlib.h   string.h   tgmath.h   time.h     wchar.h    wctype.h
    )),

    # C++ STD LIBRARY
    (map { ($_ => CPP_STD) } qw(
        string     ios        iostream   iomanip    fstream    sstream
        vector     deque      list       map        set        stack
        queue      bitset     algorithm  functional iterator
        cassert    cctype     cerrno     climits    clocale    cmath
        csetjmp    csignal    cstdarg    cstddef    cstdio     cstdint
        cstdlib    cstring    ctime
    )),

    # C POSIX LIBRARY
    (map { ($_ => C_POSIX) } qw(
        cpio.h     dirent.h   fcntl.h    grp.h      pwd.h
        sys/ipc.h  sys/msg.h  sys/sem.h  sys/stat.h sys/time.h
        sys/types.h sys/utsname.h sys/wait.h
        tar.h      termios.h  unistd.h   utime.h
    )),

    # TOKYO CABINET
    (map { ($_ => TOKYO_CABINET) } qw(
        tcadb.h tcbdb.h tcfdb.h tchdb.h tctdb.h tcutil.h
    )),
);

# Leading path prefix (including its trailing delimiter) => block number.
# Consulted by get_order for headers that have no exact entry in %order_of.
my %prefix_order_of = (
    'boost/' => BOOST,
    'glog/'  => GLOG,
);

# Return the block number of a system header.
#
#   $header - header path as written between <...>, e.g. "boost/foo.hpp"
#
# An exact entry in %order_of wins.  Otherwise the longest leading run of
# word characters that is followed by '_' or '/' (delimiter included) is
# looked up in %prefix_order_of.  Returns 0 when neither table knows the
# header.
sub get_order {
    my ($header) = @_;

    if (exists $order_of{$header}) {
        return $order_of{$header};
    }

    my ($prefix) = $header =~ m!^(\w*[_/])!;
    if (defined $prefix && exists $prefix_order_of{$prefix}) {
        return $prefix_order_of{$prefix};
    }

    return 0;
}


# Headers collected from the run of #include lines currently being read.
my @local_headers;              # names taken from  #include "..."
my @system_headers;             # names taken from  #include <...>

# True while inside a run of #include lines.  Blank lines met during a run
# are dropped; blank lines outside a run are passed through unchanged.
my $collecting = 0;

# Process the input one line at a time instead of first reading all of it
# into a list.
while (my $line = <>) {
    if ($line =~ /^\s*#\s*include\s*"([^"]*)"\s*$/) {
        $collecting = 1;
        push @local_headers, $1;
    }
    elsif ($line =~ /^\s*#\s*include\s*<([^>]*)>\s*$/) {
        $collecting = 1;
        push @system_headers, $1;
    }
    elsif ($line =~ /^\s*$/) {
        print $line unless $collecting;
    }
    else {
        # Any other line ends the run: emit the collected headers followed
        # by one blank line, then pass the line itself through.
        $collecting = 0;
        if (@local_headers || @system_headers) {
            sort_and_print();
            print "\n";
            @local_headers  = ();
            @system_headers = ();
        }
        print $line;
    }
}

# Flush a run of headers that reaches the end of the input.
sort_and_print();

# Print the collected headers, de-duplicated, sorted and grouped into blocks.
#
# Takes no arguments; reads the file-level @local_headers and
# @system_headers and leaves them untouched.  Output order:
#
#   1. local headers (#include "..."), sorted, as a single block;
#   2. system headers for which get_order returns 0, grouped by their
#      leading directory (headers without one share a group), groups in
#      sorted directory order;
#   3. the known blocks, in descending block number.
#
# Blocks are separated by one blank line; nothing is printed before the
# first block or after the last one.  A header that appears more than once,
# or as both a local and a system header, is printed only once.
sub sort_and_print {
    # Set once any block has been started, so that every later block is
    # preceded by a blank line.  The helper is an anonymous closure created
    # on each call: a named sub nested here would stay bound to the variable
    # of the first call only and carry stale state into later calls.
    my $block_started = 0;
    my $start_block = sub {
        print "\n" if $block_started;
        $block_started = 1;
    };

    # %seen is shared by both passes so that a header already listed as a
    # local header is not repeated as a system header.
    my %seen;
    my @unique_local_headers = grep { !$seen{$_}++ } @local_headers;

    # Local headers form one block and are simply sorted.
    if (@unique_local_headers) {
        $start_block->();
        for my $header (sort @unique_local_headers) {
            print "#include \"$header\"\n";
        }
    }

    my @unique_system_headers = grep { !$seen{$_}++ } @system_headers;

    # Distribute system headers over known blocks and per-directory groups.
    # Iterating in sorted order keeps every group sorted as well.
    my %headers_in_block;
    my %not_found;
    for my $header (sort @unique_system_headers) {
        my $order = get_order($header);
        if ($order > 0) {
            push @{ $headers_in_block{$order} }, $header;
        }
        else {
            my ($dir) = $header =~ m!^(\w*)/!;
            # Headers without a leading directory are grouped under ''.
            $dir = '' unless defined $dir;
            push @{ $not_found{$dir} }, $header;
        }
    }

    # Unknown headers first, one block per directory.
    for my $dir (sort keys %not_found) {
        $start_block->();
        for my $header (@{ $not_found{$dir} }) {
            print "#include <$header>\n";
        }
    }

    # Then the known blocks, highest block number first.
    for my $block (sort { $b <=> $a } keys %headers_in_block) {
        $start_block->();
        for my $header (@{ $headers_in_block{$block} }) {
            print "#include <$header>\n";
        }
    }

    return;
}