terrycojones/gist:1253956

## gistfile1.sh
#!/bin/sh

#PATH=/bin:/usr/ucb:/usr/bin
#
#   usage: mytags source-file ...   (at least one file is required)
#   Enhanced version of ctags.
#   Merge standard "ctags" and create extra tags from #define statements
#   and declarations.
#
#   Declaration cases not handled:
#   ==============================
#
#
#       - Repeated identifier names.
#         ==========================
#           Only the first instance will be tagged.
#           Be careful about ^]'ing to tags that are in functions... you may
#           not get what you want. Worse, you might get put into another file
#           without getting what you want. You can always get back with ^^
#
#
#       - Lines of declarations that are continued with a comma e.g.
#         ==========================================================
#               int fred, harry, joe,
#                   mike, dick;
#           Will not try to produce tags for mike or dick.
#
#
#       - Declaration lines that do not start with a type name e.g.
#         =========================================================
#               /* silly comment in the way */   int fred;
#           Will not tag fred.
#
#
#       - Declarations in comment blocks will be tagged e.g.
#         ==================================================
#               /* start of comment
#                   int fred;
#                   int harry;
#               end of comment */
#           Will produce tags for fred and harry (if they don't already exist).
#
#
#   Run ctags, create extra tags, sort.
#
#   Note that vi searches in NOMAGIC mode, meaning
#   only ^ and $ have any effect.  Thus we have
#   to escape these, and /\, but nothing else.
#   (Note also that due to a bug in vi you get left in
#    nomagic mode if the pattern isn't found)
#
#   Steve Hayman (MFCF)
#   Terry Jones  (F.U.N. Corporation)       18/10/87
#

# Refuse to run without at least one source file.  The message goes to
# stderr so it cannot be mistaken for normal output, and the script name
# is derived with a quoted expansion so that a path containing blanks is
# handled correctly.
if [ "$#" -eq 0 ]
then
    printf 'usage: %s files\n' "${0##*/}" >&2
    exit 1
fi


#
# Make the standard tags file with ctags.
# "$@" hands every file name to ctags as exactly one argument, so names
# containing blanks or glob characters are not split or expanded.
#

ctags -w -t "$@"


#
# Do the additional tags
#

gawk '

    #
    # Lookup tables used by the rules below.
    #   keywd  - C type and storage-class words that may precede a name.
    #   follow - characters that terminate the declarator list on a line.
    #

    BEGIN {
        keywd["char"] = 1
        keywd["int"] = 1
        keywd["long"] = 1
        keywd["double"] = 1
        keywd["float"] = 1
        keywd["short"] = 1
        keywd["register"] = 1
        keywd["static"] = 1
        keywd["void"] = 1
        keywd["unsigned"] = 1

        follow["["] = 1
        follow["="] = 1
        follow[";"] = 1
    }

    #
    # Queue one tag for output by the END clause.  The search pattern is
    # the whole current input line and the file is the current FILENAME.
    #

    function remember(tag) {
        total_tokens++
        tags[total_tokens] = tag
        files[total_tokens] = FILENAME
        patterns[total_tokens] = $0
    }

    #
    # The #define grabber.
    # Blanks or tabs are allowed between "#" and "define"; in that case
    # the "#" is a field of its own and the macro name is the third field.
    #

    /^#[ \t]*define[ \t]/ {

        name = ($1 == "#") ? $3 : $2

        #
        # Careful with macro functions: keep only the part before "(".
        #

        if ( (paren = index(name, "(")) > 0 )
            name = substr(name, 1, paren - 1)

        if ( name != "" )
            remember(name)

        next
    }

    #
    # The declaration grabber.
    # Only lines whose first field is a type keyword are considered.
    # (NF > 1 is not required, so that the test stays cheap; a line such
    # as int*fred; has a first field that is not a keyword anyway.)
    #

    ($1 in keywd) {

        #
        # If the last field is a keyword then we must have something like
        #
        #   unsigned int
        #   silly()
        #
        # and so we just move on to the next line.
        #

        if ( $NF in keywd )
            next

        #
        # Remove the leading run of keyword fields one field at a time.
        # What remains starts at the first word that is not a keyword,
        # which is assumed to be the first declarator.  Working field by
        # field (rather than searching the line for the text of that
        # word) avoids matching inside an earlier keyword, e.g. the "t"
        # of "int" for the line "int t".
        #

        line = $0
        for ( i = 1; i <= NF && ($i in keywd); i++ )
            sub(/^[ \t]*[^ \t]+/, "", line)
        sub(/^[ \t]+/, "", line)

        #
        # Chop the line at the first ";", "=" or "[" (each is tried, so
        # the traversal order of the follow array does not matter).
        # This way a line such as
        #
        #   char *fred="this is fred" /* comment about fred the char* */
        #
        # is reduced to "*fred" before it is split on commas.
        # (Commas are kept at this stage because we split on them next.)
        #

        for ( ch in follow )
            if ( (cut = index(line, ch)) > 0 )
                line = substr(line, 1, cut - 1)

        #
        # Split what remains on commas and process each declarator.
        # The pieces go into their own array: split() empties its target
        # array, so it must not be the array that records which names
        # have already been tagged.
        #

        count = split(line, parts, ",")

        for ( i = 1; i <= count; i++ ) {

            name = parts[i]

            #
            # A "(" means a function name, as in
            #
            #   int silly()
            #
            # ctags deals with those, so skip it.
            #

            if ( index(name, "(") )
                continue

            #
            # Strip leading blanks, tabs and * characters, and trailing
            # blanks and tabs.
            #

            gsub(/^[ \t*]+|[ \t]+$/, "", name)

            #
            # Nothing left, or a name that has been tagged before: only
            # the first instance of a repeated identifier is tagged.
            #

            if ( name == "" || (name in seen) )
                continue

            seen[name] = 1
            remember(name)
        }
    }


    #
    # Finally, print every queued tag as
    #
    # token <tab> filename <tab> /^<escaped-line>$/
    #
    # In the pattern the characters ^ $ / and \ are each preceded by a
    # backslash; everything else is copied unchanged.
    #

    END {
        for ( n = 1; n <= total_tokens; n++ ) {

            printf "%s\t%s\t/^", tags[n], files[n]

            text = patterns[n]
            len = length(text)
            for ( i = 1; i <= len; i++ ) {
                c = substr(text, i, 1)
                if ( index("^$/\\", c) )
                    printf "\\"
                printf "%s", c
            }
            printf "$/\n"
        }
    }
#
# Send all of this into sort, merging the tags we created with ctags
#
' "$@" | sort -u -o tags - tags
	#!/bin/sh

	#PATH=/bin:/usr/ucb:/usr/bin
	#
	# usage: mytags source-file ...   (at least one file is required)
	# Enhanced version of ctags.
	# Merge standard "ctags" and create extra tags from #define statements
	# and declarations.
	#
	# Declaration cases not handled:
	# ==============================
	#
	#
	# - Repeated identifier names.
	# ==========================
	# Only the first instance will be tagged.
	# Be careful about ^]'ing to tags that are in functions... you may
	# not get what you want. Worse, you might get put into another file
	# without getting what you want. You can always get back with ^^
	#
	#
	# - Lines of declarations that are continued with a comma e.g.
	# ==========================================================
	# int fred, harry, joe,
	# mike, dick;
	# Will not try to produce tags for mike or dick.
	#
	#
	# - Declaration lines that do not start with a type name e.g.
	# =========================================================
	# /* silly comment in the way */ int fred;
	# Will not tag fred.
	#
	#
	# - Declarations in comment blocks will be tagged e.g.
	# ==================================================
	# /* start of comment
	# int fred;
	# int harry;
	# end of comment */
	# Will produce tags for fred and harry (if they don't already exist).
	#
	#
	# Run ctags, create extra tags, sort.
	#
	# Note that vi searches in NOMAGIC mode, meaning
	# only ^ and $ have any effect. Thus we have
	# to escape these, and /\, but nothing else.
	# (Note also that due to a bug in vi you get left in
	# nomagic mode if the pattern isn't found)
	#
	# Steve Hayman (MFCF)
	# Terry Jones (F.U.N. Corporation) 18/10/87
	#

	# Refuse to run without at least one source file.  The message goes to
	# stderr so it cannot be mistaken for normal output, and the script name
	# is derived with a quoted expansion so that a path containing blanks is
	# handled correctly.
	if [ "$#" -eq 0 ]
	then
	    printf 'usage: %s files\n' "${0##*/}" >&2
	    exit 1
	fi


	#
	# Make the standard tags file with ctags.
	# "$@" hands every file name to ctags as exactly one argument, so names
	# containing blanks or glob characters are not split or expanded.
	#

	ctags -w -t "$@"


	#
	# Do the additional tags
	#

	gawk '

	    #
	    # Lookup tables used by the rules below.
	    #   keywd  - C type and storage-class words that may precede a name.
	    #   follow - characters that terminate the declarator list on a line.
	    #

	    BEGIN {
	        keywd["char"] = 1
	        keywd["int"] = 1
	        keywd["long"] = 1
	        keywd["double"] = 1
	        keywd["float"] = 1
	        keywd["short"] = 1
	        keywd["register"] = 1
	        keywd["static"] = 1
	        keywd["void"] = 1
	        keywd["unsigned"] = 1

	        follow["["] = 1
	        follow["="] = 1
	        follow[";"] = 1
	    }

	    #
	    # Queue one tag for output by the END clause.  The search pattern is
	    # the whole current input line and the file is the current FILENAME.
	    #

	    function remember(tag) {
	        total_tokens++
	        tags[total_tokens] = tag
	        files[total_tokens] = FILENAME
	        patterns[total_tokens] = $0
	    }

	    #
	    # The #define grabber.
	    # Blanks or tabs are allowed between "#" and "define"; in that case
	    # the "#" is a field of its own and the macro name is the third field.
	    #

	    /^#[ \t]*define[ \t]/ {

	        name = ($1 == "#") ? $3 : $2

	        #
	        # Careful with macro functions: keep only the part before "(".
	        #

	        if ( (paren = index(name, "(")) > 0 )
	            name = substr(name, 1, paren - 1)

	        if ( name != "" )
	            remember(name)

	        next
	    }

	    #
	    # The declaration grabber.
	    # Only lines whose first field is a type keyword are considered.
	    # (NF > 1 is not required, so that the test stays cheap; a line such
	    # as int*fred; has a first field that is not a keyword anyway.)
	    #

	    ($1 in keywd) {

	        #
	        # If the last field is a keyword then we must have something like
	        #
	        #   unsigned int
	        #   silly()
	        #
	        # and so we just move on to the next line.
	        #

	        if ( $NF in keywd )
	            next

	        #
	        # Remove the leading run of keyword fields one field at a time.
	        # What remains starts at the first word that is not a keyword,
	        # which is assumed to be the first declarator.  Working field by
	        # field (rather than searching the line for the text of that
	        # word) avoids matching inside an earlier keyword, e.g. the "t"
	        # of "int" for the line "int t".
	        #

	        line = $0
	        for ( i = 1; i <= NF && ($i in keywd); i++ )
	            sub(/^[ \t]*[^ \t]+/, "", line)
	        sub(/^[ \t]+/, "", line)

	        #
	        # Chop the line at the first ";", "=" or "[" (each is tried, so
	        # the traversal order of the follow array does not matter).
	        # This way a line such as
	        #
	        #   char *fred="this is fred" /* comment about fred the char* */
	        #
	        # is reduced to "*fred" before it is split on commas.
	        # (Commas are kept at this stage because we split on them next.)
	        #

	        for ( ch in follow )
	            if ( (cut = index(line, ch)) > 0 )
	                line = substr(line, 1, cut - 1)

	        #
	        # Split what remains on commas and process each declarator.
	        # The pieces go into their own array: split() empties its target
	        # array, so it must not be the array that records which names
	        # have already been tagged.
	        #

	        count = split(line, parts, ",")

	        for ( i = 1; i <= count; i++ ) {

	            name = parts[i]

	            #
	            # A "(" means a function name, as in
	            #
	            #   int silly()
	            #
	            # ctags deals with those, so skip it.
	            #

	            if ( index(name, "(") )
	                continue

	            #
	            # Strip leading blanks, tabs and * characters, and trailing
	            # blanks and tabs.
	            #

	            gsub(/^[ \t*]+|[ \t]+$/, "", name)

	            #
	            # Nothing left, or a name that has been tagged before: only
	            # the first instance of a repeated identifier is tagged.
	            #

	            if ( name == "" || (name in seen) )
	                continue

	            seen[name] = 1
	            remember(name)
	        }
	    }


	    #
	    # Finally, print every queued tag as
	    #
	    # token <tab> filename <tab> /^<escaped-line>$/
	    #
	    # In the pattern the characters ^ $ / and \ are each preceded by a
	    # backslash; everything else is copied unchanged.
	    #

	    END {
	        for ( n = 1; n <= total_tokens; n++ ) {

	            printf "%s\t%s\t/^", tags[n], files[n]

	            text = patterns[n]
	            len = length(text)
	            for ( i = 1; i <= len; i++ ) {
	                c = substr(text, i, 1)
	                if ( index("^$/\\", c) )
	                    printf "\\"
	                printf "%s", c
	            }
	            printf "$/\n"
	        }
	    }
	#
	# Send all of this into sort, merging the tags we created with ctags
	#
	' "$@" | sort -u -o tags - tags