Skip to content

Instantly share code, notes, and snippets.

@terrycojones
Created September 30, 2011 14:46
Show Gist options
  • Save terrycojones/1253956 to your computer and use it in GitHub Desktop.
Save terrycojones/1253956 to your computer and use it in GitHub Desktop.
mytags
#!/bin/sh
#PATH=/bin:/usr/ucb:/usr/bin
#
# usage: mytags [source-files]
# Enhanced version of ctags.
# Merge standard "ctags" and create extra tags from #define statements
# and declarations.
#
# Declaration cases not handled:
# ==============================
#
#
# - Repeated identifier names.
# ==========================
# Only the first instance will be tagged.
# Be careful about ^]'ing to tags that are in functions... you may
# not get what you want. Worse, you might get put into another file
# without getting what you want. You can always get back with ^^
#
#
# - Lines of declarations that are continued with a comma e.g.
# ==========================================================
# int fred, harry, joe,
# mike, dick;
# Will not try to produce tags for mike or dick.
#
#
# - Declaration lines that do not start with a type name e.g.
# =========================================================
# /* silly comment in the way */ int fred;
# Will not tag fred.
#
#
# - Declarations in comment blocks will be tagged e.g.
# ==================================================
# /* start of comment
# int fred;
# int harry;
# end of comment */
# Will produce tags for fred and harry (if they don't already exist).
#
#
# Run ctags, create extra tags, sort.
#
# Note that vi searches in NOMAGIC mode, meaning
# only ^ and $ have any effect. Thus we have
# to escape these, and /\, but nothing else.
# (Note also that due to a bug in vi you get left in
# nomagic mode if the pattern isn't found)
#
# Steve Hayman (MFCF)
# Terry Jones (F.U.N. Corporation) 18/10/87
#
# Refuse to run without at least one source file.
# The usage message is a diagnostic, so it goes to stderr; the script name
# is taken from $0 with a parameter expansion so that a path containing
# spaces is handled and no external command is needed.
if [ "$#" -eq 0 ]; then
  printf 'usage: %s files\n' "${0##*/}" >&2
  exit 1
fi
#
# Make the standard tags file with ctags.
# "$@" (rather than $*) passes every file name through as a single word,
# so names containing spaces or glob characters are not split or expanded.
#
ctags -w -t "$@"
#
# Do the additional tags
#
gawk '
#
# Set-up: build the two lookup tables used by the declaration rule.
#
#   keywd  - words that may start a declaration (types, storage classes)
#   follow - characters that end the interesting part of a declaration
#
# Every entry is stored with the value 1.
#
BEGIN {
    nkw = split("char int long double float short register static void unsigned", kwlist, " ")
    for ( k = 1; k <= nkw; k++ )
        keywd[kwlist[k]] = 1

    nfl = split("[ = ;", fllist, " ")
    for ( k = 1; k <= nfl; k++ )
        follow[fllist[k]] = 1
}
#
# The #define grabber.
#
# Matches "#define NAME" and "# define NAME", optionally indented, with
# spaces or tabs between "#" and "define".  White space is required after
# "define" so that text such as "#defineFOO" is not mistaken for a macro.
# The tag is NAME with any macro parameter list removed; the search
# pattern is the whole source line.
#
/^[ \t]*#[ \t]*define[ \t]/ {
    #
    # "# define NAME" splits into three fields, "#define NAME" into two.
    #
    if ( $1 == "#" )
        token = $3
    else
        token = $2
    #
    # Careful with macro functions: keep only the text before the "(".
    #
    if ( i = index(token, "(") )
        token = substr(token, 1, i - 1)
    #
    # A "#define" with no usable name gives nothing to tag.
    #
    if ( token == "" )
        next
    #
    # Set up these tags for later output (see END clause).
    #
    total_tokens++
    patterns[total_tokens] = $0
    files[total_tokens] = FILENAME
    tags[total_tokens] = token
    next
}
#
# The declaration grabber.
#
# Fires on lines whose first field is one of the words in the keywd
# table, and records a tag for each plain identifier declared on the line.
# The arrays patterns, tags and files (indexed by total_tokens) collect
# the results for the END clause.
#
# Membership is tested with the "in" operator so that looking a word up
# never adds it to keywd as a side effect.
#
NF > 0 && ( $1 in keywd ) {
    #
    # If the last field is a keyword then we must have something like
    #
    #     unsigned int
    #     silly()
    #
    # so there is nothing to tag on this line.
    #
    if ( $NF in keywd )
        next
    #
    # Step over the leading keywords one field at a time.  At each step
    # the remaining text starts with optional white space followed by the
    # current field, so index() finds that field itself.  (Searching the
    # whole line for the first non-keyword field can instead land inside
    # an earlier keyword, e.g. on the "t" of "static" in "static int t".)
    # The loop stops because the last field is known not to be a keyword.
    #
    line = $0
    for ( i = 1; ( $i in keywd ); i++ )
        line = substr(line, index(line, $i) + length($i))
    sub(/^[ \t]+/, "", line)
    #
    # Chop the line at the first of the follow characters ([ = ;).
    # Every character in the table is tried, so their order does not
    # matter.  This way a line such as
    #
    #     char *fred="this is fred" /* comment about fred the char* */
    #
    # is reduced to "*fred" before any further processing.
    #
    # (Commas are left in at this stage, since we split on them next.)
    #
    for ( f in follow ) {
        if ( j = index(line, f) )
            line = substr(line, 1, j - 1)
    }
    #
    # Split the line that remains on commas.
    #
    total_ids = split(line, identifiers, ",")
    #
    # Process each of the pieces.  depth counts unclosed parentheses so
    # that the comma separated pieces of a parameter list, as in
    #
    #     int silly(int a, int b)
    #
    # are skipped along with the function name itself.
    #
    depth = 0
    for ( i = 1; i <= total_ids; i++ ) {
        token = identifiers[i]

        scratch = token
        opens = gsub(/\(/, "", scratch)
        closes = gsub(/\)/, "", scratch)
        in_parens = ( depth > 0 || opens > 0 )
        depth += opens - closes
        if ( depth < 0 )
            depth = 0
        if ( in_parens )
            continue
        #
        # Strip leading white space (spaces and tabs) and * characters,
        # and trailing white space.
        #
        sub(/^[ \t*]+/, "", token)
        sub(/[ \t]+$/, "", token)
        #
        # Only a plain C identifier is worth a tag; anything else (empty
        # text, stray punctuation, several words) is dropped.
        #
        if ( token !~ /^[A-Za-z_][A-Za-z0-9_]*$/ )
            continue
        #
        # Only the first declaration of a name is tagged.  The names
        # already used are kept in their own array, because split()
        # empties the identifiers array on every line.
        #
        if ( token in seen )
            continue
        seen[token] = 1
        #
        # And finally set up the arrays for later use.
        #
        total_tokens++
        patterns[total_tokens] = $0
        tags[total_tokens] = token
        files[total_tokens] = FILENAME
    }
}
#
# Output stage: write one tags-file line for every collected entry:
#
#     token <tab> filename <tab> /^<escaped-source-line>$/
#
# The search pattern is the entire source line, with each ^ $ / and \
# character preceded by a backslash.
#
END {
    for ( n in patterns ) {
        raw = patterns[n]
        quoted = ""
        len = length(raw)
        for ( k = 1; k <= len; k++ ) {
            ch = substr(raw, k, 1)
            if ( ch == "^" || ch == "$" || ch == "/" || ch == "\\" )
                quoted = quoted "\\"
            quoted = quoted ch
        }
        printf "%s\t%s\t/^%s", tags[n], files[n], quoted
        printf "$/\n"
    }
}
#
# Send all of this into sort, merging the tags we created with the file
# written by ctags.  The file names are passed as "$@" so that each one
# stays a single word, and the sort runs in the C locale so that the
# ordering and the -u comparison are byte-wise on every system.
#
' "$@" | LC_ALL=C sort -u -o tags - tags
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment