Skip to content

Instantly share code, notes, and snippets.

@tinkertim
Last active September 28, 2022 18:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tinkertim/893e4a16d610d3e2bb3c60f2778f694f to your computer and use it in GitHub Desktop.
Save tinkertim/893e4a16d610d3e2bb3c60f2778f694f to your computer and use it in GitHub Desktop.
A combined grep + awk with a very small memory footprint, for use as a log saw on embedded systems.
/* Copyright (c) 2008, Tim Post <tinkertim@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of the original program's authors nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* Some example usages:
* grawk shutdown '$5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15' messages
* grawk shutdown '$5, $6, $7, $8, $9, $10, " -- " $1, $2, $3' messages
* grawk dhclient '$1, $2 " \"$$\"-- " $3' syslog
* cat syslog | grawk dhclient '$0'
* cat myservice.log | grawk -F , error '$3'
*
* Contributors:
* Tim Post, Nicholas Clements, Alex Karlov
* We hope that you find this useful! */
/* FIXME:
* readline() should probably be renamed
*/
/* TODO:
* Add a tail -f like behavior that applies expressions and fields
* Recursive (like grep -r) or at least honor symlinks ? */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <getopt.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <regex.h>
#define VERSION "1.0.7"
#define MAINTAINER "Tim Post <echo@echoreply.us>"
/* Compiled form of an awk-style output pattern, filled in by awkcomp()
 * and released by awkfree(). */
typedef struct awk_pattern {
    int maxfield;   /* Largest N seen among the $N field references */
    int numfields;  /* How many entries are stored in fields */
    char **fields;  /* The pattern pieces, one allocated string each */
} awk_pat_t;
/* Long command-line options handed to getopt_long(). Each entry maps a
 * long name onto the short option character returned for it; the table
 * ends with an all-zero entry. */
static struct option const long_options[] = {
    { .name = "ignore-case",     .has_arg = no_argument,       .flag = NULL, .val = 'i' },
    { .name = "with-filename",   .has_arg = no_argument,       .flag = NULL, .val = 'W' },
    { .name = "no-filename",     .has_arg = no_argument,       .flag = NULL, .val = 'w' },
    { .name = "line-number",     .has_arg = no_argument,       .flag = NULL, .val = 'n' },
    { .name = "field-separator", .has_arg = required_argument, .flag = NULL, .val = 'F' },
    { .name = "help",            .has_arg = no_argument,       .flag = NULL, .val = 'h' },
    { .name = "version",         .has_arg = no_argument,       .flag = NULL, .val = 'v' },
    { .name = NULL,              .has_arg = 0,                 .flag = NULL, .val = 0 }
};
/* The official name of the program; usage() prints it in the help text */
const char *progname = "grawk";
/* Global for delimiters used in tokenizing strings. main() allocates it
 * and process_line() passes it to strtok() when splitting a line into
 * fields. */
char *tokdelim = NULL;
/* Prototypes */
/* usage() - print the help text */
static void usage(void);
/* process(fp, re, awk, filename, show_lineno) - scan one open stream */
static int process(FILE *, regex_t, awk_pat_t, char *, int);
/* process_line(inbuf, awk, filename, lineno) - print one matching line */
static int process_line(char *, awk_pat_t, char *, char *);
/* process_files(numfiles, files, re, awk, show_filename, show_lineno) */
static int process_files(int, char **, regex_t, awk_pat_t, int, int);
/* process_pipe(re, awk, show_lineno) - scan stdin */
static int process_pipe(regex_t, awk_pat_t, int);
/* awkcomp(awk, apattern) - compile the awk-style output pattern */
static int awkcomp(awk_pat_t *, char *);
/* awkfree(awk) - release what awkcomp() stored in awk */
static void awkfree(awk_pat_t *);
/* readline(fp) - read one line into an allocated string */
static char *readline(FILE *);
/* usage() - print the program name and version, the option summary, a few
 * example invocations and the maintainer contact to stdout.
 * Each option is listed once; "--help" is documented together with its
 * short form "-h". */
static void usage(void)
{
    printf("%s %s\n", progname, VERSION);
    printf("Usage: %s [OPTION] PATTERN OUTPUT_PATTERN file1 [file2]...\n",
           progname);
    printf("Options:\n");
    printf(" -i, --ignore-case "
           "ignore case distinctions\n");
    printf(" -W, --with-filename "
           "Print filename for each match\n");
    printf(" -w, --no-filename "
           "Never print filename for each match\n");
    printf(" -n, --line-number "
           "Prefix each line of output with line number.\n");
    printf(" -F fs, --field-separator=fs "
           "Use fs as the field separator\n");
    printf(" -h, --help "
           "Print a brief help summary\n");
    printf(" -v, --version "
           "Print version information and exit normally\n");
    printf(" PATTERN "
           "a basic regular expression\n");
    printf(" OUTPUT_PATTERN "
           "awk-style print statement; defines "
           "output fields\n");
    printf("\nExamples:\n");
    printf(" Retrieve joe123's home directory from /etc/passwd:\n");
    printf("\t%s -F : \"joe123\" '$6' /etc/passwd\n", progname);
    printf("\n Find fields 2 3 and 4 on lines that begin with @ from stdin:\n");
    printf("\tcat file.txt | %s \"^@\" '$2,$3,$4'\n", progname);
    printf("\n Use as a simple grep:\n");
    printf("\t%s \"string to find\" '$0' /file.txt\n", progname);
    printf("\nReport bugs to %s\n", MAINTAINER);
}
/* readline() - read one line from the file handle.
 *
 * A line ends at '\n', at '\r' (so a CRLF pair is seen as a line followed
 * by an empty line) or at end of input. The terminator is not stored.
 *
 * Returns a NUL-terminated string obtained from malloc() that the caller
 * must free(), or NULL when no line is available: end of input or a read
 * error before any character was read, or an allocation failure. The
 * stream is never closed here; it stays owned by the caller.
 *
 * When the last line of the input has no terminator it is still returned,
 * and the stream's end-of-file indicator is cleared so that a caller which
 * tests feof(fp) after each call gets to handle that line. The following
 * call then runs into end of input again and returns NULL. */
static char *readline(FILE *fp)
{
    const size_t step = 256;
    size_t cap = 256, used = 0;
    char *str = malloc(cap);

    if (str == NULL)
        return NULL;

    for (;;) {
        int ch = fgetc(fp);

        if (ch == EOF) {
            /* EOF covers both end of input and read errors; testing only
             * feof() would spin forever on a stream in the error state. */
            if (used == 0) {
                free(str);
                return NULL;
            }
            if (feof(fp))
                clearerr(fp);
            break;
        }
        if (ch == '\n' || ch == '\r')
            break;

        /* Keep room for this character plus the terminating NUL. */
        if (used + 2 > cap) {
            char *grown = realloc(str, cap + step);

            if (grown == NULL) {
                free(str);
                return NULL;
            }
            str = grown;
            cap += step;
        }
        str[used++] = (char)ch;
    }

    str[used] = '\0';
    return str;
}
/* process() - scan an already opened stream line by line. Every line that
 * matches the compiled grep pattern is handed to process_line(), which
 * prints it according to the awk pattern.
 *
 * fp          - stream to read; opened and closed by the caller
 * re          - compiled grep pattern
 * awk         - compiled awk-style output pattern
 * filename    - prefix printed before each match; pass an empty string to
 *               print no filename
 * show_lineno - non-zero to prefix each match with "<line number>:"
 *
 * Returns 1 if at least one line matched, 0 if none did, and -1 as soon as
 * process_line() reports an error. Reading stops when readline() returns
 * NULL or once the stream reports end-of-file; a buffer returned in that
 * state is discarded. */
static int process(FILE *fp, regex_t re, awk_pat_t awk,
                   char *filename, int show_lineno)
{
    char prefix[32] = { 0 };    /* stays empty unless show_lineno is set */
    long current = 0;
    int matched = 0;
    char *line;

    while ((line = readline(fp)) != NULL && !feof(fp)) {
        current++;

        if (regexec(&re, line, (size_t)0, NULL, 0) != 0) {
            /* No match; move on to the next line. */
            free(line);
            continue;
        }

        matched = 1; // Found a line.
        if (show_lineno)
            sprintf(prefix, "%ld:", current);

        if (process_line(line, awk, filename, prefix) != 0) {
            fprintf (stderr, "Error processing line [%s]\n", line);
            free(line);
            return -1;
        }
        free(line);
    }

    /* Either NULL or the buffer handed back at end-of-file. */
    free(line);
    return matched;
}
/* process_files() - process one or more files from the command-line.
 *
 * numfiles      - number of entries in files
 * files         - paths to scan
 * re, awk       - compiled grep pattern and awk-style output pattern
 * show_filename - non-zero to prefix each match with "<path>:"
 * show_lineno   - non-zero to prefix each match with its line number
 *
 * Files that cannot be stat()ed or opened are reported on stderr and
 * skipped; anything that is not a regular file is skipped silently (no
 * recursion, no special files).
 *
 * Returns 1 if at least one line was found in any file, else 0 if no
 * lines were found or an error occurred. */
static int process_files(int numfiles, char **files, regex_t re, awk_pat_t awk,
                         int show_filename, int show_lineno)
{
    char no_prefix[] = "";
    int found = 0;
    int i;

    for (i = 0; i < numfiles; i++) {
        struct stat info;
        char *label = NULL;
        char *prefix = no_prefix;
        FILE *fp;

        if (stat(files[i], &info) == -1) {
            /* Did a file get deleted from the time we started running? */
            fprintf(stderr,
                    "Error accessing file %s. No such file\n", files[i]);
            continue;
        }

        /* For now, we aren't recursive. Perhaps allow symlinks? */
        if (!S_ISREG(info.st_mode))
            continue;

        if (show_filename) {
            /* Size the prefix from the path itself: a fixed-size buffer
             * overflows on paths longer than the buffer. */
            size_t need = strlen(files[i]) + 2;     /* ':' and NUL */

            label = malloc(need);
            if (label == NULL) {
                fprintf(stderr,
                        "Could not allocate memory to process file %s\n",
                        files[i]);
                continue;
            }
            snprintf(label, need, "%s:", files[i]);
            prefix = label;
        }

        fp = fopen(files[i], "r");
        if (fp == NULL) {
            fprintf(stderr,
                    "Error opening file %s. Permission denied\n", files[i]);
            free(label);
            continue;
        }

        if (process(fp, re, awk, prefix, show_lineno) == 1)
            found = 1;

        fclose(fp);
        free(label);
    }
    return found;
}
/* process_pipe() - run process() over stdin, with no filename prefix.
 * Returns 1 when process() reports a match and 0 in every other case
 * (no match, or an error inside process()). */
static int process_pipe(regex_t re, awk_pat_t awk, int show_lineno)
{
    return (process(stdin, re, awk, "", show_lineno) == 1) ? 1 : 0;
}
/* process_line() - print one input line according to the awk-style pattern.
 *
 * inbuf    - the line to print from (not modified)
 * awk      - compiled output pattern from awkcomp()
 * filename - text printed first on the output line (may be empty)
 * lineno   - text printed after filename (may be empty)
 *
 * Pattern entries that start with '\1' are field references: "$0" prints
 * the whole line, "$N" prints the N-th token of the line as split on the
 * global tokdelim characters, and nothing at all if the line has fewer
 * than N tokens. Every other entry is printed literally.
 *
 * Returns 0 on success, -1 if memory could not be allocated (nothing is
 * printed to stdout in that case).
 *
 * TODO: strtok() keeps static state; make this re-entrant so that grawk
 * can be spawned as a thread. */
static int process_line(char *inbuf, awk_pat_t awk, char *filename, char *lineno)
{
    static const char full_line[] = { '\1', '0', '\0' };
    char **linefields;
    char *wrkbuf, *tok;
    int nslots, count = 0, i;

    if (awk.numfields == 1 && strcmp(awk.fields[0], full_line) == 0) {
        /* If the caller only wants the whole string, oblige, quickly. */
        fprintf(stdout, "%s%s%s\n", filename, lineno, inbuf);
        return 0;
    }

    /* Only tokens 1..maxfield can ever be printed; one extra slot keeps
     * the table non-empty when the pattern references no numbered field. */
    nslots = awk.maxfield + 1;
    linefields = calloc((size_t)nslots, sizeof *linefields);
    wrkbuf = strdup(inbuf);
    if (linefields == NULL || wrkbuf == NULL) {
        fprintf(stderr, "Could not allocate memory to process file %s\n",
                filename);
        free(wrkbuf);
        free(linefields);
        return -1;
    }

    /* strtok() cuts wrkbuf up in place, so the table can simply point
     * into it; no per-token copies to allocate or release. */
    for (tok = strtok(wrkbuf, tokdelim);
         tok != NULL && count < nslots;
         tok = strtok(NULL, tokdelim)) {
        linefields[count++] = tok;
    }

    /* For each field in the awk structure,
     * find the field and print it to stdout. */
    fprintf(stdout, "%s%s", filename, lineno); /* if needed */
    for (i = 0; i < awk.numfields; i++) {
        const char *field = awk.fields[i];
        long n;

        if (field[0] != '\1') {
            fputs(field, stdout);
            continue;
        }
        n = strtol(field + 1, NULL, 10);
        if (n == 0)
            fputs(inbuf, stdout);
        else if (n > 0 && n <= count)
            fputs(linefields[n - 1], stdout);
    }
    fputc('\n', stdout);

    /* Cleanup */
    free(linefields);
    free(wrkbuf);
    return 0;
}
/* awk_add_field() - append a copy of text to awk->fields.
 * Returns 0 on success; on allocation failure prints a message and returns
 * -1, leaving the entries stored so far intact. */
static int awk_add_field(awk_pat_t *awk, const char *text)
{
    char **grown = realloc(awk->fields,
                           ((size_t)awk->numfields + 1) * sizeof *grown);

    if (grown == NULL) {
        fprintf(stderr,
                "Memory allocation error (awk->fields) in awkcomp()\n");
        return -1;
    }
    awk->fields = grown;

    awk->fields[awk->numfields] = strdup(text);
    if (awk->fields[awk->numfields] == NULL) {
        fprintf(stderr,
                "Memory allocation error (awk->fields[%d]) in awkcomp()\n",
                awk->numfields);
        return -1;
    }
    awk->numfields++;
    return 0;
}

/* awkcomp() - little awk-style print format compilation routine.
 * Breaks apattern down into an array of strings stored in *awk for easier
 * comparison and printing. Handles string literals as well as fields and
 * delimiters. Example: $1,$2 " \$ and \"blah\" " $4
 *
 * Each stored entry is one of:
 *   - '\1' followed by decimal digits, for a $N field reference,
 *   - a single space, for a ',' separator,
 *   - the text of a "..." literal with backslash escapes resolved.
 * Blanks and any other characters outside of literals are skipped.
 * awk->maxfield receives the largest N used.
 *
 * Returns 0 on success. On error prints a message to stderr and returns -1;
 * in that case everything allocated here has been released again and *awk
 * is left empty (fields == NULL, numfields == 0). *awk itself belongs to
 * the caller and is never freed. */
static int awkcomp(awk_pat_t *awk, char *apattern)
{
    const size_t len = strlen(apattern);
    size_t i, offs = 0;
    int inString = 0;
    char *wrkbuf;

    awk->maxfield = 0;
    awk->numfields = 0;
    awk->fields = NULL;

    /* Every character written to wrkbuf consumes at least one character
     * of apattern, so len + 1 bytes always suffice. */
    wrkbuf = malloc(len + 1);
    if (wrkbuf == NULL) {
        fprintf(stderr, "Memory allocation error (wrkbuf) in awkcomp()\n");
        return -1;
    }

    for (i = 0; i < len; i++) {
        char ch = apattern[i];

        /* Inside a literal everything but '"' and '\' is taken as is. */
        if (inString && ch != '"' && ch != '\\') {
            wrkbuf[offs++] = ch;
            continue;
        }
        if (ch == ' ')
            continue;

        switch (ch) {
        /* Handle delimited strings inside of literal strings */
        case '\\':
            if (!inString) {
                /* Unexpected and unconventional escape (can get these
                 * from improper invocations of sed in a pipe with grawk),
                 * if sed is used to build the field delimiters */
                fprintf(stderr,
                        "Unexpected character \'\\\' in output format\n");
                goto fail;
            }
            /* Take the next character literally; a backslash that ends
             * the pattern has nothing to escape. */
            if (i + 1 < len)
                wrkbuf[offs++] = apattern[++i];
            continue;

        /* Beginning or ending of a literal string */
        case '"':
            inString = !inString;
            if (inString)
                continue;
            break;

        /* Handle the awk-like $# field variables */
        case '$': {
            int num = 0, digits = 0;

            /* We use a non-printable ASCII character to
             * delimit the string field values. */
            wrkbuf[offs++] = '\1';
            while (i + 1 < len
                   && apattern[i + 1] >= '0' && apattern[i + 1] <= '9') {
                /* Stay clear of INT_MAX: callers size a table with
                 * maxfield + 1 entries. */
                if (num > (INT_MAX - 10) / 10) {
                    fprintf(stderr, "Field number too large at "
                            "or near character %d in awk pattern\n",
                            (int)(i + 1));
                    goto fail;
                }
                num = (num * 10) + (apattern[++i] - '0');
                wrkbuf[offs++] = apattern[i];
                digits++;
            }
            /* Incomplete expression, a $ not followed by a number */
            if (digits == 0) {
                fprintf(stderr, "Incomplete field descriptor at "
                        "or near character %d in awk pattern\n",
                        (int)(i + 1));
                goto fail;
            }
            /* We also need the max. field number */
            if (num > awk->maxfield)
                awk->maxfield = num;
            break;
        }

        /* Field separator */
        case ',':
            wrkbuf[offs++] = ' ';
            break;

        default:
            break;
        }

        /* if wrkbuf has nothing, we've got rubbish. Continue in the hopes
         * that something else makes sense. */
        if (offs == 0)
            continue;

        /* End of a field reached, put it into awk->fields */
        wrkbuf[offs] = '\0';
        if (awk_add_field(awk, wrkbuf) != 0)
            goto fail;
        offs = 0;
    }

    if (awk->numfields == 0) {
        fprintf(stderr,
                "Unable to parse and compile the pattern; no fields found\n");
        goto fail;
    }

    free(wrkbuf);
    return 0;

fail:
    free(wrkbuf);
    while (awk->numfields > 0)
        free(awk->fields[--awk->numfields]);
    free(awk->fields);
    awk->fields = NULL;
    awk->maxfield = 0;
    return -1;
}
/* awkfree() - release the strings stored in awk->fields and then the array
 * itself. The awk_pat_t object is owned by the caller and is not freed;
 * its members are left unchanged. */
static void awkfree(awk_pat_t *awk)
{
    int remaining = awk->numfields;

    /* Walk the array from the last entry down to the first. */
    while (remaining-- > 0)
        free(awk->fields[remaining]);

    free(awk->fields);
}
/* main() - parse the command line, compile both patterns and run the search.
 *
 * Usage: grawk [OPTION] PATTERN OUTPUT_PATTERN [file...]
 * With no files the input is read from stdin. With more than one file the
 * filename is printed for each match unless -w was given.
 *
 * Option parsing runs before the operand count is checked, so "-h" and
 * "-v" work on their own. An unrecognised option prints the usage text
 * and fails.
 *
 * Exit status: 0 if at least one line was found, 255 if none was found,
 * EXIT_SUCCESS after -h / -v, and EXIT_FAILURE for usage errors, pattern
 * compilation errors and allocation failures. */
int main(int argc, char **argv)
{
    char *gpattern = NULL, *apattern = NULL;
    char **files = NULL;
    int numfiles = 0, c;
    int ignore_case = 0, no_filename = 0, with_filename = 0, line_number = 0;
    /* Only a yes/no answer is ever asked of regexec(). */
    int cflags = REG_NOSUB;
    int rc = EXIT_FAILURE;
    regex_t re;
    awk_pat_t awk;

    tokdelim = strdup("\t\r\n ");
    if (tokdelim == NULL) {
        fprintf(stderr, "Memory allocation error\n");
        return EXIT_FAILURE;
    }

    while ((c = getopt_long(argc, argv, "wWhinvF:", long_options, NULL)) != -1) {
        switch (c) {
        case 'w':
            with_filename = 0;
            no_filename = 1;
            break;
        case 'i':
            ignore_case = 1;
            break;
        case 'W':
            with_filename = 1;
            no_filename = 0;
            break;
        case 'n':
            line_number = 1;
            break;
        case 'F': {
            /* The user's separator replaces the blank in the default set;
             * tab, CR and LF always stay delimiters. */
            size_t need = 3 + strlen(optarg) + 1;
            char *delims = realloc(tokdelim, need);

            if (delims == NULL) {
                fprintf(stderr, "Memory allocation error\n");
                goto out;
            }
            tokdelim = delims;
            snprintf(tokdelim, need, "\t\r\n%s", optarg);
            break;
        }
        case 'h':
            usage();
            rc = EXIT_SUCCESS;
            goto out;
        case 'v':
            printf("%s\n", VERSION);
            rc = EXIT_SUCCESS;
            goto out;
        default:
            /* getopt_long() has already reported the offending option. */
            usage();
            goto out;
        }
    }

    /* Now we'll grab our patterns and files. */
    if ((argc - optind) < 2) {
        usage();
        goto out;
    }
    /* pattern one will be our "grep" pattern */
    gpattern = argv[optind++];
    /* pattern two is our "awk" pattern */
    apattern = argv[optind++];

    /* Anything that remains is a file or wildcard which should be
     * expanded by the calling shell. argv outlives every use, so the
     * entries are used in place rather than copied. */
    if (optind < argc) {
        numfiles = argc - optind;
        files = &argv[optind];
    }

    /* If the number of files is greater than 1 then we default to
     * showing the filename unless specifically directed against it. */
    if (numfiles > 1 && no_filename == 0)
        with_filename = 1;

    if (ignore_case)
        cflags |= REG_ICASE;

    /* compile the regular expression parser */
    if (regcomp(&re, gpattern, cflags)) {
        fprintf(stderr,
                "Error compiling grep-style pattern [%s]\n", gpattern);
        goto out;
    }
    if (awkcomp(&awk, apattern)) {
        fprintf(stderr,
                "Error compiling awk-style pattern [%s]\n", apattern);
        goto out_re;
    }

    /* Process everything */
    rc = 0;
    if (numfiles > 0) {
        if (process_files(numfiles, files, re, awk,
                          with_filename, line_number) == 0)
            rc = 255; // We'll return 255 if no lines were found.
    } else {
        if (process_pipe(re, awk, line_number) == 0)
            rc = 255;
    }

    /* Destructor */
    awkfree(&awk);
out_re:
    regfree(&re);
out:
    free(tokdelim);
    tokdelim = NULL;
    return rc;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment