/*
 * mysed -- GitHub gist RavuAlHemio/2371810, created April 12, 2012.
 */
/**
* @file mysed.c
*
* @brief An oversimplified implementation of sed.
*
* @author Ondřej Hošek <ondrej.hosek@tuwien.ac.at>
*/
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/**
 * Size of the line buffer, in characters: the number of bytes (including
 * the terminating NUL) passed to fgets() as the buffer size for each read
 * of input.
 */
#define LINE_BUF_SIZE 1024
/**
 * Name of the program, as shown in the usage text and in diagnostics.
 *
 * Starts out as the placeholder "(null)"; main() replaces it with argv[0]
 * when argc > 0.
 *
 * Since this is only written once and independent of threads et al.,
 * it has been placed into module scope.
 */
static const char *progname = "(null)";
/**
 * One substitution rule: occurrences of @c oldstr are to be replaced by
 * @c newstr.
 */
typedef struct sub_s sub_t;

struct sub_s
{
    /** The text to look for. */
    char *oldstr;
    /** The text to put in its place. */
    char *newstr;
};
/**
 * Writes a one-line synopsis of the command line to standard error and
 * terminates the process with EXIT_FAILURE.
 *
 * @warning Control never comes back to the caller.
 */
static void usage(void)
{
    const char *self = progname;

    (void)fprintf(stderr, "Usage: %s [-e script]... [file]...\n", self);
    exit(EXIT_FAILURE);
}
/**
 * Releases both strings of every substitution in an array and then the
 * array itself.
 *
 * A NULL array is accepted and ignored.
 *
 * @param subs The array to release; may be NULL.
 * @param nsubs Number of elements stored in the subs array.
 */
static void free_subs(sub_t *subs, size_t nsubs)
{
    sub_t *cur;
    sub_t *end;

    if (subs != NULL)
    {
        end = subs + nsubs;
        for (cur = subs; cur != end; ++cur)
        {
            free(cur->oldstr);
            free(cur->newstr);
        }
        free(subs);
    }
}
/**
 * Checks whether a script has the one form this program understands:
 * s/oldstr/newstr/g, where oldstr is not empty and neither oldstr nor
 * newstr contains a slash.
 *
 * @param scrstr Script string to check.
 * @return true if the script is well-formed, false otherwise.
 */
static bool script_valid(const char *scrstr)
{
    const size_t len = strlen(scrstr);
    const char *cursor;
    size_t slashes = 0;

    /* the shortest acceptable script is "s/x//g" */
    if (len < 6)
    {
        return false;
    }
    /* must open with "s/" followed by at least one character of oldstr */
    if (strncmp(scrstr, "s/", 2) != 0 || scrstr[2] == '/')
    {
        return false;
    }
    /* must close with "/g" */
    if (strcmp(scrstr + len - 2, "/g") != 0)
    {
        return false;
    }
    /* the three delimiters must be the only slashes in the script */
    for (cursor = strchr(scrstr, '/'); cursor != NULL; cursor = strchr(cursor + 1, '/'))
    {
        ++slashes;
    }
    return slashes == 3;
}
/**
 * Reports a failed call on standard error as
 * "<program name>: <function name>: <error text>", where the error text
 * is the strerror() description of the current value of errno.
 *
 * @param funcname Name of the function that failed.
 */
static void print_failure(const char *funcname)
{
    const char *reason = strerror(errno);

    (void)fprintf(stderr, "%s: %s: %s\n", progname, funcname, reason);
}
/**
 * Counts the non-overlapping occurrences of a string in another, scanning
 * from left to right.
 *
 * An empty needle is reported as occurring zero times: strstr() matches it
 * at the current position without ever advancing, so counting it would
 * never terminate.
 *
 * @param haystack The string in which to count the occurrences.
 * @param needle The string whose occurrences to count.
 * @return Number of non-overlapping occurrences of needle in haystack,
 *         or 0 if needle is empty.
 */
static size_t count_occs(const char *haystack, const char *needle)
{
    const size_t needle_len = strlen(needle);
    const char *occ;
    size_t ret = 0;

    if (needle_len == 0)
    {
        return 0;
    }
    while ((occ = strstr(haystack, needle)) != NULL)
    {
        ++ret;
        /* resume right behind this match so that matches never overlap */
        haystack = occ + needle_len;
    }
    return ret;
}
/**
* Perform the given substitutions in order on the given file.
*
* @warning If a line is longer than LINE_BUF_SIZE-2 characters, the
* behavior is undefined.
*
* @param f File on which to perform substitutions.
* @param subs The substitutions to perform, in order.
* @param nsubs The number of substitutions to perform.
*/
static void perform_subs(FILE *f, sub_t *subs, size_t nsubs)
{
char *oldbuf, *newbuf;
char *oldwalker, *newwalker;
char *occ;
size_t i, subcount, newbuflen, copylen;
/* initial buffer allocation */
oldbuf = malloc(LINE_BUF_SIZE);
if (oldbuf == NULL)
{
print_failure("malloc");
free_subs(subs, nsubs);
exit(EXIT_FAILURE);
}
/* read lines */
while (fgets(oldbuf, LINE_BUF_SIZE, f) != NULL)
{
/* perform substitutions */
for (i = 0; i < nsubs; ++i)
{
/* calculate size of new buffer */
subcount = count_occs(oldbuf, subs[i].oldstr);
newbuflen = strlen(oldbuf) + subcount*strlen(subs[i].newstr) - subcount*strlen(subs[i].oldstr);
/* allocate new buffer */
newbuf = malloc(newbuflen+1);
if (newbuf == NULL)
{
print_failure("malloc");
free_subs(subs, nsubs);
exit(EXIT_FAILURE);
}
/* prepare walkers */
oldwalker = &oldbuf[0];
newwalker = &newbuf[0];
/* while there still is an occurrence */
while ((occ = strstr(oldwalker, subs[i].oldstr)) != NULL)
{
/* calculate number of chars to copy */
copylen = occ - oldwalker;
/* copy text up and until this occurrence */
(void)strncpy(newwalker, oldwalker, copylen);
/* advance walkers */
oldwalker += copylen;
newwalker += copylen;
/* copy new string instead of the old one */
(void)strncpy(newwalker, subs[i].newstr, strlen(subs[i].newstr));
/* advance walkers (differently!) */
newwalker += strlen(subs[i].newstr);
oldwalker += strlen(subs[i].oldstr);
}
/* copy the rest */
(void)strncpy(newwalker, oldwalker, strlen(oldwalker));
newwalker += strlen(oldwalker);
/* NUL-terminate -.- */
newwalker[0] = '\0';
/* swizzle buffers */
free(oldbuf);
oldbuf = newbuf;
}
/* output the completely replaced line */
(void)fputs(oldbuf, stdout);
/* renew the buffer */
free(oldbuf);
oldbuf = malloc(LINE_BUF_SIZE);
if (oldbuf == NULL)
{
print_failure("malloc");
free_subs(subs, nsubs);
exit(EXIT_FAILURE);
}
}
if (ferror(f))
{
(void)fprintf(stderr, "%s: I/O error\n", progname);
free(oldbuf);
free_subs(subs, nsubs);
exit(EXIT_FAILURE);
}
/* the final step */
free(oldbuf);
}
/**
 * The main entry point of the program.
 *
 * Turns every -e script into a substitution (a script whose oldstr and
 * newstr are identical is accepted but dropped), then applies the
 * substitutions to each file named on the command line, or to standard
 * input if no file is named.
 *
 * @param argc Number of command-line arguments.
 * @param argv The command-line arguments themselves.
 * @return Zero if successful; on any failure the process exits with
 *         EXIT_FAILURE instead of returning.
 */
int main(int argc, char **argv)
{
    int opt;
    int argi;
    sub_t *subs = NULL;
    size_t subcount = 0;

    /* set program name */
    if (argc > 0)
    {
        progname = argv[0];
    }

    /* parse args */
    while ((opt = getopt(argc, argv, "e:")) != -1)
    {
        switch (opt)
        {
            case 'e':
            {
                sub_t *newsubs;
                sub_t *thenewsub;
                const char *oldstart;
                const char *newstart;
                size_t oldlen, newlen;

                /* validate */
                if (!script_valid(optarg))
                {
                    (void)fprintf(stderr, "%s: Invalid script '%s'\n", progname, optarg);
                    free_subs(subs, subcount);
                    exit(EXIT_FAILURE);
                }

                /* locate both strings inside s/<oldstr>/<newstr>/g */
                oldstart = optarg + 2;
                oldlen = strcspn(oldstart, "/");
                newstart = oldstart + oldlen + 1;
                newlen = strcspn(newstart, "/");

                /* skip identity substitutions */
                if (oldlen == newlen && strncmp(oldstart, newstart, oldlen) == 0)
                {
                    /* continues the enclosing while loop: next option */
                    continue;
                }

                /*
                 * Grow the array by one slot. subcount is raised only once
                 * the new slot exists, so that on failure free_subs() never
                 * looks at an element the old array does not have.
                 */
                newsubs = realloc(subs, (subcount + 1) * sizeof(*newsubs));
                if (newsubs == NULL)
                {
                    print_failure("realloc");
                    free_subs(subs, subcount);
                    exit(EXIT_FAILURE);
                }
                subs = newsubs;
                thenewsub = &subs[subcount];
                /* both pointers start as NULL so that free_subs works */
                thenewsub->oldstr = NULL;
                thenewsub->newstr = NULL;
                ++subcount;

                /* allocate space for the strings */
                thenewsub->oldstr = malloc(oldlen + 1);
                if (thenewsub->oldstr == NULL)
                {
                    print_failure("malloc");
                    free_subs(subs, subcount);
                    exit(EXIT_FAILURE);
                }
                thenewsub->newstr = malloc(newlen + 1);
                if (thenewsub->newstr == NULL)
                {
                    print_failure("malloc");
                    free_subs(subs, subcount);
                    exit(EXIT_FAILURE);
                }

                /* copy the strings and NUL-terminate them */
                memcpy(thenewsub->oldstr, oldstart, oldlen);
                thenewsub->oldstr[oldlen] = '\0';
                memcpy(thenewsub->newstr, newstart, newlen);
                thenewsub->newstr[newlen] = '\0';
                break;
            }
            case '?':
                free_subs(subs, subcount);
                usage();
                break;
            default:
                assert(0 && "default case of getopt switch");
        }
    }

    if (optind < argc)
    {
        /* there are files */
        for (argi = optind; argi < argc; ++argi)
        {
            FILE *f = fopen(argv[argi], "r");
            if (f == NULL)
            {
                print_failure("fopen");
                free_subs(subs, subcount);
                exit(EXIT_FAILURE);
            }
            perform_subs(f, subs, subcount);
            (void)fclose(f);
        }
    }
    else
    {
        /* no files; read stdin */
        perform_subs(stdin, subs, subcount);
    }

    free_subs(subs, subcount);
    return 0;
}
/* End of mysed.c. */