Skip to content

Instantly share code, notes, and snippets.

@clausecker
Last active January 14, 2019 08:32
Show Gist options
  • Save clausecker/dd024fef98a80726fde06a20526f4ab3 to your computer and use it in GitHub Desktop.
Save clausecker/dd024fef98a80726fde06a20526f4ab3 to your computer and use it in GitHub Desktop.
/* remove comments from C source files */
#include <stdio.h>
#include <stdlib.h>
/*
 * Read ahead buffer.  rabuf holds the characters consumed from the
 * input that have been neither written out nor thrown away yet;
 * readahead points just past the last character stored in it.
 */
static char rabuf[BUFSIZ];
static char *readahead = rabuf;
/*
 * Unget buffer.  Characters that were pushed back are stacked in
 * ungetbuf; unget points just past the most recently pushed one,
 * so the buffer is empty while unget == ungetbuf.
 */
static char ungetbuf[2];
static char *unget = ungetbuf;
/*
 * Read one raw character, append it to the read ahead buffer and
 * return it.  A character waiting in the unget buffer is taken
 * first; otherwise the character comes from stdin.  EOF is returned
 * unchanged and is never stored.  If the read ahead buffer is
 * already full, a diagnostic is printed and the program exits with
 * EXIT_FAILURE.
 */
static int
rawinput(void)
{
	int c;

	if (unget > ungetbuf) {
		/*
		 * Convert through unsigned char, exactly as getchar()
		 * does.  With a signed plain char, a backed up 0xFF
		 * byte would otherwise come back as -1 and be mistaken
		 * for EOF, cutting the output short.
		 */
		c = (unsigned char)*--unget;
	} else {
		c = getchar();
	}

	if (c == EOF)
		return (c);

	if ((size_t)(readahead - rabuf) >= sizeof rabuf) {
		fprintf(stderr, "readahead buffer full\n");
		exit(EXIT_FAILURE);
	}

	*readahead++ = (char)c;
	return (c);
}
/*
 * Push the character c back: the last character is dropped from
 * the read ahead buffer and c is stacked on the unget buffer, from
 * where the next rawinput() call picks it up again.  Backing up
 * EOF does nothing.
 */
static void
backup(int c)
{
	if (c == EOF)
		return;

	readahead--;
	*unget = c;
	unget++;
}
/*
 * Empty the read ahead buffer by resetting its fill pointer to the
 * start.  The buffered characters are dropped without being written.
 */
static void
discard(void)
{
	readahead = &rabuf[0];
}
/*
 * Write everything currently held in the read ahead buffer to
 * stdout, then empty the buffer.
 */
static void
copy(void)
{
	size_t pending = (size_t)(readahead - rabuf);

	/* the buffer holds plain chars, so each element is one byte */
	fwrite(rabuf, 1, pending, stdout);
	discard();
}
/*
 * Read one logical source character.  The raw characters consumed
 * on the way all end up in the read ahead buffer (via rawinput()).
 *
 * A backslash directly followed by a newline is a continuation
 * line: both are skipped and reading starts over.  Two trigraphs
 * are recognised: ??' is returned as ^ and ??/ is treated as a
 * backslash, including its role in continuation lines.  Every
 * other character, EOF included, is returned as read; characters
 * read only as lookahead are pushed back with backup().
 */
static int
input(void)
{
	int first, second, third, fourth;

	for (;;) {
		first = rawinput();

		if (first == '\\') {
			second = rawinput();
			if (second == '\n')
				continue;	/* continuation line */
			backup(second);
			return (first);
		}

		if (first != '?')
			return (first);

		/* a '?' might be the start of a trigraph */
		second = rawinput();
		if (second != '?') {
			backup(second);
			return (first);
		}

		third = rawinput();
		if (third == '\'')
			return ('^');
		if (third != '/') {
			/* no trigraph, or one that does not matter here */
			backup(third);
			backup(second);
			return (first);
		}

		/* the trigraph for a backslash: continuation line? */
		fourth = rawinput();
		if (fourth == '\n')
			continue;
		backup(fourth);
		return ('\\');
	}
}
/*
 * Skip the remainder of a line comment; the leading // has been
 * consumed by the caller.  Everything up to the terminating newline
 * (or EOF) is thrown away.  What the final input() call left in the
 * read ahead buffer is written out, which keeps the newline that
 * ends the comment.
 */
static void
linecomment(void)
{
	int ch;

	for (;;) {
		discard();
		ch = input();
		if (ch == '\n' || ch == EOF)
			break;
	}

	copy();
}
/*
 * Skip a regular comment up to and including its terminator, or
 * up to EOF if it is never terminated.  The introductory / * has
 * already been parsed at this point.
 *
 * The read ahead buffer is emptied before every character, so a
 * run of stars of any length inside the comment cannot fill it up.
 * A single space is written in place of the comment, as translation
 * phase 3 of the C standard does, so that the tokens on either side
 * of the comment do not merge into one.
 */
static void
comment(void)
{
	int c;
	int star = 0;	/* was the previous character a '*'? */

	for (;;) {
		discard();
		c = input();
		if (c == EOF)
			break;
		if (star && c == '/')
			break;
		star = (c == '*');
	}

	discard();
	putchar(' ');
}
/*
 * Copy the rest of a string literal or character constant to the
 * output.  The opening " or ' has already been parsed; delim is
 * that opening character and also the one that ends the literal.
 *
 * A backslash makes the following character part of the literal
 * whatever it is, so an escaped delimiter does not end it.  Copying
 * also stops at an unescaped newline or at EOF: a literal cannot
 * span an unspliced line break in C, and stopping there keeps a
 * stray quote (for instance an apostrophe in an #error line) from
 * swallowing the lines and comments that follow it.
 */
static void
stringchar(int delim)
{
	int c;

	do {
		c = input();
		if (c == '\\')
			input();	/* take the escaped character along */
		copy();
	} while (c != delim && c != '\n' && c != EOF);
}
/*
 * Read the whole input and copy it to the output with comments
 * removed.  String literals and character constants are passed to
 * stringchar() so that comment markers inside them are left alone.
 * A slash followed by a second slash or by a star starts a comment,
 * which is handed to linecomment() or comment() respectively.
 */
static void
uncomment(void)
{
	int c0, c1;

	while ((c0 = input()) != EOF) {
		switch (c0) {
		case '"':
		case '\'':
			stringchar(c0);
			break;
		case '/':
			c1 = input();
			if (c1 == '/') {
				linecomment();
			} else if (c1 == '*') {
				comment();
			} else {
				/* a plain slash: emit it with its lookahead */
				copy();
				/*
				 * If the lookahead character opened a
				 * literal, as in a/'0', the literal has
				 * to be parsed as one.  Otherwise its
				 * closing quote would later be taken
				 * for an opening one.
				 */
				if (c1 == '"' || c1 == '\'')
					stringchar(c1);
			}
			break;
		default:
			copy();
			break;
		}
	}
}
/*
 * Filter stdin to stdout with uncomment().  Returns EXIT_SUCCESS
 * when all input was read and all output written, and EXIT_FAILURE,
 * after a message on stderr, if reading stdin or writing stdout
 * failed.
 */
extern int
main(void)
{
	uncomment();

	if (ferror(stdin)) {
		fprintf(stderr, "error reading standard input\n");
		return (EXIT_FAILURE);
	}

	/* flush explicitly so that a failed write is noticed here */
	if (fflush(stdout) == EOF || ferror(stdout)) {
		fprintf(stderr, "error writing standard output\n");
		return (EXIT_FAILURE);
	}

	return (EXIT_SUCCESS);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment