incertia/parser.c

## parser.c
#include "parser.h"

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

typedef int BOOL;
#define TRUE    1
#define FALSE   0

/* Tokenizer state carried between successive nextarg() calls.
   arg_buf owns the private heap copy of the command line being scanned;
   arg_pos is the read cursor inside that copy.  Both are NULL while no
   input is pending.  They are file-local: parser.h exports only nextarg(). */
static char *arg_buf = NULL;
static char *arg_pos = NULL;

/* Drop the private copy and mark the tokenizer as idle. */
static void arg_reset(void){
    free(arg_buf);
    arg_buf = arg_pos = NULL;
}

/* TRUE for the three characters that open or close a quoted section. */
static BOOL arg_is_quote(char c){
    return (c == '"' || c == '\'' || c == '`') ? TRUE : FALSE;
}

/* isspace() wrapper.  The cast keeps bytes >= 0x80 from reaching <ctype.h>
   as negative values, which would be undefined behaviour. */
static BOOL arg_is_space(char c){
    return isspace((unsigned char)c) ? TRUE : FALSE;
}

/* nextarg is like strtok, but with shell-like quoting and its own private
   copy of the input (cmd itself is never modified).

   cmd != NULL starts tokenizing a new string and discards any previous one.
   cmd == NULL continues with the string given to the previous call.

   Arguments are separated by isspace() characters.  Text between a pair of
   matching ", ' or ` characters belongs to the argument and may contain
   whitespace and the other two quote characters; the enclosing quotes are
   removed.  A backslash makes the following character literal, inside or
   outside quotes.  An unterminated quote runs to the end of the string.

   Returns a malloc'd, NUL-terminated argument that the caller must free.
   An empty or all-whitespace cmd yields "" once.  NULL is returned when
     - no input is pending (everything has already been consumed),
     - the argument being read is still empty when a backslash that is the
       very last character of the input is reached, or
     - memory could not be allocated.
   In all three cases the internal state is released.

   The state lives in file-level variables, so nextarg is not reentrant.

   const char *line = "a bc \"def' `ghi\"";
   char *c = nextarg(line);
   while(c){
       printf("%s\n", c);
       free(c);
       c = nextarg(NULL);
   } */
char * nextarg(const char *cmd){
    size_t len = 0, out = 0;   /* output length / output write index */
    char *result, *start, quote = '\0';

    /* we operate on our own private copy of cmd */
    if(cmd){
        size_t size = strlen(cmd) + 1;

        free(arg_buf);
        arg_buf = malloc(size);
        if(arg_buf) memcpy(arg_buf, cmd, size);
        arg_pos = arg_buf;
    }
    if(!arg_buf || !arg_pos) return NULL;

    /* leading whitespace never belongs to an argument */
    while(arg_is_space(*arg_pos)) arg_pos++;
    start = arg_pos;

    /* pass 1: advance arg_pos to the end of the argument and count in len
       how many characters survive quote removal and unescaping */
    while(*arg_pos){
        if(quote){
            if(*arg_pos == quote){
                /* closing quote: consumed, not counted */
                quote = '\0';
                arg_pos++;
                continue;
            }
        } else if(arg_is_space(*arg_pos)){
            /* unquoted whitespace ends the argument */
            break;
        } else if(arg_is_quote(*arg_pos)){
            /* opening quote: consumed, not counted */
            quote = *arg_pos++;
            continue;
        }
        if(*arg_pos == '\\'){
            if(!arg_pos[1]){
                /* a backslash with nothing left to escape is dropped */
                *arg_pos = '\0';
                if(len) break;
                arg_reset();
                return NULL;
            }
            arg_pos++; /* step onto the escaped character */
        }
        len++;
        arg_pos++;
    }

    result = malloc(len + 1);
    if(!result){
        arg_reset();
        return NULL;
    }

    /* pass 2: replay the same rules from start, copying instead of counting */
    quote = '\0';
    while(out < len){
        if(*start == '\\'){
            start++;
            result[out++] = *start++;
        } else if(arg_is_quote(*start)){
            if(!quote){
                quote = *start++;
            } else if(quote == *start){
                quote = '\0';
                start++;
            } else {
                /* a different quote character inside quotes is literal */
                result[out++] = *start++;
            }
        } else {
            result[out++] = *start++;
        }
    }
    result[len] = '\0';

    /* swallow trailing whitespace so that the next call returns NULL right
       away when nothing but blanks is left */
    while(arg_is_space(*arg_pos)) arg_pos++;
    if(!*arg_pos) arg_reset();

    return result;
}

## parser.h
#ifndef PARSER_H
#define PARSER_H

/* Split a command line into arguments, one per call (strtok-like, but with
   its own delimiter and quoting rules).

   cmd  a NUL-terminated string to start tokenizing, or NULL to continue
        with the string passed to the previous call.  The string itself is
        not modified; the parser works on a private copy.

   Arguments are separated by whitespace.  Sections enclosed in ", ' or `
   are kept together with the quotes removed, and a backslash makes the
   following character literal.

   Returns a heap-allocated argument that the caller must free(), or NULL
   when no input is pending.  NULL is also returned when the argument being
   read is still empty and the input ends in a lone backslash. */
char * nextarg(const char *cmd);

#endif /* #ifndef PARSER_H */
	#include "parser.h"

	#include <ctype.h>
	#include <stdlib.h>
	#include <string.h>

	typedef int BOOL;
	#define TRUE 1
	#define FALSE 0

	/* Tokenizer state carried between successive nextarg() calls.
	   arg_buf owns the private heap copy of the command line being scanned;
	   arg_pos is the read cursor inside that copy.  Both are NULL while no
	   input is pending.  They are file-local: parser.h exports only nextarg(). */
	static char *arg_buf = NULL;
	static char *arg_pos = NULL;

	/* Drop the private copy and mark the tokenizer as idle. */
	static void arg_reset(void){
	    free(arg_buf);
	    arg_buf = arg_pos = NULL;
	}

	/* TRUE for the three characters that open or close a quoted section. */
	static BOOL arg_is_quote(char c){
	    return (c == '"' || c == '\'' || c == '`') ? TRUE : FALSE;
	}

	/* isspace() wrapper.  The cast keeps bytes >= 0x80 from reaching <ctype.h>
	   as negative values, which would be undefined behaviour. */
	static BOOL arg_is_space(char c){
	    return isspace((unsigned char)c) ? TRUE : FALSE;
	}

	/* nextarg is like strtok, but with shell-like quoting and its own private
	   copy of the input (cmd itself is never modified).

	   cmd != NULL starts tokenizing a new string and discards any previous one.
	   cmd == NULL continues with the string given to the previous call.

	   Arguments are separated by isspace() characters.  Text between a pair of
	   matching ", ' or ` characters belongs to the argument and may contain
	   whitespace and the other two quote characters; the enclosing quotes are
	   removed.  A backslash makes the following character literal, inside or
	   outside quotes.  An unterminated quote runs to the end of the string.

	   Returns a malloc'd, NUL-terminated argument that the caller must free.
	   An empty or all-whitespace cmd yields "" once.  NULL is returned when
	     - no input is pending (everything has already been consumed),
	     - the argument being read is still empty when a backslash that is the
	       very last character of the input is reached, or
	     - memory could not be allocated.
	   In all three cases the internal state is released.

	   The state lives in file-level variables, so nextarg is not reentrant.

	   const char *line = "a bc \"def' `ghi\"";
	   char *c = nextarg(line);
	   while(c){
	       printf("%s\n", c);
	       free(c);
	       c = nextarg(NULL);
	   } */
	char * nextarg(const char *cmd){
	    size_t len = 0, out = 0;   /* output length / output write index */
	    char *result, *start, quote = '\0';

	    /* we operate on our own private copy of cmd */
	    if(cmd){
	        size_t size = strlen(cmd) + 1;

	        free(arg_buf);
	        arg_buf = malloc(size);
	        if(arg_buf) memcpy(arg_buf, cmd, size);
	        arg_pos = arg_buf;
	    }
	    if(!arg_buf || !arg_pos) return NULL;

	    /* leading whitespace never belongs to an argument */
	    while(arg_is_space(*arg_pos)) arg_pos++;
	    start = arg_pos;

	    /* pass 1: advance arg_pos to the end of the argument and count in len
	       how many characters survive quote removal and unescaping */
	    while(*arg_pos){
	        if(quote){
	            if(*arg_pos == quote){
	                /* closing quote: consumed, not counted */
	                quote = '\0';
	                arg_pos++;
	                continue;
	            }
	        } else if(arg_is_space(*arg_pos)){
	            /* unquoted whitespace ends the argument */
	            break;
	        } else if(arg_is_quote(*arg_pos)){
	            /* opening quote: consumed, not counted */
	            quote = *arg_pos++;
	            continue;
	        }
	        if(*arg_pos == '\\'){
	            if(!arg_pos[1]){
	                /* a backslash with nothing left to escape is dropped */
	                *arg_pos = '\0';
	                if(len) break;
	                arg_reset();
	                return NULL;
	            }
	            arg_pos++; /* step onto the escaped character */
	        }
	        len++;
	        arg_pos++;
	    }

	    result = malloc(len + 1);
	    if(!result){
	        arg_reset();
	        return NULL;
	    }

	    /* pass 2: replay the same rules from start, copying instead of counting */
	    quote = '\0';
	    while(out < len){
	        if(*start == '\\'){
	            start++;
	            result[out++] = *start++;
	        } else if(arg_is_quote(*start)){
	            if(!quote){
	                quote = *start++;
	            } else if(quote == *start){
	                quote = '\0';
	                start++;
	            } else {
	                /* a different quote character inside quotes is literal */
	                result[out++] = *start++;
	            }
	        } else {
	            result[out++] = *start++;
	        }
	    }
	    result[len] = '\0';

	    /* swallow trailing whitespace so that the next call returns NULL right
	       away when nothing but blanks is left */
	    while(arg_is_space(*arg_pos)) arg_pos++;
	    if(!*arg_pos) arg_reset();

	    return result;
	}
	#ifndef PARSER_H
	#define PARSER_H

	/* Split a command line into arguments, one per call (strtok-like, but with
	   its own delimiter and quoting rules).

	   cmd  a NUL-terminated string to start tokenizing, or NULL to continue
	        with the string passed to the previous call.  The string itself is
	        not modified; the parser works on a private copy.

	   Arguments are separated by whitespace.  Sections enclosed in ", ' or `
	   are kept together with the quotes removed, and a backslash makes the
	   following character literal.

	   Returns a heap-allocated argument that the caller must free(), or NULL
	   when no input is pending.  NULL is also returned when the argument being
	   read is still empty and the input ends in a lone backslash. */
	char * nextarg(const char *cmd);

	#endif /* #ifndef PARSER_H */