jcupitt/gist:3349972

## gistfile1.c
/* burr - simple filter to do cut ups for you
 *
 * No more scissors paper and glue! Turn your manual pages into disturbing,
 * impressionistic accounts of drug addiction amongst homosexual hispanics in
 * 1920's San Francisco. Turn your thesis into a lurid description of the
 * thoughts of a man-crab roaming the brass streets of Minraud under a white
 * hot sky. Free yourself of the language virus!
 *
 * Very simple minded - we:
 *
 *	- split the source text into words, remove punctuation and force
 *	everything to lower case.
 *
 *	- generate random fragments of text by choosing between 1 and fragsize
 *	consecutive words from the source.
 *
 *	- generate random phrases by glueing between 1 and phrasesize
 *	fragments together.
 *
 *	- generate random sentences by glueing between 1 and sentsize phrases
 *	together. Separate the phrases with commas.
 *
 *	- generate random text by joining up lots of random sentences with
 *	ellipses.
 *
 * fragsize, phrasesize and sentsize can be set with the -f, -p and -s flags.
 * -<n> sets the number of sentences it generates. -0 means print forever.
 */

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>			/* Need time to seed random */
#include <sys/timeb.h>

/* Compile-time settings. The DEFAULT_* values are the initial values of the
 * globals fragsize, phrasesize, sentsize and numsent.
 */
#define DEFAULT_FRAGSIZE 3		/* Initial fragsize: most words in a fragment */
#define DEFAULT_PHRASESIZE 4		/* Initial phrasesize: most fragments in a phrase */
#define DEFAULT_SENTSIZE 3		/* Initial sentsize: most phrases in a sentence */
#define DEFAULT_NUMSENT 10		/* Initial numsent: sentences to print */
#define BUFFERSIZE 80			/* Size of NextWord's buffer, terminator included */
/* Usage message; main() fills both %s with argv[0]. */
#define USAGE "%s: usage: %s [-<n> -f<n> -p<n> -s<n> {<file name>}]\n"

/* Memory allocation. <stdlib.h> supplies the real prototypes for malloc()
 * (which returns void *, not char *) and for exit(), which this file calls
 * on fatal errors. GETMEM(T) allocates room for one object of type T and
 * yields a T * (NULL if the allocation failed).
 */
#include <stdlib.h>
#define GETMEM(T) ((T *) malloc( sizeof(T)))

/* Random numbers, and ftime for seeding them. These old-style declarations
 * are kept for systems whose headers do not declare the functions; they are
 * compatible with the prototypes in <stdlib.h> and <sys/timeb.h>.
 */
extern long random();
extern void srandom();
extern int ftime();

/* Globals: the run-time tunables. Each starts at its DEFAULT_* value and is
 * overwritten by main() when the matching command-line flag is seen.
 */
static int fragsize = DEFAULT_FRAGSIZE;	/* Upper bound on words per fragment (-f) */
static int phrasesize = DEFAULT_PHRASESIZE;	/* Upper bound on fragments per phrase (-p) */
static int sentsize = DEFAULT_SENTSIZE;	/* Upper bound on phrases per sentence (-s) */
static int numsent = DEFAULT_NUMSENT;	/* Number of sentences to print (-<n>) */

/* We hold the text as a singly linked list of words. One node per word, in
 * the order the words were added.
 */
struct word {
	char *wd;			/* The word: a strdup()ed, NUL-terminated string */
	struct word *next;		/* Following node, or NULL for the last one */
};

/* And have a bigger struct wrapping that up: head and tail pointers plus a
 * count, so AddWord() can append without walking the list.
 */
struct wordlist {
	struct word *start;		/* First node, NULL while the list is empty */
	struct word *end;		/* Last node, NULL while the list is empty */
	int size;			/* Number of words currently in the list */
};

/* Build a new, unlinked list node holding a private copy of str.
 *
 * str is copied with strdup(), so the caller may reuse its buffer
 * afterwards. The node's next pointer is NULL. If either allocation fails
 * we report it and exit(1) instead of dereferencing a NULL pointer.
 */
struct word *
CreateWord( str )
char *str;
{	struct word *res = GETMEM( struct word );

	if( res == NULL || (res->wd = strdup( str )) == NULL ) {
		fprintf( stderr, "out of memory\n" );
		exit( 1 );
	}
	res->next = NULL;
	return res;
}

/* Append a copy of str to the tail of wdlist and bump the word count.
 * The node itself is made by CreateWord().
 */
void
AddWord( wdlist, str )
struct wordlist *wdlist;
char *str;
{	struct word *node = CreateWord( str );

	/* An empty list gets the node as its head; otherwise hang it off
	 * the current tail. Either way it becomes the new tail.
	 */
	if( wdlist->start != NULL )
		wdlist->end->next = node;
	else
		wdlist->start = node;
	wdlist->end = node;

	wdlist->size += 1;
}

/* Make a new, empty wordlist: no nodes and a size of zero.
 * If the allocation fails we report it and exit(1) rather than writing
 * through a NULL pointer.
 */
struct wordlist *
EmptyList()
{	struct wordlist *res = GETMEM( struct wordlist );

	if( res == NULL ) {
		fprintf( stderr, "out of memory\n" );
		exit( 1 );
	}
	res->start = NULL;
	res->end = NULL;
	res->size = 0;
	return res;
}

/* Return the i'th word (counting from 0) of wdlist. The pointer refers to
 * the string stored in the list, not to a copy.
 *
 * Fall over (message on stderr, exit(1)) if i is out of range. Negative
 * indices are rejected too: they used to slip past the check and send the
 * walk off the end of the list.
 */
char *
ExtractWord( wdlist, i )
struct wordlist *wdlist;
int i;
{	register struct word *p;

	if( i < 0 || i >= wdlist->size ) {
		fprintf( stderr, "help!" );
		exit( 1 );
	}
	for( p = wdlist->start; i > 0; i-- )
		p = p->next;
	return p->wd;
}

/* What's a word? A sequence of characters satisfying this: letters, digits
 * and punctuation. Returns non-zero for a word character, zero otherwise.
 *
 * The character is converted to unsigned char first, because handing a
 * negative char to the <ctype.h> functions is undefined behaviour.
 */
int
isword( ch )
char ch;
{	unsigned char uc = (unsigned char) ch;

	return( isalnum( uc ) || ispunct( uc ) );
}

/* Read the next word off an input stream. We skip leading whitespace, then
 * copy into a buffer while we see isword characters. Words larger than the
 * buffer get chopped up into BUFFERSIZE - 1 character pieces.
 *
 * Returns a pointer to a static buffer, so the string should be saved
 * somewhere before calling this function again. Returns NULL for
 * end-of-input. The result may be the empty string if the first
 * non-whitespace character is not a word character.
 */
char *
NextWord( fd )
FILE *fd;
{	static char buff[ BUFFERSIZE ];
	int n = 0;				/* Index through buffer */
	int ch;					/* int, so EOF is distinct from every char */

	/* Skip whitespace and spot EOF.
	 */
	do
		ch = getc( fd );
	while( ch != EOF && isspace( ch ) );
	if( ch == EOF )
		return NULL;

	/* Save ch until we are not a word, or the buffer is full.
	 */
	while( ch != EOF && isword( ch ) && n < (BUFFERSIZE - 1) ) {
		buff[ n++ ] = ch;
		ch = getc( fd );
	}

	/* If we stopped only because the buffer filled up, ch still belongs
	 * to this word: push it back so the next call starts with it rather
	 * than silently losing a character.
	 */
	if( ch != EOF && isword( ch ) )
		ungetc( ch, fd );

	/* Add a terminator and return
	 */
	buff[n] = '\0';
	return buff;
}

/* Remove leading and trailing punctuation from a string, in place.
 *
 * Leading punctuation is removed by returning a pointer that may have been
 * moved up inside str; trailing punctuation is removed by overwriting it
 * with '\0'. Punctuation inside the word is left alone. The result may be
 * the empty string.
 */
char *
StripPunct( str )
char *str;
{	char *start = str;
	char *end;

	/* Move start up .. the '\0' will stop us going too far
	 */
	while( ispunct( (unsigned char) *start ) )
		start++;

	/* Move the '\0' down. The end > start test must come first, so that
	 * we never read the byte in front of an empty string.
	 */
	end = start + strlen( start );		/* Point at terminator */
	while( (end > start) && ispunct( (unsigned char) end[-1] ) )
		*--end = '\0';

	return start;
}

/* Force all the letters in a string to lower case, in place. Special cases:
 * a lone "I" is left untouched, and for words of three or more characters
 * that start with 'I' followed by punctuation ("I'd", "I'll" etc.) the
 * first two characters are kept and only the rest is lowered.
 *
 * Characters go through unsigned char before reaching <ctype.h>, since a
 * negative char there is undefined behaviour.
 */
void
ZapCase( str )
char *str;
{	if( strcmp( str, "I" ) == 0 )
		return;				/* Just an "I" */

	/* "I'" form with something after it. Skip the I' */
	if( (str[0] == 'I') && ispunct( (unsigned char) str[1] ) &&
			(str[2] != '\0') )
		str += 2;

	for( ; *str != '\0'; str++ )
		*str = tolower( (unsigned char) *str );
}

/* Pull every word out of fd until NextWord() reports end-of-input. Each
 * one has its outer punctuation stripped and its case forced down; whatever
 * survives that is appended to wdlist, and empty leftovers are dropped.
 */
void
ReadText( fd, wdlist )
FILE *fd;
struct wordlist *wdlist;
{	for( ;; ) {
		char *raw = NextWord( fd );
		char *clean;

		if( raw == NULL )
			break;			/* End of input */

		clean = StripPunct( raw );
		ZapCase( clean );
		if( *clean == '\0' )
			continue;		/* Nothing but punctuation */

		AddWord( wdlist, clean );
	}
}

/* Simple random numbers .. generate an int in the range 1 to n, using
 * random().
 *
 * For n < 1 the range is empty; we return 1 rather than evaluating r % n,
 * which would divide by zero for n == 0.
 */
int
rnd( n )
int n;
{	long r;

	if( n < 1 )
		return 1;
	r = random();
	return( (int) ((r % n) + 1) );
}

/* Print one random fragment to stdout: between 1 and fragsize consecutive
 * words from wdlist, separated by single spaces. The length is drawn first,
 * then a start position that leaves room for that many words.
 */
void
PrintFrag( wdlist )
struct wordlist *wdlist;
{	int remaining = rnd( fragsize );
	int pos = rnd( wdlist->size - remaining + 1 ) - 1;

	while( remaining > 0 ) {
		printf( "%s", ExtractWord( wdlist, pos ) );
		pos++;
		remaining--;
		if( remaining > 0 )
			printf( " " );		/* More to come: joining space */
	}
}

/* Print one random phrase to stdout: between 1 and phrasesize fragments,
 * separated by single spaces.
 */
void
PrintPhrase( wdlist )
struct wordlist *wdlist;
{	int left = rnd( phrasesize );

	while( left > 0 ) {
		PrintFrag( wdlist );
		left--;
		if( left > 0 )
			printf( " " );		/* More to come: joining space */
	}
}

/* Print one random sentence to stdout: between 1 and sentsize phrases
 * joined with ", ", finished off with an ellipsis and a newline.
 */
void
PrintSent( wdlist )
struct wordlist *wdlist;
{	int left = rnd( sentsize );

	while( left > 0 ) {
		PrintPhrase( wdlist );
		left--;
		if( left > 0 )
			printf( ", " );		/* More to come: joining comma */
	}

	printf( " ...\n" );			/* And trailing ellipsis */
}

/* Main function! Decode the flags, read in all the input and start spewing
 * out text.
 *
 * -f<n>, -p<n> and -s<n> set fragsize, phrasesize and sentsize; -<n> sets
 * the number of sentences, with -0 meaning print forever. Any argument not
 * starting with '-' is a file to read; with no file arguments we read
 * stdin. Exits with status 1 on a bad flag, an unreadable file, a size
 * below 1 or too few input words, and returns 0 otherwise.
 */
int
main( argc, argv )
int argc;
char **argv;
{	struct wordlist *wdlist = EmptyList();
	int fileargs = 0;			/* Were there file args */
	struct timeb ts;			/* Read time into this */
	int i;

	/* Arguments are walked last-to-first, as they always were, so
	 * several input files are still read in reverse order.
	 */
	for( i = argc - 1; i > 0; i-- ) {
		char *arg = argv[i];
		int *target = NULL;		/* Global this flag sets */

		if( arg[0] != '-' ) {
			FILE *fd = fopen( arg, "r" );

			if( fd == NULL ) {
				fprintf( stderr, "%s: unable to read %s\n",
						argv[0], arg );
				exit( 1 );
			}
			ReadText( fd, wdlist );
			fclose( fd );
			fileargs = 1;
			continue;
		}

		switch( arg[1] ) {
			case 'f':
				target = &fragsize;
				arg += 2;
				break;
			case 'p':
				target = &phrasesize;
				arg += 2;
				break;
			case 's':
				target = &sentsize;
				arg += 2;
				break;
			case '0': case '1': case '2': case '3':
			case '4': case '5': case '6': case '7':
			case '8': case '9':
				target = &numsent;
				arg += 1;
				break;
			default:
				break;
		}

		/* Unknown flag, or a flag without a number after it.
		 */
		if( target == NULL || sscanf( arg, "%d", target ) != 1 ) {
			fprintf( stderr, USAGE, argv[0], argv[0] );
			exit( 1 );
		}
	}

	/* rnd() needs a range of at least 1 to pick from.
	 */
	if( fragsize < 1 || phrasesize < 1 || sentsize < 1 ) {
		fprintf( stderr, "%s: -f, -p and -s must be at least 1\n",
				argv[0] );
		exit( 1 );
	}

	if( !fileargs )
		/* No file args .. read from stdin
		 */
		ReadText( stdin, wdlist );

	/* Check we have enough words for PrintFrag
	 */
	if( wdlist->size < fragsize ) {
		fprintf( stderr, "%s: too few words (minimum %d)\n",
				argv[0], fragsize );
		exit( 1 );
	}

	/* All args done .. now seed the random number generator from
	 * the time. Seconds and milliseconds are combined, in unsigned
	 * arithmetic; millitm alone gives only 1000 different seeds.
	 */
	ftime( &ts );
	srandom( (unsigned int)
		((unsigned long) ts.time * 1000UL + ts.millitm) );

	/* Print random text. -0 means forever, spelled out as a real
	 * endless loop instead of counting down through integer overflow.
	 */
	if( numsent == 0 )
		for( ;; )
			PrintSent( wdlist );
	for( ; numsent > 0; numsent-- )
		PrintSent( wdlist );

	return 0;
}
	/* burr - simple filter to do cut ups for you
	*
	* No more scissors paper and glue! Turn your manual pages into disturbing,
	* impressionistic accounts of drug addiction amongst homosexual hispanics in
	* 1920's San Francisco. Turn your thesis into a lurid description of the
	* thoughts of a man-crab roaming the brass streets of Minraud under a white
	* hot sky. Free yourself of the language virus!
	*
	* Very simple minded - we:
	*
	* - split the source text into words, remove punctuation and force
	* everything to lower case.
	*
	* - generate random fragments of text by choosing between 1 and fragsize
	* consecutive words from the source.
	*
	* - generate random phrases by glueing between 1 and phrasesize
	* fragments together.
	*
	* - generate random sentences by glueing between 1 and sentsize phrases
	* together. Separate the phrases with commas.
	*
	* - generate random text by joining up lots of random sentences with
	* ellipses.
	*
	* fragsize, phrasesize and sentsize can be set with the -f, -p and -s flags.
	* -<n> sets the number of sentences it generates. -0 means print forever.
	*/

	#include <stdio.h>
	#include <string.h>
	#include <ctype.h>
	#include <sys/types.h> /* Need time to seed random */
	#include <sys/timeb.h>

	/* Compile-time settings. The DEFAULT_* values are the initial values of
	 * the globals fragsize, phrasesize, sentsize and numsent.
	 */
	#define DEFAULT_FRAGSIZE 3 /* Initial fragsize: most words in a fragment */
	#define DEFAULT_PHRASESIZE 4 /* Initial phrasesize: most fragments in a phrase */
	#define DEFAULT_SENTSIZE 3 /* Initial sentsize: most phrases in a sentence */
	#define DEFAULT_NUMSENT 10 /* Initial numsent: sentences to print */
	#define BUFFERSIZE 80 /* Size of NextWord's buffer, terminator included */
	/* Usage message; main() fills both %s with argv[0]. */
	#define USAGE "%s: usage: %s [-<n> -f<n> -p<n> -s<n> {<file name>}]\n"

	/* Memory allocation. <stdlib.h> supplies the real prototypes for malloc()
	 * (which returns void *, not char *) and for exit(), which this file calls
	 * on fatal errors. GETMEM(T) allocates room for one object of type T and
	 * yields a T * (NULL if the allocation failed).
	 */
	#include <stdlib.h>
	#define GETMEM(T) ((T *) malloc( sizeof(T)))

	/* Random numbers, and ftime for seeding them. These old-style
	 * declarations are kept for systems whose headers do not declare the
	 * functions; they are compatible with the prototypes in <stdlib.h> and
	 * <sys/timeb.h>.
	 */
	extern long random();
	extern void srandom();
	extern int ftime();

	/* Globals: the run-time tunables. Each starts at its DEFAULT_* value and
	 * is overwritten by main() when the matching command-line flag is seen.
	 */
	static int fragsize = DEFAULT_FRAGSIZE; /* Upper bound on words per fragment (-f) */
	static int phrasesize = DEFAULT_PHRASESIZE; /* Upper bound on fragments per phrase (-p) */
	static int sentsize = DEFAULT_SENTSIZE; /* Upper bound on phrases per sentence (-s) */
	static int numsent = DEFAULT_NUMSENT; /* Number of sentences to print (-<n>) */

	/* We hold the text as a singly linked list of words. One node per word,
	 * in the order the words were added.
	 */
	struct word {
		char *wd;		/* The word: a strdup()ed, NUL-terminated string */
		struct word *next;	/* Following node, or NULL for the last one */
	};

	/* And have a bigger struct wrapping that up: head and tail pointers plus
	 * a count, so AddWord() can append without walking the list.
	 */
	struct wordlist {
		struct word *start;	/* First node, NULL while the list is empty */
		struct word *end;	/* Last node, NULL while the list is empty */
		int size;		/* Number of words currently in the list */
	};

	/* Build a new, unlinked list node holding a private copy of str.
	 *
	 * str is copied with strdup(), so the caller may reuse its buffer
	 * afterwards. The node's next pointer is NULL. If either allocation
	 * fails we report it and exit(1) instead of dereferencing a NULL pointer.
	 */
	struct word *
	CreateWord( str )
	char *str;
	{	struct word *res = GETMEM( struct word );

		if( res == NULL || (res->wd = strdup( str )) == NULL ) {
			fprintf( stderr, "out of memory\n" );
			exit( 1 );
		}
		res->next = NULL;
		return res;
	}

	/* Append a copy of str to the tail of wdlist and bump the word count.
	 * The node itself is made by CreateWord().
	 */
	void
	AddWord( wdlist, str )
	struct wordlist *wdlist;
	char *str;
	{	struct word *node = CreateWord( str );

		/* An empty list gets the node as its head; otherwise hang it
		 * off the current tail. Either way it becomes the new tail.
		 */
		if( wdlist->start != NULL )
			wdlist->end->next = node;
		else
			wdlist->start = node;
		wdlist->end = node;

		wdlist->size += 1;
	}

	/* Make a new, empty wordlist: no nodes and a size of zero.
	 * If the allocation fails we report it and exit(1) rather than writing
	 * through a NULL pointer.
	 */
	struct wordlist *
	EmptyList()
	{	struct wordlist *res = GETMEM( struct wordlist );

		if( res == NULL ) {
			fprintf( stderr, "out of memory\n" );
			exit( 1 );
		}
		res->start = NULL;
		res->end = NULL;
		res->size = 0;
		return res;
	}

	/* Return the i'th word (counting from 0) of wdlist. The pointer refers
	 * to the string stored in the list, not to a copy.
	 *
	 * Fall over (message on stderr, exit(1)) if i is out of range. Negative
	 * indices are rejected too: they used to slip past the check and send
	 * the walk off the end of the list.
	 */
	char *
	ExtractWord( wdlist, i )
	struct wordlist *wdlist;
	int i;
	{	register struct word *p;

		if( i < 0 || i >= wdlist->size ) {
			fprintf( stderr, "help!" );
			exit( 1 );
		}
		for( p = wdlist->start; i > 0; i-- )
			p = p->next;
		return p->wd;
	}

	/* What's a word? A sequence of characters satisfying this: letters,
	 * digits and punctuation. Returns non-zero for a word character, zero
	 * otherwise.
	 *
	 * The character is converted to unsigned char first, because handing a
	 * negative char to the <ctype.h> functions is undefined behaviour.
	 */
	int
	isword( ch )
	char ch;
	{	unsigned char uc = (unsigned char) ch;

		return( isalnum( uc ) || ispunct( uc ) );
	}

	/* Read the next word off an input stream. We skip leading whitespace,
	 * then copy into a buffer while we see isword characters. Words larger
	 * than the buffer get chopped up into BUFFERSIZE - 1 character pieces.
	 *
	 * Returns a pointer to a static buffer, so the string should be saved
	 * somewhere before calling this function again. Returns NULL for
	 * end-of-input. The result may be the empty string if the first
	 * non-whitespace character is not a word character.
	 */
	char *
	NextWord( fd )
	FILE *fd;
	{	static char buff[ BUFFERSIZE ];
		int n = 0;			/* Index through buffer */
		int ch;				/* int, so EOF is distinct from every char */

		/* Skip whitespace and spot EOF.
		 */
		do
			ch = getc( fd );
		while( ch != EOF && isspace( ch ) );
		if( ch == EOF )
			return NULL;

		/* Save ch until we are not a word, or the buffer is full.
		 */
		while( ch != EOF && isword( ch ) && n < (BUFFERSIZE - 1) ) {
			buff[ n++ ] = ch;
			ch = getc( fd );
		}

		/* If we stopped only because the buffer filled up, ch still
		 * belongs to this word: push it back so the next call starts
		 * with it rather than silently losing a character.
		 */
		if( ch != EOF && isword( ch ) )
			ungetc( ch, fd );

		/* Add a terminator and return
		 */
		buff[n] = '\0';
		return buff;
	}

	/* Remove leading and trailing punctuation from a string, in place.
	 *
	 * Leading punctuation is removed by returning a pointer that may have
	 * been moved up inside str; trailing punctuation is removed by
	 * overwriting it with '\0'. Punctuation inside the word is left alone.
	 * The result may be the empty string.
	 */
	char *
	StripPunct( str )
	char *str;
	{	char *start = str;
		char *end;

		/* Move start up .. the '\0' will stop us going too far
		 */
		while( ispunct( (unsigned char) *start ) )
			start++;

		/* Move the '\0' down. The end > start test must come first, so
		 * that we never read the byte in front of an empty string.
		 */
		end = start + strlen( start );	/* Point at terminator */
		while( (end > start) && ispunct( (unsigned char) end[-1] ) )
			*--end = '\0';

		return start;
	}

	/* Force all the letters in a string to lower case, in place. Special
	 * cases: a lone "I" is left untouched, and for words of three or more
	 * characters that start with 'I' followed by punctuation ("I'd", "I'll"
	 * etc.) the first two characters are kept and only the rest is lowered.
	 *
	 * Characters go through unsigned char before reaching <ctype.h>, since a
	 * negative char there is undefined behaviour.
	 */
	void
	ZapCase( str )
	char *str;
	{	if( strcmp( str, "I" ) == 0 )
			return;			/* Just an "I" */

		/* "I'" form with something after it. Skip the I' */
		if( (str[0] == 'I') && ispunct( (unsigned char) str[1] ) &&
				(str[2] != '\0') )
			str += 2;

		for( ; *str != '\0'; str++ )
			*str = tolower( (unsigned char) *str );
	}

	/* Pull every word out of fd until NextWord() reports end-of-input. Each
	 * one has its outer punctuation stripped and its case forced down;
	 * whatever survives that is appended to wdlist, and empty leftovers are
	 * dropped.
	 */
	void
	ReadText( fd, wdlist )
	FILE *fd;
	struct wordlist *wdlist;
	{	for( ;; ) {
			char *raw = NextWord( fd );
			char *clean;

			if( raw == NULL )
				break;		/* End of input */

			clean = StripPunct( raw );
			ZapCase( clean );
			if( *clean == '\0' )
				continue;	/* Nothing but punctuation */

			AddWord( wdlist, clean );
		}
	}

	/* Simple random numbers .. generate an int in the range 1 to n, using
	 * random().
	 *
	 * For n < 1 the range is empty; we return 1 rather than evaluating
	 * r % n, which would divide by zero for n == 0.
	 */
	int
	rnd( n )
	int n;
	{	long r;

		if( n < 1 )
			return 1;
		r = random();
		return( (int) ((r % n) + 1) );
	}

	/* Print one random fragment to stdout: between 1 and fragsize
	 * consecutive words from wdlist, separated by single spaces. The length
	 * is drawn first, then a start position that leaves room for that many
	 * words.
	 */
	void
	PrintFrag( wdlist )
	struct wordlist *wdlist;
	{	int remaining = rnd( fragsize );
		int pos = rnd( wdlist->size - remaining + 1 ) - 1;

		while( remaining > 0 ) {
			printf( "%s", ExtractWord( wdlist, pos ) );
			pos++;
			remaining--;
			if( remaining > 0 )
				printf( " " );	/* More to come: joining space */
		}
	}

	/* Print one random phrase to stdout: between 1 and phrasesize
	 * fragments, separated by single spaces.
	 */
	void
	PrintPhrase( wdlist )
	struct wordlist *wdlist;
	{	int left = rnd( phrasesize );

		while( left > 0 ) {
			PrintFrag( wdlist );
			left--;
			if( left > 0 )
				printf( " " );	/* More to come: joining space */
		}
	}

	/* Print one random sentence to stdout: between 1 and sentsize phrases
	 * joined with ", ", finished off with an ellipsis and a newline.
	 */
	void
	PrintSent( wdlist )
	struct wordlist *wdlist;
	{	int left = rnd( sentsize );

		while( left > 0 ) {
			PrintPhrase( wdlist );
			left--;
			if( left > 0 )
				printf( ", " );	/* More to come: joining comma */
		}

		printf( " ...\n" );		/* And trailing ellipsis */
	}

	/* Main function! Decode the flags, read in all the input and start
	 * spewing out text.
	 *
	 * -f<n>, -p<n> and -s<n> set fragsize, phrasesize and sentsize; -<n>
	 * sets the number of sentences, with -0 meaning print forever. Any
	 * argument not starting with '-' is a file to read; with no file
	 * arguments we read stdin. Exits with status 1 on a bad flag, an
	 * unreadable file, a size below 1 or too few input words, and returns 0
	 * otherwise.
	 */
	int
	main( argc, argv )
	int argc;
	char **argv;
	{	struct wordlist *wdlist = EmptyList();
		int fileargs = 0;		/* Were there file args */
		struct timeb ts;		/* Read time into this */
		int i;

		/* Arguments are walked last-to-first, as they always were, so
		 * several input files are still read in reverse order.
		 */
		for( i = argc - 1; i > 0; i-- ) {
			char *arg = argv[i];
			int *target = NULL;	/* Global this flag sets */

			if( arg[0] != '-' ) {
				FILE *fd = fopen( arg, "r" );

				if( fd == NULL ) {
					fprintf( stderr, "%s: unable to read %s\n",
							argv[0], arg );
					exit( 1 );
				}
				ReadText( fd, wdlist );
				fclose( fd );
				fileargs = 1;
				continue;
			}

			switch( arg[1] ) {
				case 'f':
					target = &fragsize;
					arg += 2;
					break;
				case 'p':
					target = &phrasesize;
					arg += 2;
					break;
				case 's':
					target = &sentsize;
					arg += 2;
					break;
				case '0': case '1': case '2': case '3':
				case '4': case '5': case '6': case '7':
				case '8': case '9':
					target = &numsent;
					arg += 1;
					break;
				default:
					break;
			}

			/* Unknown flag, or a flag without a number after it.
			 */
			if( target == NULL || sscanf( arg, "%d", target ) != 1 ) {
				fprintf( stderr, USAGE, argv[0], argv[0] );
				exit( 1 );
			}
		}

		/* rnd() needs a range of at least 1 to pick from.
		 */
		if( fragsize < 1 || phrasesize < 1 || sentsize < 1 ) {
			fprintf( stderr, "%s: -f, -p and -s must be at least 1\n",
					argv[0] );
			exit( 1 );
		}

		if( !fileargs )
			/* No file args .. read from stdin
			 */
			ReadText( stdin, wdlist );

		/* Check we have enough words for PrintFrag
		 */
		if( wdlist->size < fragsize ) {
			fprintf( stderr, "%s: too few words (minimum %d)\n",
					argv[0], fragsize );
			exit( 1 );
		}

		/* All args done .. now seed the random number generator from
		 * the time. Seconds and milliseconds are combined, in unsigned
		 * arithmetic; millitm alone gives only 1000 different seeds.
		 */
		ftime( &ts );
		srandom( (unsigned int)
			((unsigned long) ts.time * 1000UL + ts.millitm) );

		/* Print random text. -0 means forever, spelled out as a real
		 * endless loop instead of counting down through integer
		 * overflow.
		 */
		if( numsent == 0 )
			for( ;; )
				PrintSent( wdlist );
		for( ; numsent > 0; numsent-- )
			PrintSent( wdlist );

		return 0;
	}