Created
August 14, 2012 14:53
-
-
Save jcupitt/3349972 to your computer and use it in GitHub Desktop.
make Burroughs-style cut-ups
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* burr - simple filter to do cut ups for you | |
* | |
* No more scissors paper and glue! Turn your manual pages into disturbing, | |
* impressionistic accounts of drug addiction amongst homosexual hispanics in | |
* 1920's San Francisco. Turn your thesis into a lurid description of the | |
* thoughts of a man-crab roaming the brass streets of Minraud under a white | |
* hot sky. Free yourself of the language virus! | |
* | |
* Very simple minded - we: | |
* | |
* - split the source text into words, remove punctuation and force | |
* everything to lower case. | |
* | |
* - generate random fragments of text by choosing between 1 and fragsize
* consecutive words from the source.
* | |
* - generate random phrases by gluing between 1 and phrasesize
* fragments together.
* | |
* - generate random sentences by glueing between 1 and sentsize phrases | |
* together. Separate the phrases with commas. | |
* | |
* - generate random text by joining up lots of random sentences with
* ellipses.
* | |
* fragsize, phrasesize and sentsize can be set with the -f, -p and -s flags. | |
* -<n> sets the number of sentences it generates. -0 means print forever. | |
*/ | |
#include <stdio.h> | |
#include <string.h> | |
#include <ctype.h> | |
#include <sys/types.h> /* Need time to seed random */ | |
#include <sys/timeb.h> | |
/* Built-in defaults for the tunables, plus the size of the buffer that
 * input words are collected in. These are enumerators rather than macros,
 * so each one is a genuine int constant expression and nothing stray can
 * end up in a macro replacement list.
 */
enum {
	DEFAULT_FRAGSIZE = 3,		/* Most words in a fragment */
	DEFAULT_PHRASESIZE = 4,		/* Most fragments in a phrase */
	DEFAULT_SENTSIZE = 3,		/* Most phrases in a sentence */
	DEFAULT_NUMSENT = 10,		/* Sentences to print */
	BUFFERSIZE = 80			/* What we buffer input words in */
};

/* Usage message. Both %s conversions take the program name.
 */
#define USAGE "%s: usage: %s [-<n> -f<n> -p<n> -s<n> {<file name>}]\n"
/* Memory allocation. malloc() returns a generic pointer, so it is declared
 * with its standard void * return type; GETMEM() casts the result to a
 * pointer to the type being allocated. GETMEM() can yield NULL when
 * malloc() fails.
 */
extern void *malloc();
#define GETMEM(T) ((T *) malloc( sizeof(T)))
/* Random numbers, and ftime for seeding them.
 */
extern long random();
extern void srandom();
extern int ftime();
/* Globals. | |
*/ | |
static int fragsize = DEFAULT_FRAGSIZE; /* Set 'em up, Joe! */ | |
static int phrasesize = DEFAULT_PHRASESIZE; | |
static int sentsize = DEFAULT_SENTSIZE; | |
static int numsent = DEFAULT_NUMSENT; | |
/* The source text lives in a singly linked list. This is one node of it.
 */
struct word {
	char *wd;		/* Text of this word */
	struct word *next;	/* Following node, or NULL at the tail */
};
/* Handle for a whole list of words: both ends plus a running count, so that
 * appending does not need to walk the list.
 */
struct wordlist {
	struct word *start;	/* Head node, NULL while the list is empty */
	struct word *end;	/* Tail node, NULL while the list is empty */
	int size;		/* How many nodes are linked in */
};
/* Some functions to create these guys .. save a string in a word | |
*/ | |
struct word * | |
CreateWord( str ) | |
char *str; | |
{ struct word *res; | |
res = GETMEM( struct word ); | |
res->next = NULL; | |
res->wd = strdup( str ); | |
return res; | |
} | |
/* Append a word to a wordlist | |
*/ | |
void | |
AddWord( wdlist, str ) | |
struct wordlist *wdlist; | |
char *str; | |
{ struct word *wd = CreateWord( str ); | |
if( wdlist->start == NULL ) { | |
wdlist->start = wd; /* First word in list */ | |
wdlist->end = wd; | |
} | |
else { | |
wdlist->end->next = wd; /* Link on to existing list */ | |
wdlist->end = wd; | |
} | |
wdlist->size++; | |
} | |
/* Make a new wordlist | |
*/ | |
struct wordlist * | |
EmptyList() | |
{ struct wordlist *res = GETMEM( struct wordlist ); | |
res->start = NULL; | |
res->end = NULL; | |
res->size = 0; | |
return res; | |
} | |
/* Extract a word from a wordlist. Fall over if the number is out of range. | |
*/ | |
char * | |
ExtractWord( wdlist, i ) | |
struct wordlist *wdlist; | |
int i; | |
{ register struct word *p = wdlist->start; | |
if( i >= wdlist->size ) { | |
fprintf( stderr, "help!" ); | |
exit( 1 ); | |
} | |
while( i-- ) p = p->next; | |
return p->wd; | |
} | |
/* What's a word? A sequence of characters satisfying this: letters, digits
 * and punctuation count, everything else (whitespace, control characters,
 * '\0') does not.
 *
 * Returns non-zero for a word character, 0 otherwise.
 */
int
isword( ch )
char ch;
{
	/* The <ctype.h> functions need a value in unsigned char range;
	 * a plain char may be negative.
	 */
	unsigned char c = (unsigned char) ch;

	return( isalnum( c ) || ispunct( c ) );
}
/* Read the next word off an input stream. We skip leading whitespace, then | |
* copy into a buffer while we see isword characters. Words larger than the | |
* buffer get chopped up. The string we return should be saved somewhere before | |
* calling this function again. Return NULL for end-of-input. | |
*/ | |
char * | |
NextWord( fd ) | |
FILE *fd; | |
{ static char buff[ BUFFERSIZE ]; | |
int n; /* Index through buffer */ | |
char ch; /* Character we have read */ | |
/* Skip whitespace and spot EOF. | |
*/ | |
while( isspace( ch = getc( fd ) ) ) ; | |
if( ch == EOF ) | |
return NULL; | |
/* Save ch until we are not a word. | |
*/ | |
for( n = 0; isword( ch ) && (n < (BUFFERSIZE - 1)); ch = getc( fd ) ) | |
buff[ n++ ] = ch; | |
/* Add a terminator and return | |
*/ | |
buff[n] = '\0'; | |
return buff; | |
} | |
/* Remove leading and trailing punctuation from a string, in place.
 * Leading punctuation is skipped by returning a pointer further into str;
 * trailing punctuation is overwritten with '\0' characters. Punctuation
 * inside the word (as in "don't") is left alone.
 *
 * Returns a pointer into str, possibly to an empty string.
 */
char *
StripPunct( str )
char *str;
{
	char *start = str;
	char *end = str + strlen( str );	/* Point at terminator */

	/* Move start up .. the '\0' will stop us going too far
	 */
	while( ispunct( (unsigned char) *start ) )
		start++;

	/* Move the '\0' down. Test end > start first, so that we never
	 * look at the byte in front of an empty string.
	 */
	while( end > start && ispunct( (unsigned char) end[-1] ) )
		*--end = '\0';

	return start;
}
/* Force all the letters in a string to lower case, in place. Special
 * cases: a lone "I" is left untouched, and in words longer than two
 * characters that start with 'I' followed by punctuation ("I'd", "I'll"
 * etc.) the leading "I'" is kept and only the rest is lowered.
 */
void
ZapCase( str )
char *str;
{
	if( strcmp( str, "I" ) == 0 )
		return;			/* Just an "I" */

	if( (str[0] == 'I') && (strlen( str ) > 2) &&
		ispunct( (unsigned char) str[1] ) )
		str += 2;		/* "I'" form. Skip the I' */

	/* Cast before every <ctype.h> call: a plain char may be negative.
	 */
	for( ; *str != '\0'; str++ )
		if( isupper( (unsigned char) *str ) )
			*str = (char) tolower( (unsigned char) *str );
}
/* Drain fd one word at a time. Each word has its outer punctuation
 * stripped and its case forced down; whatever survives is appended to
 * wdlist. Words that end up empty are dropped.
 */
void
ReadText( fd, wdlist )
FILE *fd;
struct wordlist *wdlist;
{
	for( ;; ) {
		char *raw = NextWord( fd );
		char *clean;

		if( raw == NULL )
			break;			/* End of input */

		clean = StripPunct( raw );
		ZapCase( clean );
		if( *clean == '\0' )
			continue;		/* Nothing but punctuation */

		AddWord( wdlist, clean );
	}
}
/* Simple random numbers .. generate an int in the range 1 to n.
 *
 * An n below 1 leaves no range to pick from; we return 1 then, instead of
 * dividing by zero. One value is always drawn from random(), whatever n is.
 */
int
rnd( n )
int n;
{
	long r = random();

	if( n < 1 )
		return 1;

	return( (int) (r % n) + 1 );
}
/* Generate a random fragment of text. Print the words to stdout, joined up | |
* with spaces. | |
*/ | |
void | |
PrintFrag( wdlist ) | |
struct wordlist *wdlist; | |
{ int nwords = rnd( fragsize ); | |
int index = rnd( wdlist->size - nwords + 1 ) - 1; | |
for( ; nwords > 0; nwords-- ) { | |
printf( "%s", ExtractWord( wdlist, index++ ) ); | |
if( nwords > 1 ) | |
printf( " " ); /* Print joining space */ | |
} | |
} | |
/* Generate a random phrase. | |
*/ | |
void | |
PrintPhrase( wdlist ) | |
struct wordlist *wdlist; | |
{ int nfrag; | |
for( nfrag = rnd( phrasesize ); nfrag > 0; nfrag-- ) { | |
PrintFrag( wdlist ); | |
if( nfrag > 1 ) | |
printf( " " ); /* Print joining space */ | |
} | |
} | |
/* Generate a random sentence. | |
*/ | |
void | |
PrintSent( wdlist ) | |
struct wordlist *wdlist; | |
{ int nphrase; | |
for( nphrase = rnd( sentsize ); nphrase > 0; nphrase-- ) { | |
PrintPhrase( wdlist ); | |
if( nphrase > 1 ) | |
printf( ", " ); /* Print joining comma */ | |
} | |
printf( " ...\n" ); /* And trailing ellipsis */ | |
} | |
/* Main function! Decode the flags, read in all the input and start spewing | |
* out text. | |
*/ | |
main( argc, argv ) | |
int argc; | |
char **argv; | |
{ struct wordlist *wdlist = EmptyList(); | |
int fileargs = 0; /* Were there file args */ | |
struct timeb ts; /* Read time into this */ | |
while( --argc ) | |
if( argv[argc][0] == '-' ) | |
switch( argv[argc][1] ) { | |
case 'f': | |
sscanf( &argv[argc][2], "%d", | |
&fragsize ); | |
break; | |
case 'p': | |
sscanf( &argv[argc][2], "%d", | |
&phrasesize ); | |
break; | |
case 's': | |
sscanf( &argv[argc][2], "%d", | |
&sentsize ); | |
break; | |
case '0': case '1': case '2': case '3': | |
case '4': case '5': case '6': case '7': | |
case '8': case '9': | |
sscanf( &argv[argc][1], "%d", | |
&numsent ); | |
break; | |
default: | |
fprintf( stderr, USAGE, argv[0], | |
argv[0] ); | |
exit( 1 ); | |
break; | |
} | |
else { | |
FILE *fd; | |
fd = fopen( argv[argc], "r" ); | |
if( fd == NULL ) { | |
fprintf( stderr, "%s: unable to read %s\n", | |
argv[0], argv[argc] ); | |
exit( 1 ); | |
} | |
ReadText( fd, wdlist ); | |
fclose( fd ); | |
fileargs = 1; | |
} | |
if( !fileargs ) | |
/* No file args .. read from stdin | |
*/ | |
ReadText( stdin, wdlist ); | |
/* Check we have enough words for PrintFrag | |
*/ | |
if( wdlist->size < fragsize ) { | |
fprintf( stderr, "%s: too few words (minimum %d)\n", | |
argv[0], fragsize ); | |
exit( 1 ); | |
} | |
/* All args done .. now seed the random number generator from | |
* the time. | |
*/ | |
ftime( &ts ); | |
srandom( (int) ts.millitm ); | |
/* Print random text. | |
*/ | |
do | |
PrintSent( wdlist ); | |
while( --numsent ); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment