Skip to content

Instantly share code, notes, and snippets.

@jtsiomb
Created January 2, 2021 15:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jtsiomb/c1196c5a72b04148d97738749f0958b1 to your computer and use it in GitHub Desktop.
Save jtsiomb/c1196c5a72b04148d97738749f0958b1 to your computer and use it in GitHub Desktop.
text file cleanup: dos->unix line endings (CR LF and lone CR become LF), missing newline at EOF, and trailing whitespace
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
/* Returns a pointer to the first character in [ptr, end) which is not a
 * space, tab or vertical tab, or a pointer >= end if there is none.
 */
char *find_nonws(char *ptr, char *end);
/* trailing_ws: non-zero when the -w option was given on the command line.
 * fixed_trailing: number of trailing whitespace runs dropped from the file
 * currently being processed (reset for every file).
 */
int trailing_ws, fixed_trailing;
/* Print the command-line synopsis to stdout. */
static void print_usage(const char *progname)
{
	printf("Usage %s [options] <file> [<file 2> ... <file N>]\n", progname);
	printf("Options:\n");
	printf(" -w: clean trailing whitespace\n");
	printf(" -h: print usage and exit\n");
}

/* Load the whole file at path into a malloc'ed buffer.
 *
 * On success returns the buffer (to be released with free) and stores its
 * length in *size_ret. On failure prints a diagnostic to stderr and returns
 * NULL. Nothing is modified on disk, so a file which can't be read completely
 * is never truncated by the rewrite which follows.
 */
static char *read_whole_file(const char *path, long *size_ret)
{
	FILE *fp;
	char *buf;
	long size;

	if(!(fp = fopen(path, "rb"))) {
		fprintf(stderr, "Could not open file %s for reading\n", path);
		return NULL;
	}
	if(fseek(fp, 0, SEEK_END) != 0 || (size = ftell(fp)) < 0 || fseek(fp, 0, SEEK_SET) != 0) {
		fprintf(stderr, "failed to determine the size of: %s\n", path);
		fclose(fp);
		return NULL;
	}
	/* malloc(0) may legitimately return NULL; always ask for at least one
	 * byte so that an empty file isn't mistaken for an allocation failure.
	 */
	if(!(buf = malloc(size ? (size_t)size : 1))) {
		fprintf(stderr, "failed to allocate %ld bytes for: %s\n", size, path);
		fclose(fp);
		return NULL;
	}
	if(fread(buf, 1, (size_t)size, fp) != (size_t)size) {
		fprintf(stderr, "failed to read %ld bytes from: %s\n", size, path);
		free(buf);
		fclose(fp);
		return NULL;
	}
	fclose(fp);

	*size_ret = size;
	return buf;
}

/* Write the cleaned-up version of the size bytes at buf to fp.
 *
 * - CR LF pairs and lone CRs are replaced by a single LF
 * - if trailing_ws is set, whitespace runs followed by a line ending (or by
 *   the end of the file) are dropped, and counted in fixed_trailing
 * - a final LF is appended if the output doesn't already end with one
 *
 * Returns the number of CR characters which were removed or replaced, and
 * sets *added_newline to non-zero if a LF had to be appended at the end.
 */
static int write_cleaned(FILE *fp, char *buf, long size, int *added_newline)
{
	char *ptr = buf;
	char *end = buf + size;
	int lines = 0;
	/* last character written; starting at LF means that an empty output
	 * doesn't get a newline appended to it.
	 */
	char last = '\n';

	while(ptr < end) {
		switch(*ptr) {
		case '\r':
			/* CR LF: drop the CR, the LF is written by the next iteration.
			 * lone CR (possibly the very last byte): replace it with LF.
			 */
			if(ptr + 1 >= end || ptr[1] != '\n') {
				fputc('\n', fp);
				last = '\n';
			}
			ptr++;
			lines++;
			break;

		case '\n':
			fputc(*ptr++, fp);
			last = '\n';
			break;

		default:
			/* the cast is needed because passing a negative char to
			 * isspace is undefined behavior
			 */
			if(trailing_ws && isspace((unsigned char)*ptr)) {
				char *ws_end = find_nonws(ptr + 1, end);
				if(ws_end >= end || *ws_end == '\r' || *ws_end == '\n') {
					/* nothing but whitespace up to the end of the line */
					fixed_trailing++;
				} else {
					fwrite(ptr, 1, (size_t)(ws_end - ptr), fp);
					last = ws_end[-1];
				}
				ptr = ws_end;
			} else {
				last = *ptr;
				fputc(*ptr++, fp);
			}
		}
	}

	*added_newline = 0;
	if(last != '\n') {
		fputc('\n', fp);
		*added_newline = 1;
	}
	return lines;
}

/* Clean up the file at path in place, and print a one-line report to stdout.
 * Failures are reported on stderr. The file is rewritten in place (no
 * temporary file), so a failure while writing can leave it incomplete.
 */
static void clean_file(const char *path)
{
	FILE *fp;
	char *memfile;
	long filesize;
	int lines, added_newline, write_failed;

	if(!(memfile = read_whole_file(path, &filesize))) {
		return;
	}
	if(!(fp = fopen(path, "wb"))) {
		fprintf(stderr, "Could not open file %s for writing\n", path);
		free(memfile);
		return;
	}

	fixed_trailing = 0;
	lines = write_cleaned(fp, memfile, filesize, &added_newline);
	free(memfile);

	/* fclose flushes, so its result is part of the write status too */
	write_failed = ferror(fp);
	if(fclose(fp) != 0) {
		write_failed = 1;
	}
	if(write_failed) {
		fprintf(stderr, "failed to write file %s\n", path);
		return;
	}

	printf("%s - fixed %d line endings", path, lines);
	if(fixed_trailing) {
		printf(", %d trailing w/s", fixed_trailing);
	}
	if(added_newline) {
		puts(", and newline @ eof");
	} else {
		putchar('\n');
	}
}

/* Entry point: parses the options, then cleans up every file named on the
 * command line in place.
 *
 * Options:
 *   -w  also strip trailing whitespace
 *   -h  print usage
 *
 * Returns 1 after printing the usage text (on -h, an unknown option, or when
 * no files were given), 0 otherwise. Files which can't be processed are
 * reported on stderr and skipped.
 */
int main(int argc, char **argv)
{
	int i, count = 0;

	for(i=1; i<argc; i++) {
		if(argv[i][0] != '-') {
			count++;
			continue;
		}
		/* argv[i][1] must be checked before argv[i][2]: a lone "-" is
		 * only two bytes long including the terminator.
		 */
		if(argv[i][1] == 'w' && argv[i][2] == 0) {
			trailing_ws = 1;
		} else {
			/* -h, and anything we don't recognize */
			print_usage(argv[0]);
			return 1;
		}
	}

	if(!count) {
		print_usage(argv[0]);
		return 1;
	}

	for(i=1; i<argc; i++) {
		if(argv[i][0] == '-') continue;
		clean_file(argv[i]);
	}
	return 0;
}
/* Skip over blanks: returns the address of the first character in the range
 * [ptr, end) which is neither a space, a tab, nor a vertical tab. If the
 * whole range is made up of such characters, end is returned; if the range
 * is empty, ptr is returned unchanged.
 */
char *find_nonws(char *ptr, char *end)
{
	char *cur;

	for(cur = ptr; cur < end; cur++) {
		switch(*cur) {
		case ' ':
		case '\t':
		case '\v':
			continue;	/* still whitespace, keep scanning */
		default:
			return cur;
		}
	}
	return cur;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment