Skip to content

Instantly share code, notes, and snippets.

@jtsiomb
Created March 24, 2018 15:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jtsiomb/b774f2a0b41a55a4f11b047cb28be0e7 to your computer and use it in GitHub Desktop.
Save jtsiomb/b774f2a0b41a55a4f11b047cb28be0e7 to your computer and use it in GitHub Desktop.
Efficient in-place search & replace, even in huge files (UNIX)
/*! cc -o rinplace -pedantic -Wall -g rinplace.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
/* One match found in the file, kept as a node of a singly-linked list. */
struct snode {
	long offs;              /* byte offset of the match within the file */
	long len;               /* length of the match in bytes */
	struct snode *next;     /* following match, or 0 at the end of the list */
};
/* Searches fp for occurrences of str; returns a list of the matches, or 0
 * when there are none (or on failure). */
struct snode *find(FILE *fp, const char *str);
/* Overwrites every match in slist with repstr, filling any leftover bytes of
 * a match with fillc. Returns 0 on success, -1 on failure. */
int replace(FILE *fp, struct snode *slist, const char *repstr, char fillc);
int main(int argc, char **argv)
{
FILE *fp;
struct snode *slist;
if(argc != 4) {
fprintf(stderr, "Usage: %s <file> <search> <replace>\n", argv[0]);
return 1;
}
if(!(fp = fopen(argv[1], "r+"))) {
fprintf(stderr, "failed to open %s: %s\n", argv[1], strerror(errno));
return 1;
}
if(!(slist = find(fp, argv[2]))) {
printf("no match\n");
return 0;
}
if(replace(fp, slist, argv[3], ' ') == -1) {
fprintf(stderr, "replace failed\n");
return 1;
}
fclose(fp);
return 0;
}
/* Scans fp from its current position for non-overlapping occurrences of str
 * and returns them as a singly-linked list in file order.
 *
 * fp:  stream to search. It must be seekable, because the scan uses ftell()
 *      to record offsets and fseek() to step back after a failed candidate.
 * str: NUL-terminated search string; must not be empty.
 *
 * Returns the head of a list of malloc'ed nodes, each holding the offset and
 * length of one match, or 0 if nothing matched, str was empty, or an
 * allocation/seek error occurred (errors are reported on stderr). The caller
 * owns the returned nodes and is responsible for freeing them.
 */
struct snode *find(FILE *fp, const char *str)
{
	struct snode *head = 0, *tail = 0, *n;
	size_t sz, tailsz;
	char *buf;
	long offs;
	int c, first;

	/* with an empty string, "sz - 1" would wrap around and overrun buf */
	if(!str || !(sz = strlen(str))) {
		fprintf(stderr, "search string must not be empty\n");
		return 0;
	}
	tailsz = sz - 1;

	if(!(buf = malloc(sz))) {
		fprintf(stderr, "failed to allocate match buffer (%lu bytes)\n", (unsigned long)sz);
		return 0;
	}

	/* fgetc() yields the byte as an unsigned char converted to int, so the
	 * first pattern byte has to be compared the same way; a plain (possibly
	 * signed) char would never match bytes >= 0x80.
	 */
	first = (unsigned char)str[0];

	while((c = fgetc(fp)) != EOF) {
		if(c != first) {
			continue;
		}

		if((offs = ftell(fp)) == -1) {
			perror("failed to determine file position");
			goto fail;
		}
		offs -= 1;	/* offset of the byte we just consumed */

		if(fread(buf, 1, tailsz, fp) < tailsz) {
			/* fewer than sz bytes are left from offs on, so neither this
			 * candidate nor any later one can be a complete match
			 */
			break;
		}

		if(memcmp(buf, str + 1, tailsz) != 0) {
			/* not a match: resume right after the candidate's first byte */
			if(fseek(fp, offs + 1, SEEK_SET) == -1) {
				perror("failed to seek");
				goto fail;
			}
			continue;
		}

		if(!(n = malloc(sizeof *n))) {
			perror("failed to allocate change list node");
			goto fail;
		}
		n->offs = offs;
		n->len = (long)sz;
		n->next = 0;

		if(head) {
			tail->next = n;
		} else {
			head = n;
		}
		tail = n;
	}

	free(buf);
	return head;

fail:
	/* don't leak the matches collected so far */
	while(head) {
		n = head->next;
		free(head);
		head = n;
	}
	free(buf);
	return 0;
}
/* Overwrites every match listed in slist with repstr, directly in the file
 * behind fp, through a shared writable memory mapping.
 *
 * fp:     stream whose underlying descriptor is mapped; it must be open for
 *         both reading and writing for a MAP_SHARED writable mapping.
 * slist:  matches to overwrite, in ascending offset order; the mapping
 *         spans from the first node to the end of the last one.
 * repstr: replacement text; must be non-empty. If it is longer than a match
 *         it is truncated, if shorter the rest of the match is filled with
 *         fillc, so the file size never changes.
 * fillc:  padding byte used when repstr is shorter than the match.
 *
 * Returns 0 on success, -1 on invalid arguments or if mapping, syncing or
 * unmapping the file fails.
 */
int replace(FILE *fp, struct snode *slist, const char *repstr, char fillc)
{
	unsigned char *fptr, *sptr;
	long mapoffs, mapsz, cplen, replen, pgsz, count = 0;
	struct snode *n;
	int res = 0;

	if(!slist) return -1;
	if(!repstr || !(replen = strlen(repstr))) {
		return -1;
	}

	/* the mmap offset must be a multiple of the real page size, which is not
	 * 4 KiB everywhere
	 */
	if((pgsz = sysconf(_SC_PAGESIZE)) <= 0) {
		perror("failed to query page size");
		return -1;
	}

	/* find the last match to determine the extent of the mapping */
	n = slist;
	while(n->next) {
		n = n->next;
	}

	mapoffs = slist->offs - slist->offs % pgsz;
	mapsz = n->offs + n->len - mapoffs;

	fptr = mmap(0, mapsz, PROT_WRITE, MAP_SHARED, fileno(fp), mapoffs);
	if(fptr == MAP_FAILED) {
		perror("failed to map file");
		return -1;
	}

	for(n = slist; n; n = n->next) {
		cplen = n->len < replen ? n->len : replen;
		sptr = fptr + n->offs - mapoffs;

		memcpy(sptr, repstr, cplen);
		if(cplen < n->len) {
			/* replacement is shorter than the match: pad the remainder */
			memset(sptr + cplen, fillc, n->len - cplen);
		}
		++count;
	}

	/* flush explicitly so that write-back errors are reported, not lost */
	if(msync(fptr, mapsz, MS_SYNC) == -1) {
		perror("failed to sync changes to file");
		res = -1;
	}
	if(munmap(fptr, mapsz) == -1) {
		perror("failed to unmap file");
		res = -1;
	}

	if(res == 0) {
		printf("performed %ld replacements\n", count);
	}
	return res;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment