Last active
March 25, 2022 17:48
-
-
Save krzys-h/4717608089c54f733083fd390e5c0f2b to your computer and use it in GitHub Desktop.
HTTrack remove param from query string plugin
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Remove sid= from query string plugin for HTTrack | |
// by krzys_h, 2018-05-15 | |
// https://gist.github.com/krzys-h/4717608089c54f733083fd390e5c0f2b | |
// This makes HTTrack automatically strip the sid= parameter from the URLs it visits, because this cannot be done with command-line options alone (see https://forum.httrack.com/readmsg/27508/index.html)
// Compilation and usage: | |
// gcc -shared -o wrapper.so -fPIC -I/usr/include/httrack wrapper.c | |
// httrack --wrapper ./wrapper.so ... | |
// see https://www.httrack.com/html/plug.html | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <httrack-library.h> | |
#include <htsopt.h> | |
#include <htsdefines.h> | |
static int my_link_detected(t_hts_callbackarg *carg, httrackp* opt, | |
char* link, const const char* tag_start) | |
{ | |
for (char* query = strchr(link, '?'); query != NULL; query = strchr(query, '&')) | |
{ | |
++query; // skip the separator | |
if (*query == '\0') | |
break; | |
if (strncmp(query, "sid=", 4) == 0 || strncmp(query, "amp;sid=", 8) == 0) | |
{ | |
char* tgt = query; | |
char* src = strchr(query, '&'); | |
if (src != NULL) | |
{ | |
++src; // skip the separator | |
while(*src != '\0') | |
{ | |
*tgt++ = *src++; | |
} | |
} | |
*tgt = '\0'; | |
} | |
} | |
char* end = strchr(link, '\0'); | |
--end; // go to last valid character | |
while (*end == '&' || *end == '?') | |
*end-- = '\0'; // clean up trailing & or ? | |
//printf("%s\n", link); | |
return 1; /* yes, process this */ | |
} | |
/*
 * Module load hook: announces itself on stderr and chains
 * my_link_detected onto the "linkdetected2" callback of `opt`
 * (with no user argument).
 *
 * argv: module argument string; not used by this plugin.
 *
 * Always returns 1 (success).
 */
EXTERNAL_FUNCTION int hts_plug(httrackp *opt, const char *argv)
{
    (void) argv;

    fputs("Module plugged\n", stderr);
    CHAIN_FUNCTION(opt, linkdetected2, my_link_detected, NULL);

    return 1; /* success */
}
/*
 * Module unload hook: only reports on stderr; there is nothing to
 * release because hts_plug allocates no state of its own.
 *
 * Always returns 1 (success).
 */
EXTERNAL_FUNCTION int hts_unplug(httrackp *opt)
{
    (void) opt;

    fputs("Module unplugged\n", stderr);

    return 1; /* success */
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice! Let me see if I can make it work on my own installation.