Skip to content

Instantly share code, notes, and snippets.

@x5f3759df
Created January 29, 2013 14:47
Simple image grabber for imageboards. Written in C. Uses pcre and libcurl. Usage: grabber [URL]
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pcre.h>
#include <curl/curl.h>
/* User-Agent string handed to libcurl via CURLOPT_USERAGENT when a page is fetched. */
#define USER_AGENT "Mozilla/5.0 (Windows NT 6.1; rv:18.0) Gecko/20100101 Firefox/18.0"
/* Growable in-memory buffer that the libcurl write callback appends to. */
struct MemoryStruct {
    char   *memory;  /* heap buffer; kept NUL-terminated by the write callback */
    size_t  size;    /* number of payload bytes stored, excluding the terminator */
};
/*
 * The first three '/'-separated pieces of a URL, as produced by grepURLs().
 * The pointers refer into the tokenised URL string; the struct owns nothing.
 */
struct URL {
    char *protocol;  /* first token, e.g. "http:" */
    char *domain;    /* second token, the host name */
    char *board;     /* third token, the first path component */
};
typedef struct URL URL;
/*
 * libcurl write callback: appends the delivered bytes to a growing,
 * NUL-terminated heap buffer.
 *
 * contents - data delivered by libcurl (not NUL-terminated)
 * size     - size of one element
 * nmemb    - number of elements; size * nmemb bytes are available
 * userp    - the struct MemoryStruct registered through CURLOPT_WRITEDATA
 *
 * Returns the number of bytes consumed (size * nmemb).
 * Terminates the process with EXIT_FAILURE when the buffer cannot be grown.
 */
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) {
    struct MemoryStruct *mem = userp;
    size_t realsize = size * nmemb;
    char *grown;

    /* Refuse sizes that would wrap around size_t, either in the product
     * or in the new buffer length (old size + new bytes + terminator). */
    if ((size != 0 && nmemb > (size_t)-1 / size) ||
        mem->size >= (size_t)-1 - realsize) {
        fprintf(stderr, "not enough memory (buffer size overflow)\n");
        exit(EXIT_FAILURE);
    }

    /* Keep the old pointer until realloc is known to have succeeded. */
    grown = realloc(mem->memory, mem->size + realsize + 1);
    if (grown == NULL) {
        fprintf(stderr, "not enough memory (realloc returned NULL)\n");
        exit(EXIT_FAILURE);
    }
    mem->memory = grown;

    if (realsize != 0) {
        memcpy(mem->memory + mem->size, contents, realsize);
    }
    mem->size += realsize;
    mem->memory[mem->size] = '\0';  /* keep the buffer usable as a C string */
    return realsize;
}
/*
 * Downloads the resource at `url` into memory.
 *
 * url - NUL-terminated URL, handed to libcurl unchanged
 *
 * Returns a heap-allocated, NUL-terminated buffer with whatever body was
 * received; it is an empty string when nothing arrived. Ownership passes
 * to the caller, who must free() it.
 * Terminates the process with EXIT_FAILURE if the initial allocation fails.
 */
char *curlGrabURL(char *url) {
    CURL *curl_handle;
    struct MemoryStruct chunk;

    chunk.memory = malloc(1);
    if (chunk.memory == NULL) {
        fprintf(stderr, "not enough memory (malloc returned NULL)\n");
        exit(EXIT_FAILURE);
    }
    /* Start as a valid empty string: if the transfer delivers no data the
     * write callback never runs and nothing else would terminate the buffer. */
    chunk.memory[0] = '\0';
    chunk.size = 0;

    curl_global_init(CURL_GLOBAL_ALL);
    curl_handle = curl_easy_init();
    if (curl_handle == NULL) {
        fprintf(stderr, "curl_easy_init() failed\n");
        return chunk.memory;
    }

    curl_easy_setopt(curl_handle, CURLOPT_URL, url);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
    curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, USER_AGENT);

    /* curl_easy_perform() returns CURLE_OK, which is 0, on success. */
    if (curl_easy_perform(curl_handle) != 0) {
        fprintf(stderr, "Failed to fetch %s\n", url);
    }
    curl_easy_cleanup(curl_handle);

    printf("%lu bytes retrieved\n", (unsigned long)chunk.size);
    return chunk.memory;
}
/*
 * Downloads `img_url` into the current directory, using the text after the
 * last '/' of the URL as the file name.
 *
 * img_url - NUL-terminated URL of the image
 *
 * A file that can already be opened for reading is treated as downloaded
 * and skipped. When the transfer or the final write fails, the partial file
 * is removed so that a later run retries it instead of skipping it.
 */
void curlSaveImage(char *img_url) {
    CURL *curl;
    CURLcode res;
    FILE *img;
    char *filename;

    if (img_url == NULL) {
        return;
    }

    filename = strrchr(img_url, '/');
    if (filename == NULL || filename[1] == '\0') {
        fprintf(stderr, "Cannot derive a file name from URL: %s\n", img_url);
        return;
    }
    filename++;  /* step past the '/' */

    /* Existing file: nothing to do. */
    img = fopen(filename, "r");
    if (img != NULL) {
        fclose(img);
        return;
    }

    curl = curl_easy_init();
    if (curl == NULL) {
        fprintf(stderr, "curl_easy_init() failed\n");
        return;
    }

    img = fopen(filename, "wb");
    if (img == NULL) {
        perror(filename);
        curl_easy_cleanup(curl);
        return;
    }

    printf("Downloading: %s\n", img_url);
    curl_easy_setopt(curl, CURLOPT_URL, img_url);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, img);
    res = curl_easy_perform(curl);
    if (res != CURLE_OK) {
        fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
    }
    curl_easy_cleanup(curl);

    /* fclose() flushes buffered data, so a failure here means the file is incomplete. */
    if (fclose(img) != 0) {
        perror(filename);
        remove(filename);
    } else if (res != CURLE_OK) {
        remove(filename);
    }
}
/*
 * Searches `data` for the first image path of the form
 * /src/<digits>.<jpg|png|gif> and, when one is found, downloads
 * head + path through curlSaveImage().
 *
 * data - NUL-terminated text fragment to scan
 * head - URL prefix that is prepended to the matched path
 *
 * Exits the process with status 1 if the pattern cannot be compiled or studied.
 */
void grepImages(char *data, char *head) {
    /* The dot before the extension is escaped so it only matches a literal '.'. */
    static const char aStrRegex[] = "/src/[0-9]+\\.(jpg|png|gif)";
    pcre *reCompiled;
    pcre_extra *pcreExtra;
    const char *pcreErrorStr = NULL;
    int pcreErrorOffset = 0;
    int subStrVec[30];
    int pcreExecRet;

    if (data == NULL || head == NULL) {
        return;
    }

    reCompiled = pcre_compile(aStrRegex, 0, &pcreErrorStr, &pcreErrorOffset, NULL);
    if (reCompiled == NULL) {
        fprintf(stderr, "ERROR: Could not compile '%s': %s\n", aStrRegex, pcreErrorStr);
        exit(1);
    }

    /* pcre_study() may legitimately return NULL; failure is signalled through the error string. */
    pcreExtra = pcre_study(reCompiled, 0, &pcreErrorStr);
    if (pcreErrorStr != NULL) {
        fprintf(stderr, "ERROR: Could not study '%s': %s\n", aStrRegex, pcreErrorStr);
        exit(1);
    }

    pcreExecRet = pcre_exec(reCompiled, pcreExtra, data, (int)strlen(data),
                            0, 0, subStrVec, 30);
    if (pcreExecRet > 0) {
        const char *match = NULL;

        /* Substring 0 is the whole match; it only exists (and only needs
         * freeing) when extraction succeeded. */
        if (pcre_get_substring(data, subStrVec, pcreExecRet, 0, &match) >= 0) {
            /* Size the URL buffer from its parts instead of using a fixed array. */
            size_t url_len = strlen(head) + strlen(match) + 1;
            char *url = malloc(url_len);

            if (url != NULL) {
                snprintf(url, url_len, "%s%s", head, match);
                curlSaveImage(url);
                free(url);
            } else {
                fprintf(stderr, "not enough memory (malloc returned NULL)\n");
            }
            pcre_free_substring(match);
        }
    }

    pcre_free(reCompiled);
    if (pcreExtra != NULL) {
        pcre_free(pcreExtra);
    }
}
/*
 * Fetches the page at `url`, splits it at every '>' and passes each
 * fragment to grepImages() together with the "protocol//domain/board"
 * prefix derived from `url`.
 *
 * url - NUL-terminated, writable URL of the form protocol://domain/board/...
 *       It is tokenised in place with strtok(), so its '/' separators are
 *       overwritten by the time this function returns.
 */
void grepURLs(char *url) {
    char *page;
    char *head;
    char *fragment;
    size_t head_len;
    URL nURL;

    if (url == NULL) {
        return;
    }

    /* Fetch before tokenising: strtok() below destroys the URL string. */
    page = curlGrabURL(url);
    if (page == NULL) {
        fprintf(stderr, "Could not download %s\n", url);
        return;
    }

    nURL.protocol = strtok(url, "/");
    nURL.domain = strtok(NULL, "/");
    nURL.board = strtok(NULL, "/");
    if (nURL.protocol == NULL || nURL.domain == NULL || nURL.board == NULL) {
        fprintf(stderr, "URL must have the form protocol://domain/board/...\n");
        free(page);
        return;
    }

    /* protocol + "//" + domain + "/" + board + terminator */
    head_len = strlen(nURL.protocol) + 2 + strlen(nURL.domain) + 1 + strlen(nURL.board) + 1;
    head = malloc(head_len);
    if (head == NULL) {
        fprintf(stderr, "not enough memory (malloc returned NULL)\n");
        free(page);
        return;
    }
    snprintf(head, head_len, "%s//%s/%s", nURL.protocol, nURL.domain, nURL.board);

    /* Split the page at '>' by hand so the loop does not rely on
     * strtok()'s hidden state while other functions run. */
    fragment = page;
    while (fragment != NULL) {
        char *end = strchr(fragment, '>');

        if (end != NULL) {
            *end = '\0';
        }
        if (*fragment != '\0') {
            grepImages(fragment, head);
        }
        fragment = (end != NULL) ? end + 1 : NULL;
    }

    free(head);
    free(page);  /* buffer returned by curlGrabURL() is owned by this function */
}
/*
 * Entry point. Expects the page URL as the first command-line argument
 * and downloads the images found on that page.
 *
 * Returns 0 after processing the URL, EXIT_FAILURE when no URL was given.
 */
int main(int argc, char *argv[]) {
    if (argc <= 1) {
        fprintf(stderr, "Usage: grabber.exe [URL]\n");
        return EXIT_FAILURE;
    }
    grepURLs(argv[1]);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment