Created
January 29, 2013 14:47
Simple image grabber for imageboards, written in C. Uses PCRE and libcurl.
Usage: grabber [URL]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <pcre.h>
#include <curl/curl.h>
/* User-Agent string handed to libcurl through CURLOPT_USERAGENT when a page is fetched. */
#define USER_AGENT "Mozilla/5.0 (Windows NT 6.1; rv:18.0) Gecko/20100101 Firefox/18.0" | |
/*
 * Growable in-memory buffer used to accumulate a downloaded document.
 */
struct MemoryStruct {
    char *memory;   /* heap buffer holding the bytes received so far */
    size_t size;    /* number of payload bytes stored in `memory` */
};
/*
 * Leading components of a page URL after it has been split on '/'.
 * The members are plain pointers; the struct does not own the strings.
 */
typedef struct URL {
    char *protocol;   /* first '/'-separated token of the URL */
    char *domain;     /* second '/'-separated token */
    char *board;      /* third '/'-separated token */
} URL;
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) { | |
size_t realsize = size * nmemb; | |
struct MemoryStruct *mem = (struct MemoryStruct *)userp; | |
mem->memory = realloc(mem->memory, mem->size + realsize + 1); | |
if (mem->memory == NULL) { | |
printf("not enough memory (realloc returned NULL)\n"); | |
exit(EXIT_FAILURE); | |
} | |
memcpy(&(mem->memory[mem->size]), contents, realsize); | |
mem->size += realsize; | |
mem->memory[mem->size] = 0; | |
return realsize; | |
} | |
char * curlGrabURL (char * url ) { | |
CURL *curl_handle; | |
struct MemoryStruct chunk; | |
chunk.memory = malloc(1); | |
chunk.size = 0; | |
curl_global_init(CURL_GLOBAL_ALL); | |
curl_handle = curl_easy_init(); | |
curl_easy_setopt(curl_handle, CURLOPT_URL, url); | |
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); | |
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk); | |
curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, USER_AGENT); | |
curl_easy_perform(curl_handle); | |
curl_easy_cleanup(curl_handle); | |
printf("%lu bytes retrieved\n", (long)chunk.size); | |
//if(chunk.memory) | |
// free(chunk.memory); | |
return chunk.memory; | |
} | |
void curlSaveImage (char* img_url) { | |
CURL *curl; | |
CURLcode res; | |
FILE *img; | |
char *filename = strrchr(img_url,'/'); | |
if(filename != NULL) | |
filename++; | |
img = fopen(filename, "r"); | |
curl = curl_easy_init(); | |
if (curl) { | |
if(img) { | |
//printf("File %s exists, skiping.\n", filename); | |
fclose(img); | |
} | |
else { | |
img = fopen(filename, "wb"); | |
printf("Downloading: %s\n", img_url); | |
curl_easy_setopt(curl, CURLOPT_URL, img_url); | |
curl_easy_setopt(curl, CURLOPT_WRITEDATA, img); | |
res = curl_easy_perform(curl); | |
if(res != CURLE_OK) | |
fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res)); | |
//curl_easy_cleanup(curl); | |
fclose(img); | |
} | |
} | |
} | |
void grepImages (char* data, char * head) { | |
pcre *reCompiled; | |
pcre_extra *pcreExtra; | |
int pcreExecRet; | |
int subStrVec[30]; | |
const char *pcreErrorStr; | |
int pcreErrorOffset; | |
char *aStrRegex; | |
char url[32]; | |
const char *psubStrMatchStr; | |
char *testStrings[] = { data, '\0'}; | |
aStrRegex = "/src/[0-9]+(.jpg|.png|.gif)"; | |
reCompiled = pcre_compile(aStrRegex, 0, &pcreErrorStr, &pcreErrorOffset, NULL); | |
if(reCompiled == NULL) { | |
printf("ERROR: Could not compile '%s': %s\n", aStrRegex, pcreErrorStr); | |
exit(1); | |
} | |
pcreExtra = pcre_study(reCompiled, 0, &pcreErrorStr); | |
if(pcreErrorStr != NULL) { | |
printf("ERROR: Could not study '%s': %s\n", aStrRegex, pcreErrorStr); | |
exit(1); | |
} | |
pcreExecRet = pcre_exec(reCompiled, | |
pcreExtra, | |
*testStrings, | |
strlen(*testStrings), | |
0, | |
0, | |
subStrVec, | |
30); | |
if(pcreExecRet > 0) { | |
pcre_get_substring(*testStrings, subStrVec, pcreExecRet, 0, &(psubStrMatchStr)); | |
sprintf(url, "%s%s", head, psubStrMatchStr); | |
//printf("%s\n", url); | |
curlSaveImage(url); | |
} | |
pcre_free_substring(psubStrMatchStr); | |
pcre_free(reCompiled); | |
if(pcreExtra != NULL) | |
pcre_free(pcreExtra); | |
} | |
void grepURLs (char * url) { | |
char * str; | |
char * pch; | |
char * head; | |
str = curlGrabURL(url); | |
head = malloc(16 * sizeof(char)); | |
URL nURL; | |
nURL.protocol = strtok(url, "/"); | |
nURL.domain = strtok(NULL, "/"); | |
nURL.board = strtok(NULL, "/"); | |
sprintf(head, "%s//%s/%s", nURL.protocol, nURL.domain, nURL.board); | |
pch = strtok(str, ">"); | |
while (pch != NULL) { | |
grepImages(pch, head); | |
pch = strtok(NULL, ">"); | |
} | |
free(head); | |
} | |
/*
 * Entry point: expects the page URL as the first command-line argument and
 * hands it to grepURLs().
 *
 * Returns EXIT_FAILURE after printing a usage line to stderr when no URL is
 * given, EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[]) {
    if (argc <= 1) {
        fprintf(stderr, "Usage: grabber.exe [URL]\n");
        return EXIT_FAILURE;
    }

    grepURLs(argv[1]);
    return EXIT_SUCCESS;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment