Created
August 10, 2015 23:41
-
-
Save CodeZombie/c60c6fc2b2e52d8c090d to your computer and use it in GitHub Desktop.
4chan image dumper. Requires libcurl, stat and some standard C junk.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <sys/stat.h> | |
#include <stdlib.h> | |
#include "curl/curl.h" | |
#include <string.h> | |
/* Build notes:
 * - Use libcurl-openssl-dev.
 * - In Geany, only use F9 to compile the project and F5 to run it; the buttons
 *   at the top don't do the same thing.
 * - The -l flags should be added to the end of the 'build' command, not the
 *   'compile' command.
 * - Yes, I'm new to C :)
 */
/*
 * Growable byte buffer filled by write_memory_callback().
 * `memory` is reallocated as data arrives and is kept NUL-terminated;
 * `size` is the number of payload bytes stored (terminator not counted).
 */
struct MemoryStruct
{
    char   *memory; /* heap buffer holding the accumulated bytes */
    size_t  size;   /* payload length in bytes */
};
/*
 * Report whether a filesystem entry exists at filename_.
 * Returns 1 when stat() succeeds on the path, 0 otherwise
 * (missing path, permission problem, or any other stat() failure).
 */
int file_exists(const char *filename_)
{
    struct stat info;

    if (stat(filename_, &info) != 0) {
        return 0;
    }
    return 1;
}
/*
 * Extract the text that sits between two markers.
 *
 * Finds the first occurrence of substring_one_ in str_, then the first
 * occurrence of substring_two_ after it, and returns a copy of the text in
 * between.
 *
 * strict_ != 0: both markers are required; returns NULL if either is missing.
 * strict_ == 0: when substring_two_ is missing, the whole remainder of the
 *               string after substring_one_ is returned instead.
 *
 * Returns a freshly malloc'd, NUL-terminated string that the caller must
 * free(), or NULL when nothing matched or the allocation failed. The result
 * is always an independent copy, so it never aliases str_ and is always safe
 * to free regardless of which mode produced it.
 */
char *substring_sandwich(const char *str_, const char *substring_one_, const char *substring_two_, int strict_)
{
    const char *start;
    const char *end;
    size_t length;
    char *target;

    if (str_ == NULL || substring_one_ == NULL || substring_two_ == NULL) {
        return NULL;
    }

    start = strstr(str_, substring_one_);
    if (start == NULL) {
        return NULL;
    }
    start += strlen(substring_one_);

    end = strstr(start, substring_two_);
    if (end != NULL) {
        length = (size_t)(end - start);
    } else if (strict_ == 0) {
        /* Closing marker missing and strict mode off: take the rest of the string. */
        length = strlen(start);
    } else {
        return NULL;
    }

    target = malloc(length + 1);
    if (target == NULL) {
        return NULL;
    }
    memcpy(target, start, length);
    target[length] = '\0';
    return target;
}
/*
 * libcurl write callback that streams received data into an open FILE.
 *
 * ptr    - received data
 * size   - size of one element
 * nmemb  - number of elements
 * stream - destination file, opened for writing by the caller
 *
 * Returns the number of BYTES written. libcurl compares the return value with
 * size * nmemb and aborts the transfer when they differ, so fwrite()'s item
 * count is scaled by the element size before being returned.
 */
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
    size_t items_written = fwrite(ptr, size, nmemb, stream);

    return items_written * size;
}
size_t static write_memory_callback(void *contents, size_t size, size_t nmemb, void *userp) { | |
size_t realsize = size * nmemb; | |
struct MemoryStruct *mem = (struct MemoryStruct *)userp; | |
mem->memory = realloc(mem->memory, mem->size + realsize + 1); | |
if(mem->memory == NULL) { | |
printf("Not enough memory\n"); | |
return 0; | |
} | |
memcpy(&(mem->memory[mem->size]), contents, realsize); | |
mem->size += realsize; | |
mem->memory[mem->size] = 0; | |
return realsize; | |
} | |
int main(void) { | |
char *thread_url; | |
char *thread_board = NULL; | |
char *thread_id = NULL; | |
char *newline_pos; | |
size_t len; | |
char thread_json_url[64]; | |
struct MemoryStruct response; | |
response.memory = malloc(1); | |
response.size = 0; | |
CURL *curl = NULL; | |
long http_code; | |
int url_okay = 0; | |
while(url_okay == 0) { | |
printf("Please enter a 4chan thread URL: "); | |
thread_url = NULL; | |
getline(&thread_url, &len, stdin); | |
if( (newline_pos=strchr(thread_url, '\n')) ) {//strip newline from end of thread_url | |
*newline_pos = '\0'; | |
} | |
url_okay = 1; | |
if(strlen(thread_url) == 1){ | |
printf("\x1b[31mERROR: No text entered.\x1b[0m \n"); | |
url_okay = 0; | |
} | |
thread_board = substring_sandwich(thread_url, "4chan.org/", "/", 1); | |
if(!thread_board) { | |
printf("\x1b[31mERROR: Could not find board in URL\x1b[0m \n"); | |
url_okay = 0; | |
} | |
thread_id = substring_sandwich(thread_url, "/thread/", "/", 0); | |
if(!thread_id) { | |
printf("\x1b[31mERROR: Could not find thread from URL\x1b[0m \n"); | |
url_okay = 0; | |
} | |
if(url_okay == 1) { | |
strcpy(thread_json_url, "http://a.4cdn.org/"); | |
strcat(thread_json_url, thread_board); | |
strcat(thread_json_url, "/thread/"); | |
strcat(thread_json_url, thread_id); | |
strcat(thread_json_url, ".json"); | |
curl = curl_easy_init(); | |
curl_easy_setopt(curl, CURLOPT_URL, thread_json_url); | |
curl_easy_setopt(curl, CURLOPT_HTTPGET, 1); | |
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); | |
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_memory_callback); | |
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&response); | |
curl_easy_perform(curl); | |
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code); | |
if(http_code != 200) { | |
if(http_code == 404) { | |
printf("\x1b[31mERROR: Thread specified does not exist (404)\x1b[0m \n"); | |
}else { | |
printf("\x1b[31mERROR: Could not reach the server\x1b[0m \n"); | |
} | |
url_okay = 0; | |
} | |
curl_easy_cleanup(curl); | |
} | |
} | |
printf("\nSearching for images...\n\n"); | |
char *working_pointer = response.memory;//the pointer that jumps from image to image in the json data | |
char *working_extension = NULL; | |
char *working_timestamp = NULL; | |
char working_image_url[64]; | |
char working_local_directory[64]; | |
strcpy(working_local_directory, thread_board); | |
strcat(working_local_directory, thread_id); | |
mkdir(working_local_directory, 0755); | |
char working_local_filename[64]; | |
int images_found = 0; | |
int download_skips = 0; | |
FILE* file; | |
while(1) { | |
working_extension = substring_sandwich(working_pointer, "\"ext\":", ",", 1); //grab the file extension | |
if(!working_extension) { break; } //if it can't find one, then there are no more images in the thread. | |
working_extension++;//strip the first " | |
working_extension[strlen(working_extension) - 1] = '\0'; //strip the last " | |
working_timestamp = substring_sandwich(working_pointer, "\"tim\":", ",", 1); | |
//generate image URL | |
strcpy(working_image_url, "http://i.4cdn.org/"); | |
strcat(working_image_url, thread_board); | |
strcat(working_image_url, "/"); | |
strcat(working_image_url, working_timestamp); | |
strcat(working_image_url, working_extension); | |
//generate local path+filename | |
strcpy(working_local_filename, working_local_directory); | |
strcat(working_local_filename, "/"); | |
strcat(working_local_filename, working_timestamp); | |
strcat(working_local_filename, working_extension); | |
working_pointer = strstr(working_pointer, "\"tim\":") + 6; //set the pointer up to find the next image in the json | |
printf("Image: %s ", working_image_url); | |
if(!file_exists(working_local_filename)) { | |
printf("is being downloaded...\n"); | |
curl = curl_easy_init(); | |
curl_easy_setopt(curl, CURLOPT_URL, working_image_url); | |
curl_easy_setopt(curl, CURLOPT_HTTPGET, 1); | |
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); | |
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data); | |
file = fopen(working_local_filename, "wb"); | |
curl_easy_setopt(curl, CURLOPT_WRITEDATA, file); | |
curl_easy_perform(curl); | |
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code); | |
fclose(file); | |
if(http_code != 200) { | |
if(http_code == 404) { | |
printf("\x1b[31mERROR: File does not exist (404)\x1b[0m \n"); | |
}else { | |
printf("\x1b[31mERROR: Could not reach the server\x1b[0m \n"); | |
} | |
} | |
curl_easy_cleanup(curl); | |
} | |
else{ | |
printf("already exists. Skipped.\n"); | |
download_skips++; | |
} | |
images_found++; | |
} | |
if(images_found == 1) {//lol grammar | |
printf("\nFound %i image. ", images_found); | |
}else { | |
printf("\nFound %i images. ", images_found); | |
} | |
if(images_found - download_skips == 1){ //more grammar | |
printf("%i was downloaded.\n", (images_found - download_skips) ); | |
} | |
else if(images_found - download_skips > 0) { | |
printf("%i were downloaded.\n", (images_found - download_skips) ); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment