Skip to content

Instantly share code, notes, and snippets.

@CodeZombie
Created August 10, 2015 23:41
Show Gist options
  • Save CodeZombie/c60c6fc2b2e52d8c090d to your computer and use it in GitHub Desktop.
Save CodeZombie/c60c6fc2b2e52d8c090d to your computer and use it in GitHub Desktop.
4chan image dumper. Requires libcurl, stat and some standard C junk.
#include <stdio.h>
#include <sys/stat.h>
#include <stdlib.h>
#include "curl/curl.h"
#include <string.h>
/* Use libcurl-openssl-dev (packaged as libcurl4-openssl-dev on Debian/Ubuntu).
* In Geany, use only F9 to build the project and F5 to run it; the toolbar buttons do not do the same thing.
* Also, the -l flags (e.g. -lcurl) must be added to the end of the "Build" command, not the "Compile" command.
* Yes, I'm new to C :)
* */
/* Growable in-memory buffer; write_memory_callback appends received data to it. */
struct MemoryStruct {
/* Heap buffer holding the accumulated bytes; write_memory_callback keeps it NUL-terminated. */
char *memory;
/* Number of payload bytes stored in `memory`, not counting the terminating NUL. */
size_t size;
};
/*
 * Report whether something exists at the given filesystem path.
 *
 * filename_: path to probe with stat().
 *
 * Returns 1 when stat() succeeds on the path and 0 when it fails for any
 * reason (the failure cause is not distinguished).
 */
int file_exists(const char *filename_)
{
    struct stat info;

    if (stat(filename_, &info) != 0) {
        return 0;
    }
    return 1;
}
/*
 * Extract the text found between two marker substrings.
 *
 * str_:           string to search.
 * substring_one_: opening marker; the result starts right after its first
 *                 occurrence in str_.
 * substring_two_: closing marker; the result ends right before its first
 *                 occurrence after the opening marker.
 * strict_:        when non-zero, a missing closing marker is a failure;
 *                 when zero, a missing closing marker means "everything up
 *                 to the end of str_".
 *
 * Returns a newly allocated, NUL-terminated copy of the extracted text, or
 * NULL when any argument is NULL, the opening marker is absent, the closing
 * marker is absent in strict mode, or memory cannot be allocated.
 *
 * Ownership: the result is always a separate heap allocation (never a pointer
 * into str_), so the caller may free() it in every case.
 */
char *substring_sandwich(char *str_, char *substring_one_, char *substring_two_, int strict_)
{
    if (str_ == NULL || substring_one_ == NULL || substring_two_ == NULL) {
        return NULL;
    }

    const char *start = strstr(str_, substring_one_);
    if (start == NULL) {
        return NULL;
    }
    start += strlen(substring_one_);

    size_t length;
    const char *end = strstr(start, substring_two_);
    if (end != NULL) {
        length = (size_t)(end - start);
    } else if (strict_ == 0) {
        /* Closing marker missing and strict mode off: take the rest of the string. */
        length = strlen(start);
    } else {
        return NULL;
    }

    char *target = malloc(length + 1);
    if (target == NULL) {
        return NULL;
    }
    memcpy(target, start, length);
    target[length] = '\0';
    return target;
}
/*
 * Write callback that forwards a received chunk straight to a stdio stream.
 * main() installs it as the CURLOPT_WRITEFUNCTION for image downloads.
 *
 * ptr:    start of the chunk.
 * size:   size of one item in bytes.
 * nmemb:  number of items in the chunk.
 * stream: destination stream.
 *
 * Returns whatever fwrite() reports, i.e. the number of items written.
 */
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
    return fwrite(ptr, size, nmemb, stream);
}
/*
 * Write callback that appends a received chunk to a growable memory buffer.
 * main() installs it as the CURLOPT_WRITEFUNCTION for the thread JSON.
 *
 * contents: start of the chunk.
 * size:     size of one item in bytes.
 * nmemb:    number of items in the chunk.
 * userp:    pointer to the struct MemoryStruct that accumulates the data.
 *
 * Returns the number of bytes consumed (size * nmemb) on success. Returns 0
 * when the sizes would overflow or the buffer cannot be grown; in that case
 * the existing buffer is left untouched and still owned by the caller.
 */
static size_t write_memory_callback(void *contents, size_t size, size_t nmemb, void *userp)
{
    struct MemoryStruct *mem = userp;
    const size_t max_size = (size_t)-1;

    /* Refuse chunk dimensions whose product does not fit in size_t. */
    if (size != 0 && nmemb > max_size / size) {
        return 0;
    }
    size_t realsize = size * nmemb;

    /* Refuse growth that would overflow once the terminating NUL is added. */
    if (mem->size >= max_size || realsize > max_size - mem->size - 1) {
        return 0;
    }

    /* Grow through a temporary so the old buffer survives a failed realloc. */
    char *grown = realloc(mem->memory, mem->size + realsize + 1);
    if (grown == NULL) {
        printf("Not enough memory\n");
        return 0;
    }
    mem->memory = grown;

    if (realsize > 0) {
        memcpy(mem->memory + mem->size, contents, realsize);
    }
    mem->size += realsize;
    mem->memory[mem->size] = '\0'; /* keep the buffer usable as a C string */
    return realsize;
}
int main(void) {
char *thread_url;
char *thread_board = NULL;
char *thread_id = NULL;
char *newline_pos;
size_t len;
char thread_json_url[64];
struct MemoryStruct response;
response.memory = malloc(1);
response.size = 0;
CURL *curl = NULL;
long http_code;
int url_okay = 0;
while(url_okay == 0) {
printf("Please enter a 4chan thread URL: ");
thread_url = NULL;
getline(&thread_url, &len, stdin);
if( (newline_pos=strchr(thread_url, '\n')) ) {//strip newline from end of thread_url
*newline_pos = '\0';
}
url_okay = 1;
if(strlen(thread_url) == 1){
printf("\x1b[31mERROR: No text entered.\x1b[0m \n");
url_okay = 0;
}
thread_board = substring_sandwich(thread_url, "4chan.org/", "/", 1);
if(!thread_board) {
printf("\x1b[31mERROR: Could not find board in URL\x1b[0m \n");
url_okay = 0;
}
thread_id = substring_sandwich(thread_url, "/thread/", "/", 0);
if(!thread_id) {
printf("\x1b[31mERROR: Could not find thread from URL\x1b[0m \n");
url_okay = 0;
}
if(url_okay == 1) {
strcpy(thread_json_url, "http://a.4cdn.org/");
strcat(thread_json_url, thread_board);
strcat(thread_json_url, "/thread/");
strcat(thread_json_url, thread_id);
strcat(thread_json_url, ".json");
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, thread_json_url);
curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_memory_callback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&response);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
if(http_code != 200) {
if(http_code == 404) {
printf("\x1b[31mERROR: Thread specified does not exist (404)\x1b[0m \n");
}else {
printf("\x1b[31mERROR: Could not reach the server\x1b[0m \n");
}
url_okay = 0;
}
curl_easy_cleanup(curl);
}
}
printf("\nSearching for images...\n\n");
char *working_pointer = response.memory;//the pointer that jumps from image to image in the json data
char *working_extension = NULL;
char *working_timestamp = NULL;
char working_image_url[64];
char working_local_directory[64];
strcpy(working_local_directory, thread_board);
strcat(working_local_directory, thread_id);
mkdir(working_local_directory, 0755);
char working_local_filename[64];
int images_found = 0;
int download_skips = 0;
FILE* file;
while(1) {
working_extension = substring_sandwich(working_pointer, "\"ext\":", ",", 1); //grab the file extension
if(!working_extension) { break; } //if it can't find one, then there are no more images in the thread.
working_extension++;//strip the first "
working_extension[strlen(working_extension) - 1] = '\0'; //strip the last "
working_timestamp = substring_sandwich(working_pointer, "\"tim\":", ",", 1);
//generate image URL
strcpy(working_image_url, "http://i.4cdn.org/");
strcat(working_image_url, thread_board);
strcat(working_image_url, "/");
strcat(working_image_url, working_timestamp);
strcat(working_image_url, working_extension);
//generate local path+filename
strcpy(working_local_filename, working_local_directory);
strcat(working_local_filename, "/");
strcat(working_local_filename, working_timestamp);
strcat(working_local_filename, working_extension);
working_pointer = strstr(working_pointer, "\"tim\":") + 6; //set the pointer up to find the next image in the json
printf("Image: %s ", working_image_url);
if(!file_exists(working_local_filename)) {
printf("is being downloaded...\n");
curl = curl_easy_init();
curl_easy_setopt(curl, CURLOPT_URL, working_image_url);
curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
file = fopen(working_local_filename, "wb");
curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
curl_easy_perform(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
fclose(file);
if(http_code != 200) {
if(http_code == 404) {
printf("\x1b[31mERROR: File does not exist (404)\x1b[0m \n");
}else {
printf("\x1b[31mERROR: Could not reach the server\x1b[0m \n");
}
}
curl_easy_cleanup(curl);
}
else{
printf("already exists. Skipped.\n");
download_skips++;
}
images_found++;
}
if(images_found == 1) {//lol grammar
printf("\nFound %i image. ", images_found);
}else {
printf("\nFound %i images. ", images_found);
}
if(images_found - download_skips == 1){ //more grammar
printf("%i was downloaded.\n", (images_found - download_skips) );
}
else if(images_found - download_skips > 0) {
printf("%i were downloaded.\n", (images_found - download_skips) );
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment