CodeZombie/4chan_image_dumper.c

## 4chan_image_dumper.c
#include <stdio.h>
#include <sys/stat.h>
#include <stdlib.h>
#include "curl/curl.h"
#include <string.h>

/* Use libcurl-openssl-dev.
 * In Geany, only use F9 to compile the project and F5 to run it; the buttons at the top don't do the same thing.
 * Also, the -l flags should be added to the end of the 'build' command, not the 'compile' command.
 * Yes, I'm new to C :)
 */

/* Growable in-memory buffer that write_memory_callback appends downloaded data to. */
struct MemoryStruct {
	char *memory;	/* heap buffer; resized with realloc and 0-terminated after every append */
	size_t size;	/* number of data bytes stored so far, not counting the terminator */
};

/*
 * Tell whether filename_ names something stat() can see.
 * Returns 1 when stat() succeeds and 0 when it fails for any reason.
 */
int file_exists(const char *filename_){
	struct stat info;

	if(stat(filename_, &info) != 0) {
		return 0;
	}
	return 1;
}

/*
 * Return a newly allocated copy of the part of str_ that sits between the
 * first occurrence of substring_one_ and the next occurrence of
 * substring_two_ after it.
 *
 * strict_ != 0: both delimiters are required; NULL is returned if the closing
 *               one is missing.
 * strict_ == 0: if the closing delimiter is missing, everything after
 *               substring_one_ is returned instead.
 *
 * Returns NULL when an argument is NULL, when substring_one_ is not found,
 * or when memory cannot be allocated. Every non-NULL result is a separate
 * heap allocation that the caller releases with free(), whichever mode
 * produced it.
 */
char *substring_sandwich(const char *str_, const char *substring_one_, const char *substring_two_, int strict_) {
	if(!str_ || !substring_one_ || !substring_two_) {
		return NULL;
	}

	const char *start = strstr(str_, substring_one_);
	if(!start) {
		return NULL;
	}
	start += strlen(substring_one_);

	size_t length;
	const char *end = strstr(start, substring_two_);
	if(end) {
		length = (size_t)(end - start);
	}else if(strict_ == 0) {
		//closing delimiter not found and strict mode is off: take the rest of the string
		length = strlen(start);
	}else {
		return NULL;
	}

	char *target = malloc(length + 1);
	if(!target) {
		return NULL;
	}
	memcpy(target, start, length);
	target[length] = '\0';
	return target;
}

/*
 * libcurl write callback that appends one received chunk to an open file.
 *
 * ptr points at size * nmemb bytes of data; stream is the FILE registered
 * through CURLOPT_WRITEDATA. The return value is the number of BYTES written,
 * which is what libcurl compares against size * nmemb to detect a failed
 * write (fwrite itself reports items, hence the multiplication).
 * Returns 0 when stream is NULL.
 */
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) {
	if(!stream) {
		return 0;
	}
	size_t items = fwrite(ptr, size, nmemb, stream);
	return items * size;
}

/*
 * libcurl write callback that appends one received chunk to a MemoryStruct.
 *
 * contents points at size * nmemb bytes; userp is the struct MemoryStruct
 * registered through CURLOPT_WRITEDATA. The buffer is grown by the chunk size
 * plus one byte so it can always be kept 0-terminated.
 *
 * Returns the number of bytes consumed, or 0 when the buffer could not be
 * grown. In that case the existing buffer and size are left untouched.
 */
static size_t write_memory_callback(void *contents, size_t size, size_t nmemb, void *userp) {
	size_t realsize = size * nmemb;
	struct MemoryStruct *mem = userp;

	//realloc into a temporary: on failure the old block is still valid and must not be lost
	char *grown = realloc(mem->memory, mem->size + realsize + 1);
	if(grown == NULL) {
		printf("Not enough memory\n");
		return 0;
	}
	mem->memory = grown;

	memcpy(mem->memory + mem->size, contents, realsize);
	mem->size += realsize;
	mem->memory[mem->size] = '\0';

	return realsize;
}

int main(void) {
	char *thread_url;
	char *thread_board = NULL;
	char *thread_id = NULL;
	char *newline_pos;
	size_t len;

	char thread_json_url[64];

    struct MemoryStruct response;
    response.memory = malloc(1);
    response.size = 0;

    CURL *curl = NULL;
    long http_code;

	int url_okay = 0;

	while(url_okay == 0) {
		printf("Please enter a 4chan thread URL: ");
		thread_url = NULL;
		getline(&thread_url, &len, stdin);

		if( (newline_pos=strchr(thread_url, '\n')) ) {//strip newline from end of thread_url
			*newline_pos = '\0';
		}

		url_okay = 1;

		if(strlen(thread_url) == 1){
			printf("\x1b[31mERROR: No text entered.\x1b[0m \n");
			url_okay = 0;
		}

		thread_board = substring_sandwich(thread_url, "4chan.org/", "/", 1);

		if(!thread_board) {
			printf("\x1b[31mERROR: Could not find board in URL\x1b[0m \n");
			url_okay = 0;
		}

		thread_id = substring_sandwich(thread_url, "/thread/", "/", 0);

		if(!thread_id) {
			printf("\x1b[31mERROR: Could not find thread from URL\x1b[0m \n");
			url_okay = 0;
		}

		if(url_okay == 1) {
			strcpy(thread_json_url, "http://a.4cdn.org/");
			strcat(thread_json_url, thread_board);
			strcat(thread_json_url, "/thread/");
			strcat(thread_json_url, thread_id);
			strcat(thread_json_url, ".json");

			curl = curl_easy_init();
				curl_easy_setopt(curl, CURLOPT_URL, thread_json_url);
				curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
				curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
				curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_memory_callback);
				curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&response);
				curl_easy_perform(curl);
				curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
				if(http_code != 200) {
					if(http_code == 404) {
						printf("\x1b[31mERROR: Thread specified does not exist (404)\x1b[0m \n");
					}else {
						printf("\x1b[31mERROR: Could not reach the server\x1b[0m \n");
					}
					url_okay = 0;
				}
			curl_easy_cleanup(curl);
		}
	}

	printf("\nSearching for images...\n\n");

	char *working_pointer = response.memory;//the pointer that jumps from image to image in the json data
    char *working_extension = NULL;
    char *working_timestamp = NULL;
	char working_image_url[64];
	char working_local_directory[64];
		strcpy(working_local_directory, thread_board);
		strcat(working_local_directory, thread_id);
		mkdir(working_local_directory, 0755);
	char working_local_filename[64];
	int images_found = 0;
	int download_skips = 0;
	FILE* file;

    while(1) {
		working_extension = substring_sandwich(working_pointer, "\"ext\":", ",", 1); //grab the file extension
		if(!working_extension) { break; } //if it can't find one, then there are no more images in the thread.
		working_extension++;//strip the first "
		working_extension[strlen(working_extension) - 1] = '\0'; //strip the last "

		working_timestamp = substring_sandwich(working_pointer, "\"tim\":", ",", 1);

		//generate image URL
		strcpy(working_image_url, "http://i.4cdn.org/");
		strcat(working_image_url, thread_board);
		strcat(working_image_url, "/");
		strcat(working_image_url, working_timestamp);
		strcat(working_image_url, working_extension);

		//generate local path+filename
		strcpy(working_local_filename, working_local_directory);
		strcat(working_local_filename, "/");
		strcat(working_local_filename, working_timestamp);
		strcat(working_local_filename, working_extension);

		working_pointer = strstr(working_pointer, "\"tim\":") + 6; //set the pointer up to find the next image in the json

		printf("Image: %s ", working_image_url);

		if(!file_exists(working_local_filename)) {
			printf("is being downloaded...\n");
			curl = curl_easy_init();
				curl_easy_setopt(curl, CURLOPT_URL, working_image_url);
				curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
				curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
				curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
				file = fopen(working_local_filename, "wb");
					curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
					curl_easy_perform(curl);
					curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
				fclose(file);
				if(http_code != 200) {
					if(http_code == 404) {
						printf("\x1b[31mERROR: File does not exist (404)\x1b[0m \n");
					}else {
						printf("\x1b[31mERROR: Could not reach the server\x1b[0m \n");
					}
				}
			curl_easy_cleanup(curl);
		}
		else{
			printf("already exists. Skipped.\n");
			download_skips++;
		}
		images_found++;
	}
	if(images_found == 1) {//lol grammar
		printf("\nFound %i image. ", images_found);
	}else {
		printf("\nFound %i images. ", images_found);
	}

	if(images_found - download_skips == 1){ //more grammar
		printf("%i was downloaded.\n", (images_found - download_skips) );
	}
	else if(images_found - download_skips > 0) {
		printf("%i were downloaded.\n", (images_found - download_skips) );
	}
	return 0;
}
	#include <stdio.h>
	#include <sys/stat.h>
	#include <stdlib.h>
	#include "curl/curl.h"
	#include <string.h>

	/* Use libcurl-openssl-dev.
	* In Geany, only use F9 to compile the project and F5 to run it; the buttons at the top don't do the same thing.
	* Also, the -l flags should be added to the end of the 'build' command, not the 'compile' command.
	* Yes, I'm new to C :)
	*/

	/* Growable in-memory buffer that write_memory_callback appends downloaded data to. */
	struct MemoryStruct {
	/* heap buffer; resized with realloc and 0-terminated after every append */
	char *memory;
	/* number of data bytes stored so far, not counting the terminator */
	size_t size;
	};

	/*
	 * Tell whether filename_ names something stat() can see.
	 * Returns 1 when stat() succeeds and 0 when it fails for any reason.
	 */
	int file_exists(const char *filename_){
		struct stat info;

		if(stat(filename_, &info) != 0) {
			return 0;
		}
		return 1;
	}

	/*
	 * Return a newly allocated copy of the part of str_ that sits between the
	 * first occurrence of substring_one_ and the next occurrence of
	 * substring_two_ after it.
	 *
	 * strict_ != 0: both delimiters are required; NULL is returned if the
	 *               closing one is missing.
	 * strict_ == 0: if the closing delimiter is missing, everything after
	 *               substring_one_ is returned instead.
	 *
	 * Returns NULL when an argument is NULL, when substring_one_ is not found,
	 * or when memory cannot be allocated. Every non-NULL result is a separate
	 * heap allocation that the caller releases with free(), whichever mode
	 * produced it.
	 */
	char *substring_sandwich(const char *str_, const char *substring_one_, const char *substring_two_, int strict_) {
		if(!str_ || !substring_one_ || !substring_two_) {
			return NULL;
		}

		const char *start = strstr(str_, substring_one_);
		if(!start) {
			return NULL;
		}
		start += strlen(substring_one_);

		size_t length;
		const char *end = strstr(start, substring_two_);
		if(end) {
			length = (size_t)(end - start);
		}else if(strict_ == 0) {
			//closing delimiter not found and strict mode is off: take the rest of the string
			length = strlen(start);
		}else {
			return NULL;
		}

		char *target = malloc(length + 1);
		if(!target) {
			return NULL;
		}
		memcpy(target, start, length);
		target[length] = '\0';
		return target;
	}

	/*
	 * libcurl write callback that appends one received chunk to an open file.
	 *
	 * ptr points at size * nmemb bytes of data; stream is the FILE registered
	 * through CURLOPT_WRITEDATA. The return value is the number of BYTES
	 * written, which is what libcurl compares against size * nmemb to detect a
	 * failed write (fwrite itself reports items, hence the multiplication).
	 * Returns 0 when stream is NULL.
	 */
	size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) {
		if(!stream) {
			return 0;
		}
		size_t items = fwrite(ptr, size, nmemb, stream);
		return items * size;
	}

	size_t static write_memory_callback(void contents, size_t size, size_t nmemb, void userp) {
	size_t realsize = size * nmemb;
	struct MemoryStruct mem = (struct MemoryStruct )userp;

	mem->memory = realloc(mem->memory, mem->size + realsize + 1);
	if(mem->memory == NULL) {
	printf("Not enough memory\n");
	return 0;
	}
	memcpy(&(mem->memory[mem->size]), contents, realsize);
	mem->size += realsize;
	mem->memory[mem->size] = 0;

	return realsize;
	}

	int main(void) {
	char *thread_url;
	char *thread_board = NULL;
	char *thread_id = NULL;
	char *newline_pos;
	size_t len;

	char thread_json_url[64];

	struct MemoryStruct response;
	response.memory = malloc(1);
	response.size = 0;

	CURL *curl = NULL;
	long http_code;

	int url_okay = 0;

	while(url_okay == 0) {
	printf("Please enter a 4chan thread URL: ");
	thread_url = NULL;
	getline(&thread_url, &len, stdin);

	if( (newline_pos=strchr(thread_url, '\n')) ) {//strip newline from end of thread_url
	*newline_pos = '\0';
	}

	url_okay = 1;

	if(strlen(thread_url) == 1){
	printf("\x1b[31mERROR: No text entered.\x1b[0m \n");
	url_okay = 0;
	}

	thread_board = substring_sandwich(thread_url, "4chan.org/", "/", 1);

	if(!thread_board) {
	printf("\x1b[31mERROR: Could not find board in URL\x1b[0m \n");
	url_okay = 0;
	}

	thread_id = substring_sandwich(thread_url, "/thread/", "/", 0);

	if(!thread_id) {
	printf("\x1b[31mERROR: Could not find thread from URL\x1b[0m \n");
	url_okay = 0;
	}

	if(url_okay == 1) {
	strcpy(thread_json_url, "http://a.4cdn.org/");
	strcat(thread_json_url, thread_board);
	strcat(thread_json_url, "/thread/");
	strcat(thread_json_url, thread_id);
	strcat(thread_json_url, ".json");

	curl = curl_easy_init();
	curl_easy_setopt(curl, CURLOPT_URL, thread_json_url);
	curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
	curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_memory_callback);
	curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&response);
	curl_easy_perform(curl);
	curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
	if(http_code != 200) {
	if(http_code == 404) {
	printf("\x1b[31mERROR: Thread specified does not exist (404)\x1b[0m \n");
	}else {
	printf("\x1b[31mERROR: Could not reach the server\x1b[0m \n");
	}
	url_okay = 0;
	}
	curl_easy_cleanup(curl);
	}
	}

	printf("\nSearching for images...\n\n");

	char *working_pointer = response.memory;//the pointer that jumps from image to image in the json data
	char *working_extension = NULL;
	char *working_timestamp = NULL;
	char working_image_url[64];
	char working_local_directory[64];
	strcpy(working_local_directory, thread_board);
	strcat(working_local_directory, thread_id);
	mkdir(working_local_directory, 0755);
	char working_local_filename[64];
	int images_found = 0;
	int download_skips = 0;
	FILE* file;

	while(1) {
	working_extension = substring_sandwich(working_pointer, "\"ext\":", ",", 1); //grab the file extension
	if(!working_extension) { break; } //if it can't find one, then there are no more images in the thread.
	working_extension++;//strip the first "
	working_extension[strlen(working_extension) - 1] = '\0'; //strip the last "

	working_timestamp = substring_sandwich(working_pointer, "\"tim\":", ",", 1);

	//generate image URL
	strcpy(working_image_url, "http://i.4cdn.org/");
	strcat(working_image_url, thread_board);
	strcat(working_image_url, "/");
	strcat(working_image_url, working_timestamp);
	strcat(working_image_url, working_extension);

	//generate local path+filename
	strcpy(working_local_filename, working_local_directory);
	strcat(working_local_filename, "/");
	strcat(working_local_filename, working_timestamp);
	strcat(working_local_filename, working_extension);

	working_pointer = strstr(working_pointer, "\"tim\":") + 6; //set the pointer up to find the next image in the json

	printf("Image: %s ", working_image_url);

	if(!file_exists(working_local_filename)) {
	printf("is being downloaded...\n");
	curl = curl_easy_init();
	curl_easy_setopt(curl, CURLOPT_URL, working_image_url);
	curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
	curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
	file = fopen(working_local_filename, "wb");
	curl_easy_setopt(curl, CURLOPT_WRITEDATA, file);
	curl_easy_perform(curl);
	curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
	fclose(file);
	if(http_code != 200) {
	if(http_code == 404) {
	printf("\x1b[31mERROR: File does not exist (404)\x1b[0m \n");
	}else {
	printf("\x1b[31mERROR: Could not reach the server\x1b[0m \n");
	}
	}
	curl_easy_cleanup(curl);
	}
	else{
	printf("already exists. Skipped.\n");
	download_skips++;
	}
	images_found++;
	}
	if(images_found == 1) {//lol grammar
	printf("\nFound %i image. ", images_found);
	}else {
	printf("\nFound %i images. ", images_found);
	}

	if(images_found - download_skips == 1){ //more grammar
	printf("%i was downloaded.\n", (images_found - download_skips) );
	}
	else if(images_found - download_skips > 0) {
	printf("%i were downloaded.\n", (images_found - download_skips) );
	}
	return 0;
	}