Skip to content

Instantly share code, notes, and snippets.

@sonictk
Created August 13, 2018 15:50
Show Gist options
  • Save sonictk/b31580160eeb497e32e32480d3003e9d to your computer and use it in GitHub Desktop.
Save sonictk/b31580160eeb497e32e32480d3003e9d to your computer and use it in GitHub Desktop.
#include <arpa/inet.h>
#include <fcntl.h>
#include <netdb.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
// Classifies a content type string as text or binary content.
//
// Only the first token of ``contentTypeStr`` (everything before the first '/')
// is inspected. If that token contains one of "application", "audio", "font",
// "image" or "video" the content is treated as binary, otherwise as text.
//
// contentTypeStr: NUL-terminated string to classify, e.g. the text starting
//                 at a Content-Type header. May be NULL.
//
// Returns FileType::BINARY or FileType::TEXT, or FileType::UNKNOWN when the
// input is NULL, has no token before a '/', or memory could not be allocated.
FileType translateToContentType(const char *contentTypeStr)
{
    if (contentTypeStr == NULL) {
        cerr << "Content type request was formatted incorrectly!\n";
        return FileType::UNKNOWN;
    }

    // NOTE: strtok() writes into the string it splits, so tokenise a private copy
    size_t searchStrSize = strlen(contentTypeStr) + 1;
    char *searchStr = (char *)malloc(searchStrSize);
    if (searchStr == NULL) {
        cerr << "Could not allocate memory to parse the content type!\n";
        return FileType::UNKNOWN;
    }
    memcpy(searchStr, contentTypeStr, searchStrSize);

    FileType result = FileType::UNKNOWN;
    const char *found = strtok(searchStr, "/");
    if (found == NULL) {
        cerr << "Content type request was formatted incorrectly!\n";
    } else {
        static const char *const binaryTypes[] = {
            "application", "audio", "font", "image", "video"
        };
        result = FileType::TEXT;
        for (size_t i = 0; i < sizeof(binaryTypes) / sizeof(binaryTypes[0]); ++i) {
            // NOTE: strstr() returns a pointer, so test it against NULL instead
            // of ordering it against an integer
            if (strstr(found, binaryTypes[i]) != NULL) {
                result = FileType::BINARY;
                break;
            }
        }
    }

    free(searchStr);
    return result;
}
// Finds the content type header in ``request`` and classifies it.
//
// The text from ``contentTypeToken`` up to the end of that header line is
// copied and handed to translateToContentType().
//
// request: NUL-terminated HTTP header text. May be NULL.
//
// Returns the FileType reported by translateToContentType(), or
// FileType::UNKNOWN when the token is missing, the line is not terminated by
// "\r\n", or memory could not be allocated.
FileType getContentType(const char *request)
{
    if (request == NULL) {
        cerr << "Request header was not formatted correctly!\n";
        return FileType::UNKNOWN;
    }

    const char *found = strstr(request, contentTypeToken);
    if (found == NULL) {
        cerr << "Could not find " << contentTypeToken << " in request header!\n";
        return FileType::UNKNOWN;
    }

    // NOTE: A single header line ends at the first CRLF; the blank line
    // ("\r\n\r\n") terminates the whole header instead
    const char *lineEnd = strstr(found, "\r\n");
    if (lineEnd == NULL) {
        cerr << "Request header was not formatted correctly!\n";
        return FileType::UNKNOWN;
    }

    size_t contentTypeRequestLen = (size_t)(lineEnd - found);
    char *contentTypeRequest = (char *)malloc(contentTypeRequestLen + 1);
    if (contentTypeRequest == NULL) {
        cerr << "Could not allocate memory to parse the content type!\n";
        return FileType::UNKNOWN;
    }
    memcpy(contentTypeRequest, found, contentTypeRequestLen);
    // NOTE: The terminator goes in the last allocated byte, never past it
    contentTypeRequest[contentTypeRequestLen] = '\0';

    FileType contentType = translateToContentType(contentTypeRequest);
    free(contentTypeRequest);
    return contentType;
}
// Computes the buffer size needed to hold the formatted HTTP request.
//
// The size is measured by formatting ``HTTPRequestTemplate`` with the same
// arguments (and in the same order) that formatHTTPRequest() uses, so it does
// not depend on how many format specifiers the template contains.
//
// resourcePath: NUL-terminated path of the resource being requested.
// domainName:   NUL-terminated host name of the server.
//
// Returns the number of bytes required including the terminating NUL, or 0 if
// either argument is NULL or the request could not be formatted.
unsigned int getLengthOfHTTPRequest(const char *resourcePath, const char *domainName)
{
    if (resourcePath == NULL || domainName == NULL) {
        return 0;
    }

    // NOTE: With a NULL buffer of size 0 snprintf() writes nothing and returns
    // the number of characters the full output would need
    int len = snprintf(NULL, 0, HTTPRequestTemplate, resourcePath, domainName);
    if (len < 0) {
        return 0;
    }

    return (unsigned int)len + 1;
}
// Formats the HTTP request for ``resourcePath`` on ``domainName`` into ``buf``.
//
// resourcePath: NUL-terminated path of the resource being requested.
// domainName:   NUL-terminated host name of the server.
// buf:          Destination buffer for the NUL-terminated request.
// size:         Capacity of ``buf`` in bytes, including room for the NUL.
//
// Returns the length of the request written (excluding the NUL), or 0 if the
// arguments are invalid, formatting failed, or ``buf`` was too small to hold
// the complete request.
int formatHTTPRequest(const char *resourcePath,
                      const char *domainName,
                      char *buf,
                      unsigned int size)
{
    if (resourcePath == NULL || domainName == NULL || buf == NULL || size == 0) {
        cerr << "Could not format HTTP request: invalid arguments\n";
        return 0;
    }

    int len = snprintf(buf, size, HTTPRequestTemplate, resourcePath, domainName);
    if (len < 0) {
        cerr << "Could not format HTTP request: " << strerror(errno) << "\n";
        return 0;
    }

    // NOTE: snprintf() reports the length the full output needs; a value that
    // does not fit means the request in ``buf`` was cut short and is unusable
    if ((unsigned int)len >= size) {
        cerr << "Could not format HTTP request: buffer of " << size << " bytes is too small\n";
        return 0;
    }

    return len;
}
int getSizeOfContent(const char *response)
{
// TODO: (sonictk) Check if other types of status codes (like 201) need to be
// accounted for
const char *statusCode = strstr(response, statusCodeToken);
if (statusCode == NULL) {
return -1;
}
const char *contentLengthFound = strstr(response, contentLengthToken);
if (contentLengthFound == NULL) {
return -1;
}
// TODO: (sonictk) Might be a better way of parsing the HTTP response than doing this?
unsigned int contentLengthLenStart = strlen(contentLengthFound) - strlen(contentLengthToken) + 1;
char *contentLengthStart = (char *)malloc(contentLengthLenStart * sizeof(char));
strncpy(contentLengthStart, contentLengthFound + strlen(contentLengthToken), contentLengthLenStart);
const char *contentLengthEnd = strstr(contentLengthStart, "\r\n");
unsigned int contentLengthLen = strlen(contentLengthStart) - strlen(contentLengthEnd) + 1;
char *contentLengthStr = (char *)malloc(contentLengthLen * sizeof(char));
strncpy(contentLengthStr, contentLengthStart, contentLengthLen - 1);
int contentLength = atoi(contentLengthStr);
free(contentLengthStart);
free(contentLengthStr);
if (contentLength == 0) {
return -1;
}
return contentLength;
}
// Measures the HTTP header at the start of ``response``.
//
// response: NUL-terminated HTTP response text.
//
// Returns the number of bytes up to and including the blank line ("\r\n\r\n")
// that ends the header, i.e. the offset at which the body starts, or -1 if no
// blank line is present.
int getLengthOfHTTPHeader(char *response)
{
    static const char terminator[] = "\r\n\r\n";

    const char *terminatorPos = strstr(response, terminator);
    if (!terminatorPos) {
        return -1;
    }

    // NOTE: Distance to the terminator plus the terminator itself
    const unsigned int bodyOffset = (terminatorPos - response) + (sizeof(terminator) - 1);
    return bodyOffset;
}
// Writes ``content`` to ``filep`` according to ``contentType``.
//
// contentType: FileType::TEXT writes with fprintf() using ``size`` as the
//              maximum number of characters, so output also stops at the first
//              NUL byte. FileType::BINARY writes exactly ``size`` bytes with
//              fwrite().
// filep:       Open stream to write to.
// content:     Data to write.
// size:        Number of bytes available in ``content``.
//
// Returns the value reported by fprintf() or fwrite(), or -1 for any other
// content type.
int writeToFile(FileType contentType, FILE *filep, void *content, unsigned int size)
{
    if (contentType == FileType::TEXT) {
        return fprintf(filep, "%.*s", size, (char *)content);
    }

    if (contentType == FileType::BINARY) {
        return fwrite(content, sizeof(char), size, filep);
    }

    cerr << "Unknown content type!\n";
    return -1;
}
int downloadFile(const char *url, const char *outputPath, unsigned int timeoutMax)
{
if (url == NULL) {
cerr << "No valid URL provided!\n";
return -1;
}
if (outputPath == NULL) {
cerr << "No valid outpath path specified!\n";
return -1;
}
// NOTE: (sonictk) Find the domain name from the whole URL given so that we
// can convert it to a IP address that can be used for opening a socket with
// TODO: (sonictk) See if malloc-ing is the right thing to do or allocate from
// a pool instead
unsigned int urlLen = strlen(url);
char *searchStr = (char *)malloc((urlLen + 1) * sizeof(char));
strcpy(searchStr, url);
const char *protocolSeparator = "://";
unsigned int protocolSeparatorLen = strlen(protocolSeparator);
char *domainNameBuffer = (char *)malloc((urlLen + 1) * sizeof(char));
char *delimiter = strstr(searchStr, protocolSeparator);
if (delimiter != NULL) {
char *domainNameBufferTmp = delimiter + protocolSeparatorLen;
strcpy(domainNameBuffer, domainNameBufferTmp);
} else {
strcpy(domainNameBuffer, url);
}
strtok(domainNameBuffer, "/");
size_t domainNameLen = strlen(domainNameBuffer);
char *domainName = (char *)malloc((domainNameLen + 1) * sizeof(char));
int downloadResult = 0;
strcpy(domainName, domainNameBuffer);
if (strlen(domainName) == 0) {
cerr << "Could not determine the domain name!\n";
free(searchStr);
free(domainNameBuffer);
free(domainName);
return -1;
}
unsigned int delimiterLen;
if (delimiter == NULL) {
delimiterLen = 0;
} else {
delimiterLen = strlen(delimiter);
}
unsigned int protocolLen = urlLen - strlen(delimiter);
unsigned int resourcePathLen = urlLen - protocolLen - domainNameLen - protocolSeparatorLen;
char *resourcePath = (char *)malloc((resourcePathLen + 1) * sizeof(char));
unsigned int offset = domainNameLen + urlLen - delimiterLen + protocolSeparatorLen;
strcpy(resourcePath, url + offset);
struct addrinfo *addressResult, *iter, hints;
hints.ai_flags = AI_V4MAPPED|AI_ADDRCONFIG;
hints.ai_family = AF_INET; // NOTE: (sonictk) This forces IPv4 addresses only
hints.ai_socktype = SOCK_STREAM;
hints.ai_protocol = IPPROTO_TCP;
hints.ai_addrlen = 0;
hints.ai_addr = NULL;
hints.ai_canonname = NULL;
hints.ai_next = NULL;
int result = getaddrinfo(domainName, "80", &hints, &addressResult);
if (result != 0) {
if (result == EAI_SYSTEM) {
cerr << "Could not get address information! System error\n";
} else {
cerr << "Error in getaddrinfo: " << gai_strerror(result) << "\n";
}
free(searchStr);
free(domainNameBuffer);
free(domainName);
free(resourcePath);
return -1;
}
FILE *filep = fopen(outputPath, "wb");
if (filep == NULL) {
cerr << "Could not open file for writing: " << strerror(errno) << "\n";
free(searchStr);
free(domainNameBuffer);
free(domainName);
free(resourcePath);
return -1;
}
for (iter = addressResult; iter != NULL; iter = iter->ai_next) {
int socketFileDesc = socket(iter->ai_family,
iter->ai_socktype,
iter->ai_protocol);
int optionVal = 1;
// NOTE: (sonictk) Allow socket to be re-used, otherwise the port will enter
// timeout state during which time cannot be re-bound to a new socket after
// the first socket is closed
setsockopt(socketFileDesc,
SOL_SOCKET,
SO_REUSEPORT,
&optionVal,
sizeof(optionVal));
// NOTE: (sonictk) Set socket to have non-blocking behaviour so that this
// can be called from a main thread without blocking the application
fcntl(socketFileDesc, F_SETFL, O_NONBLOCK);
if (socketFileDesc == -1) {
cerr << strerror(errno) << "\n";
continue;
}
connect(socketFileDesc, iter->ai_addr, iter->ai_addrlen);
struct timeval timeout;
timeout.tv_sec = timeoutMax;
timeout.tv_usec = 0;
fd_set readFileDescs, writeFileDescs;
FD_ZERO(&readFileDescs);
FD_ZERO(&writeFileDescs);
FD_SET(socketFileDesc, &writeFileDescs);
FD_SET(socketFileDesc, &readFileDescs);
int fileDescs = select(socketFileDesc + 1,
NULL,
&writeFileDescs,
NULL,
&timeout);
int responseRead = 0;
ssize_t lenResponse = 0;
switch (fileDescs) {
case 0:
cerr << "Timeout limit hit!\n";
break;
case -1:
cerr << "Error occurred during poll of socket: " << strerror(errno) << "\n";
break;
default:
int fileSize = 0;
int soError;
socklen_t length = sizeof(soError);
getsockopt(socketFileDesc, SOL_SOCKET, SO_ERROR, &soError, &length);
if (soError == 0) {
time_t start = time(NULL);
time_t timeoutLimit = timeoutMax;
time_t end = start + timeoutLimit;
unsigned int requestLen = getLengthOfHTTPRequest(resourcePath, domainName);
if (requestLen == 0) {
cerr << "Invalid HTTP request!\n";
return -1;
}
char *request = (char *)malloc(requestLen * sizeof(char));
formatHTTPRequest(resourcePath, domainName, request, requestLen);
while (start < end) {
if (FD_ISSET(socketFileDesc, &writeFileDescs)) {
write(socketFileDesc, request, strlen(request));
break;
}
start = time(NULL);
}
free(request);
start = time(NULL);
end = start + timeoutLimit;
char response[bufferSize];
FileType contentType = FileType::UNKNOWN;
while (start < end) {
memset(response, 0, bufferSize);
lenResponse = read(socketFileDesc, response, bufferSize);
std::cout << downloadResult << " of " << fileSize << std::endl;
// NOTE: (sonictk) If the file has finished downloading, stop reading
if (downloadResult > 0 && downloadResult >= fileSize) {
break;
}
if (lenResponse <= 0) {
// NOTE: (sonictk) If the read failed, retry after a short timeout
usleep(100000);
} else {
if (contentType == FileType::UNKNOWN) {
contentType = getContentType(response);
}
responseRead += lenResponse;
// TODO: (sonictk) Figure out how to condense this
if (fileSize == 0) {
fileSize = getSizeOfContent(response);
if (fileSize > 0) {
int headerLen = getLengthOfHTTPHeader(response);
if (headerLen == -1) {
cerr << "Invalid HTTP header!\n";
continue;
}
memset(response, 0, headerLen);
// TODO: (sonictk) Account for if filesize is less than buffer size
downloadResult = writeToFile(contentType,
filep,
response + headerLen,
bufferSize - headerLen);
}
} else {
size_t bytesToWrite = fileSize - downloadResult;
bytesToWrite = bytesToWrite > bufferSize ? bufferSize : bytesToWrite;
downloadResult += writeToFile(contentType,
filep,
response,
bytesToWrite);
}
}
start = time(NULL);
}
} else {
cerr << "Error occurred: " << strerror(errno) << "\n";
}
break;
}
// NOTE: (sonictk) If the download failed to complete before the timeout expired
if (responseRead < lenResponse) {
cerr << "File was not fully downloaded within the timeout!\n";
return -1;
}
// NOTE: (sonictk) Shutdown tells server also that there's no need to
// send any more data
shutdown(socketFileDesc, SHUT_RDWR);
close(socketFileDesc);
}
freeaddrinfo(iter);
fclose(filep);
free(searchStr);
free(domainNameBuffer);
free(domainName);
free(resourcePath);
return downloadResult;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment