Skip to content

Instantly share code, notes, and snippets.

@pdfcrowd
Last active March 30, 2023 07:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pdfcrowd/d7b21b06f043730d1d3dc551809dac28 to your computer and use it in GitHub Desktop.
Save pdfcrowd/d7b21b06f043730d1d3dc551809dac28 to your computer and use it in GitHub Desktop.
Convert a web page or HTML file to PDF in C++. Complete tutorial: https://pdfcrowd.com/blog/convert-html-to-pdf-in-cpp/
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <curl/curl.h>
// Pdfcrowd conversion endpoint; convert() passes it to CURLOPT_URL.
const char* api_url = "https://api.pdfcrowd.com/convert/latest/";
// Credentials; convert() joins them as "username:api_key" for HTTP Basic auth.
// NOTE(review): these look like shared demo credentials — presumably meant
// to be replaced with the values of your own Pdfcrowd account; confirm.
const char* username = "demo";
const char* api_key = "ce544b6ea52a5621fb9d55f8b542d14d";
/// One textual conversion option, sent as a plain multipart form field.
/// Kept as a simple aggregate so it can be brace-initialized:
/// `FormField{"page_size", "letter"}`.
struct FormField {
    std::string name;   ///< form field name
    std::string value;  ///< form field value
};
/// One file to upload with the conversion request, sent as a multipart
/// file part. Kept as a simple aggregate so it can be brace-initialized:
/// `FormFile{"file", "page.html", bytes}`.
struct FormFile {
    std::string name;        ///< form field name of the part
    std::string filename;    ///< file name reported for the part
    std::vector<char> data;  ///< raw file content
};
/// Growable byte buffer that accumulates the body of the HTTP response.
using ResponseBody = std::vector<char>;
// helpers structure to ensure proper memory deallocation for libcurl
struct CurlHolder {
CURL* curl;
curl_slist* headers;
curl_mime* mime;
CurlHolder() {
curl = curl_easy_init();
if (curl) {
mime = curl_mime_init(curl);
}
headers = nullptr;
}
~CurlHolder() {
if(curl) {
curl_easy_cleanup(curl);
curl_mime_free(mime);
if(headers) {
curl_slist_free_all(headers);
}
curl_global_cleanup();
}
}
// make holder non-copyable
CurlHolder(const CurlHolder&) = delete;
CurlHolder& operator=(const CurlHolder&) = delete;
};
// libcurl write callback: appends one received chunk to the ResponseBody
// registered through CURLOPT_WRITEDATA.
//
// ptr      - start of the received bytes (not NUL-terminated)
// size     - size of one element
// nmemb    - number of elements; the chunk is size * nmemb bytes long
// userdata - pointer to the ResponseBody to append to
//
// Returns the number of bytes stored. When the buffer cannot grow, 0 is
// returned instead; a count that differs from the chunk size tells libcurl
// to abort the transfer with a write error.
size_t write_callback(char* ptr, size_t size, size_t nmemb, void* userdata) {
    ResponseBody* response_body = static_cast<ResponseBody*>(userdata);
    const size_t data_size = size * nmemb;
    if (data_size == 0) {
        return 0;
    }
    try {
        response_body->insert(response_body->end(), ptr, ptr + data_size);
    } catch (...) {
        // This function is called from libcurl's C code, so a C++ exception
        // (e.g. std::bad_alloc) must not propagate out of it. Report the
        // failure as a short write instead.
        return 0;
    }
    return data_size;
}
long convert(const std::vector<FormField>& fields, const std::vector<FormFile>& files, const char* output_filename) {
// result -1 means error, otherwise it containse HTTP status code
// result 200 means success
// result greater than 200 means the conversion error, details: https://pdfcrowd.com/api/status-codes/
long result = -1;
// initialize CURL library
CurlHolder curl;
if(!curl.curl) {
std::cerr << "Failed to initialize libcurl" << std::endl;
return result;
}
// set conversion entry point
curl_easy_setopt(curl.curl, CURLOPT_URL, api_url);
// set Pdfcrowd username and API key
std::ostringstream pdfcrowd_credentials;
pdfcrowd_credentials << username << ":" << api_key;
curl_easy_setopt(curl.curl, CURLOPT_USERPWD, pdfcrowd_credentials.str().c_str());
curl_easy_setopt(curl.curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
// set HTTP multipart post data
curl_easy_setopt(curl.curl, CURLOPT_POST, 1L);
curl.headers = curl_slist_append(nullptr, "Content-Type: multipart/form-data");
curl.headers = curl_slist_append(curl.headers, "boundary=----------ThIs_Is_tHe_bOUnDary_$");
curl_easy_setopt(curl.curl, CURLOPT_HTTPHEADER, curl.headers);
for (const auto& field : fields) {
curl_mimepart* part = curl_mime_addpart(curl.mime);
curl_mime_name(part, field.name.c_str());
curl_mime_data(part, field.value.c_str(), CURL_ZERO_TERMINATED);
}
for (const auto& file : files) {
curl_mimepart* part = curl_mime_addpart(curl.mime);
curl_mime_name(part, file.name.c_str());
curl_mime_filename(part, file.filename.c_str());
curl_mime_type(part, "application/octet-stream");
curl_mime_data(part, file.data.data(), file.data.size());
}
curl_easy_setopt(curl.curl, CURLOPT_MIMEPOST, curl.mime);
// set the callback for writing the response into a memory buffer
ResponseBody response_body;
curl_easy_setopt(curl.curl, CURLOPT_WRITEDATA, static_cast<void*>(&response_body));
curl_easy_setopt(curl.curl, CURLOPT_WRITEFUNCTION, write_callback);
// perform the conversion
CURLcode res = curl_easy_perform(curl.curl);
if (res != CURLE_OK) {
std::cerr << "Failed to perform multipart post: " << curl_easy_strerror(res) << std::endl;
return result;
}
curl_easy_getinfo(curl.curl, CURLINFO_RESPONSE_CODE, &result);
if (result != 200) {
// print details about the Pdfcrowd error
std::cerr << "Pdfcrowd Error Code: " << result << std::endl;
std::cerr << "Pdfcrowd Error Details: " << response_body.data() << std::endl;
return result;
}
// success, write the result to the output file
std::ofstream output_file(output_filename, std::ios::out | std::ios::binary);
if (!output_file.is_open()) {
std::cerr << "Error opening output file: " << output_filename << std::endl;
return -1;
}
output_file.write(response_body.data(), response_body.size());
if (!output_file.good()) {
std::cerr << "Error writing to file: " << output_filename << std::endl;
result = -1;
}
output_file.close();
return result;
}
int convert_url_example() {
std::vector<FormField> fields = {
{"input_format", "html"},
{"output_format", "pdf"},
{"page_size", "letter"},
{"url", "https://example.com/"}
};
return convert(fields, std::vector<FormFile>(), "example_url.pdf");
}
int convert_text_example() {
std::vector<FormField> fields = {
{"input_format", "html"},
{"output_format", "pdf"},
{"page_size", "letter"},
{"text", "<h1>Hello from Pdfcrowd</h1>"}
};
return convert(fields, std::vector<FormFile>(), "example_text.pdf");
}
int convert_file_example() {
std::vector<FormField> fields = {
{"input_format", "html"},
{"output_format", "pdf"},
{"page_size", "letter"}
};
std::ifstream in_stream("your-file.html");
if(!in_stream) {
std::cerr << "Read file error" << std::endl;
return -1;
}
std::vector<char> file_data((std::istreambuf_iterator<char>(in_stream)),
std::istreambuf_iterator<char>());
std::vector<FormFile> files = {
{"file", "your-file.html", std::move(file_data)}
};
return convert(fields, files, "example_file.pdf");
}
// Runs the three examples in order (URL, text, file). Stops at the first
// one that does not return 200, prints its failure message and exits
// with -1; exits with 0 when all of them succeed.
int main() {
    struct Example {
        int (*run)();
        const char* failure_message;
    };
    const Example examples[] = {
        {convert_url_example, "url test failed"},
        {convert_text_example, "text test failed"},
        {convert_file_example, "file test failed"},
    };

    for (const Example& example : examples) {
        if (example.run() != 200) {
            std::cerr << example.failure_message << std::endl;
            return -1;
        }
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment