Skip to content

Instantly share code, notes, and snippets.

@khrona
Created July 28, 2011 22:23
Show Gist options
  • Save khrona/1112719 to your computer and use it in GitHub Desktop.
Save khrona/1112719 to your computer and use it in GitHub Desktop.
Captures a web page to a JPEG (or to a series of JPEGs if the page is too tall for a single image). Uses Awesomium 1.6.2 (r159+) and its C API.
#include <Awesomium/awesomium_capi.h>
#include <string.h>
#include <stdio.h>
#if defined(__WIN32__) || defined(_WIN32)
#include <windows.h>
#elif defined(__APPLE__)
#include <unistd.h>
#endif
// The URL to render
#define URL "http://en.wikipedia.org/wiki/World_Wide_Web"
// The base width of the web-page to render. The view is later resized
// to the content width reported for the page, but this initial value
// still matters for web-pages that don't have a minimum width.
#define WIDTH 1000
// The maximum height of each image (the page will be split
// up into multiple images if it is taller than this).
#define MAX_IMAGE_HEIGHT 1000
// Number of milliseconds to sleep before each WebCore update
#define SLEEP_MS 50
// Whether or not we should enable plugins (e.g. Flash); passed as the
// first argument of awe_webcore_initialize()
#define ENABLE_PLUGINS false
// Number of extra updates to force once loading has finished (to allow
// Flash content to render, etc.).
#define FORCE_NUM_UPDATES 10
// CSS rule that disables scrollbars by giving them zero size
#define SCROLLBAR_CSS "::-webkit-scrollbar { width: 0px; height: 0px; } "
// Global vars for the program.
// gotPageDimensions is set to true by onGetScrollData() whenever scroll
// data arrives; main() and scrollToNow() poll it while waiting.
bool gotPageDimensions = false;
// Content width most recently reported to onGetScrollData()
int cWidth = 0;
// Content height most recently reported to onGetScrollData()
int cHeight = 0;
// Vertical scroll offset most recently reported to onGetScrollData()
int cScrollY = 0;
// Some forward declarations:
// Scroll-data callback; stores the content size and the vertical scroll
// offset in the globals and sets gotPageDimensions.
void onGetScrollData(awe_webview* caller,
int contentWidth,
int contentHeight,
int preferredWidth,
int scrollX,
int scrollY);
// Sleeps for sleepTime milliseconds (Windows and macOS only).
void sleepMs(int sleepTime);
// Sleeps for SLEEP_MS, then updates the WebCore.
void updateCore();
// Resizes the WebView and keeps updating until the resize has finished.
void resizeNow(awe_webview* webView, int width, int height);
// Scrolls the page to vertical offset y and refreshes the scroll data.
void scrollToNow(awe_webview* webView, int y);
// Renders the WebView and saves the result as a JPEG named filename.
void renderTo(awe_webview* webView, const char* filename);
/**
* Main Program: Automatically renders an entire web-page to
* a JPEG (or series of JPEGs if the web-page is
* greater than MAX_IMAGE_HEIGHT).
*
* Output goes to "./result.jpg" for a single image, or to
* "./result_0.jpg", "./result_1.jpg", ... when the page is split.
*
* Returns 0 once the render pass has run, or 1 if no WebView
* could be created.
*/
int main()
{
    // Disable scrollbar rendering
    awe_string* custom_css_str = awe_string_create_from_ascii(
        SCROLLBAR_CSS,
        strlen(SCROLLBAR_CSS));

    // Create our WebCore singleton. Plugins follow ENABLE_PLUGINS and our
    // custom CSS is handed over as the last argument.
    awe_webcore_initialize(ENABLE_PLUGINS, true, false, awe_string_empty(),
                           awe_string_empty(), awe_string_empty(),
                           AWE_LL_NORMAL, false, awe_string_empty(), true,
                           awe_string_empty(), awe_string_empty(),
                           awe_string_empty(), awe_string_empty(),
                           awe_string_empty(), awe_string_empty(), false, 0,
                           false, false, custom_css_str);
    awe_string_destroy(custom_css_str);

    /**
    * Create a new WebView instance with a certain width and height, using
    * the WebCore we just created.
    */
    awe_webview* webView = awe_webcore_create_webview(WIDTH, MAX_IMAGE_HEIGHT,
                                                      false);

    // Defensive check: every call below uses this handle, so bail out
    // cleanly instead of passing a null WebView around.
    if(webView == NULL)
    {
        fprintf(stderr, "Failed to create a WebView.\n");
        awe_webcore_shutdown();
        return 1;
    }

    // Bind our scroll data callback
    awe_webview_set_callback_get_scroll_data(webView, onGetScrollData);

    // Create our URL string
    awe_string* url_str = awe_string_create_from_ascii(URL, strlen(URL));

    // Load the URL into our WebView instance
    awe_webview_load_url(webView, url_str, awe_string_empty(),
                         awe_string_empty(), awe_string_empty());

    // Destroy our URL string
    awe_string_destroy(url_str);

    printf("Page is now loading...\n");

    // Wait for our WebView to finish loading
    while(awe_webview_is_loading_page(webView))
    {
        updateCore();
    }

    // Force a couple updates (for Flash loading, etc.)
    for(int i = 0; i < FORCE_NUM_UPDATES; i++)
    {
        updateCore();
    }

    printf("Page has finished loading.\n");

    // Get the page dimensions now. onGetScrollData() fills in cWidth and
    // cHeight and raises gotPageDimensions.
    // NOTE(review): this wait has no timeout; it only ends once the
    // callback has fired.
    awe_webview_request_scroll_data(webView, awe_string_empty());
    while(!gotPageDimensions)
    {
        updateCore();
    }

    // If our content height is larger than the max image height,
    // we will split the render up into multiple images
    if(cHeight > MAX_IMAGE_HEIGHT)
    {
        resizeNow(webView, cWidth, MAX_IMAGE_HEIGHT);

        int imgCount = 0;

        // cWidth and cHeight are refreshed by the scroll-data callback
        // each time scrollToNow() re-requests them, so the loop always
        // works with the latest reported page size.
        for(int i = 0; i < cHeight; i += MAX_IMAGE_HEIGHT, imgCount++)
        {
            // The last slice may be shorter than a full image
            if(cHeight - i < MAX_IMAGE_HEIGHT)
            {
                resizeNow(webView, cWidth, cHeight - i);
            }

            // Bounded formatting; the buffer comfortably holds the
            // longest possible "./result_<int>.jpg" name.
            char filename[50];
            snprintf(filename, sizeof filename, "./result_%d.jpg", imgCount);

            scrollToNow(webView, i);
            renderTo(webView, filename);
        }
    }
    else // Otherwise, just render it all to a single image
    {
        resizeNow(webView, cWidth, cHeight);
        renderTo(webView, "./result.jpg");
    }

    // Destroy our WebView instance
    awe_webview_destroy(webView);
    updateCore();

    // Destroy our WebCore instance
    awe_webcore_shutdown();

    return 0;
}
// Scroll-data callback: remembers the reported content size and vertical
// scroll offset in the program's globals, then flags that fresh page
// dimensions are available.
void onGetScrollData(awe_webview* caller,
                     int contentWidth,
                     int contentHeight,
                     int preferredWidth,
                     int scrollX,
                     int scrollY)
{
    // These values are reported but not needed by this program.
    (void)caller;
    (void)preferredWidth;
    (void)scrollX;

    cScrollY = scrollY;
    cHeight = contentHeight;
    cWidth = contentWidth;

    // Raise the flag last, after all three values have been stored.
    gotPageDimensions = true;
}
// Sleep for a specified length, given in milliseconds.
//
// Negative durations return immediately instead of being converted to a
// huge unsigned delay. Only Windows and macOS are handled here; on any
// other platform this function returns without sleeping.
void sleepMs(int sleepTime)
{
    if(sleepTime < 0)
        return;
#if defined(__WIN32__) || defined(_WIN32)
    Sleep((DWORD)sleepTime);
#elif defined(__APPLE__)
    // usleep() is not required to accept intervals of one second or more,
    // and sleepTime * 1000 could overflow an int, so whole seconds go
    // through sleep() and only the sub-second remainder through usleep().
    if(sleepTime >= 1000)
        sleep((unsigned int)(sleepTime / 1000));
    usleep((unsigned int)(sleepTime % 1000) * 1000u);
#endif
}
// Run one tick of the update loop: pause briefly, then update the WebCore.
void updateCore()
{
    // The pause gives background threads a chance to make progress
    // before we ask the WebCore to update.
    const int pauseMs = SLEEP_MS;

    sleepMs(pauseMs);
    awe_webcore_update();
}
// Resize the WebView to width x height and do not return until the
// WebView reports that it is no longer resizing.
void resizeNow(awe_webview* webView, int width, int height)
{
    // NOTE(review): the trailing `true, 1000` presumably mean "wait for
    // repaint" with a 1000 ms timeout; confirm against awesomium_capi.h.
    awe_webview_resize(webView, width, height, true, 1000);

    // Keep updating the core until the resize has completed.
    for(;;)
    {
        if(!awe_webview_is_resizing(webView))
            break;

        updateCore();
    }
}
// Scroll the page immediately to vertical offset y.
//
// Does nothing when the last reported scroll offset (cScrollY) already
// equals y. Otherwise runs window.scrollTo(0, y) in the page and then
// waits for fresh scroll data, which also updates cWidth, cHeight and
// cScrollY through onGetScrollData().
void scrollToNow(awe_webview* webView, int y)
{
    if(cScrollY == y)
        return;

    // Bounded formatting; give up rather than run a truncated script.
    char buffer[50];
    int len = snprintf(buffer, sizeof buffer, "window.scrollTo(0, %d);", y);
    if(len < 0 || (size_t)len >= sizeof buffer)
        return;

    awe_string* js_str = awe_string_create_from_ascii(buffer, len);

    // We use execute_javascript_with_result to force it to run
    // synchronously
    awe_jsvalue* result = awe_webview_execute_javascript_with_result(webView,
        js_str, awe_string_empty(), 1000);

    // Defensive: only destroy a result that was actually handed back.
    if(result != NULL)
        awe_jsvalue_destroy(result);

    awe_string_destroy(js_str);

    // For extra measure, we'll update the scroll data now to make
    // sure we've got the most recent copy of the page
    gotPageDimensions = false;
    awe_webview_request_scroll_data(webView, awe_string_empty());

    while(!gotPageDimensions)
    {
        updateCore();
    }
}
// Render the page to a certain JPEG filename.
//
// Prints a confirmation on stdout when the image was written. Reports on
// stderr when the WebView produced no render buffer or when saving the
// JPEG failed, instead of claiming success unconditionally.
void renderTo(awe_webview* webView, const char* filename)
{
    const awe_renderbuffer* renderBuffer = awe_webview_render(webView);

    if(renderBuffer == NULL)
    {
        fprintf(stderr, "Failed to render the page for %s.\n", filename);
        return;
    }

    awe_string* filename_str = awe_string_create_from_ascii(filename,
                                                            strlen(filename));

    // Save our RenderBuffer directly to a JPEG image (quality 90) and
    // only announce success if the save call reports it.
    if(awe_renderbuffer_save_to_jpeg(renderBuffer, filename_str, 90))
        printf("Saved a render of the page to %s.\n", filename);
    else
        fprintf(stderr, "Failed to save a render of the page to %s.\n",
                filename);

    awe_string_destroy(filename_str);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment