Skip to content

Instantly share code, notes, and snippets.

@innocenat
Last active December 7, 2019 18:00
Show Gist options
  • Save innocenat/9c4fd987d243881510201b2a34db68fc to your computer and use it in GitHub Desktop.
Save innocenat/9c4fd987d243881510201b2a34db68fc to your computer and use it in GitHub Desktop.
Google Docs to WordPress
<?php
/*
 * Load WordPress from outside the theme layer so that its authentication and
 * post/attachment APIs are available to this standalone importer script.
 */
define('WP_USE_THEMES', false);
define('COOKIE_DOMAIN', false);
define('DISABLE_WP_CRON', true);
require('../wordpress/wp-load.php');
require('../wordpress/wp-admin/includes/image.php');

// Visitors who are not logged in are redirected to the login page, which is
// asked to send them back to this script afterwards.
if (!is_user_logged_in()) {
    header('HTTP/1.1 302 Found');
    // PHP_SELF is derived from the request path, so it is URL-encoded before
    // being embedded in the query string of the Location header.
    header('Location: /wordpress/wp-login.php?redirect_to=' . rawurlencode($_SERVER['PHP_SELF']));
    die();
}

$user = wp_get_current_user();
$user_id = $user->ID;

// Access is limited to administrators and editors. The previous condition
// (!administrator || editor) rejected every editor, including administrators
// who also held the editor role.
$allowed_roles = ['administrator', 'editor'];
if (count(array_intersect($allowed_roles, (array) $user->roles)) === 0) {
    die('<h1>Sorry, you are not allow to access this page</h1>');
}
// Any request other than POST renders the picker/import form that follows and
// then stops; a POST skips the form and continues with the import code after endif.
if ($_SERVER['REQUEST_METHOD'] != 'POST'):
?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<title>Google Docs Autoimporter</title>
</head>
<body>
<h2>Google Docs Autoimporter</h2>
<p>You are logged in as : <em><?php /* Escape the display name before printing it into HTML. */ echo esc_html($user->display_name); ?></em></p>
<!--Button allowing user to display the file picker-->
<button id="pick_file_button" style="display: none;">Pick file</button>
<form method="post" id="import_form" style="display:none">
<p>
Title:<br><input type="text" style="width:100%" id="dname" name="dname">
</p>
<p>
Content:<br><textarea id="dcontent" name="dcontents" style="width:100%;height:300px"></textarea>
</p>
<input type="submit" value="Import">
</form>
<script>
// The API key obtained from the Google API Console.
// Replace the placeholder with your own API key.
var API_KEY = "<API-KEY>";
// The Client ID obtained from the Google API Console. Replace with your own Client ID.
var CLIENT_ID = "<CLIENT-ID>";
// The APP ID obtained from the Google API Console. Replace with your own APP ID and
// keep in sync with API_KEY and CLIENT_ID.
var APP_ID = "<APP-ID>";
// API discovery doc URL for APIs used by this example
var DISCOVERY_DOCS = ["https://www.googleapis.com/discovery/v1/apis/drive/v3/rest"];
// Authorization scopes required by the API; multiple scopes can be
// included, separated by spaces.
var SCOPES = 'https://www.googleapis.com/auth/drive.file';
// Both are assigned by initClient() once gapi.client.init() has resolved.
var GoogleAuth;
var GoogleUser;
// The button is hidden in the markup and revealed by initClient() on success.
var pickFileButton = document.getElementById('pick_file_button');
// handlePickFileButtonClick is a function declaration later in this script.
pickFileButton.onclick = handlePickFileButtonClick;
// The import form is hidden in the markup; pickerCallback() shows it after an export.
var form = document.getElementById('import_form');
/**
 * Invoked by the Google API loader <script> tag once api.js is available.
 * Requests the client, auth2 and picker libraries and passes control to
 * initClient when they have loaded.
 */
function handleClientLoad() {
    var requiredLibraries = 'client:auth2:picker';
    gapi.load(requiredLibraries, initClient);
}
/**
 * Configures the API client with the client ID, discovery documents and
 * scopes declared above. On success it records the auth instance and the
 * current user in the GoogleAuth / GoogleUser globals and reveals the
 * "Pick file" button; on failure it logs the error to the console.
 */
function initClient() {
    var settings = {
        client_id: CLIENT_ID,
        discoveryDocs: DISCOVERY_DOCS,
        scope: SCOPES
    };

    function onInitialised() {
        GoogleAuth = gapi.auth2.getAuthInstance();
        GoogleUser = GoogleAuth.currentUser.get();
        pickFileButton.style.display = 'block';
    }

    function onInitFailed(error) {
        console.log(error);
    }

    gapi.client.init(settings).then(onInitialised, onInitFailed);
}
/**
 * Click handler for the "Pick file" button.
 *
 * Opens the picker straight away when the current user has already granted
 * SCOPES; otherwise asks for the grant first and opens the picker once it is
 * given. A declined or failed grant is logged to the console (matching the
 * error handling in initClient) instead of being silently discarded.
 */
function handlePickFileButtonClick() {
    if (GoogleUser.hasGrantedScopes(SCOPES)) {
        displayPicker();
        return;
    }
    GoogleUser.grant({
        scope: SCOPES
    }).then(displayPicker, function (err) {
        console.log(err);
    });
}
/**
 * Builds a Google Picker showing the DOCS view and makes it visible.
 * The picker is authorised with the current user's access token, has the
 * SUPPORT_DRIVES feature enabled, and reports its result to pickerCallback.
 */
function displayPicker() {
    // Current OAuth token of the signed-in user.
    var accessToken = GoogleUser.getAuthResponse(true).access_token;
    var docsView = new google.picker.View(google.picker.ViewId.DOCS);

    // Same builder calls as a single chain, applied one step at a time.
    var builder = new google.picker.PickerBuilder();
    builder = builder.enableFeature(google.picker.Feature.SUPPORT_DRIVES);
    builder = builder.setAppId(APP_ID);
    builder = builder.setOAuthToken(accessToken);
    builder = builder.addView(docsView);
    builder = builder.setDeveloperKey(API_KEY);
    builder = builder.setCallback(pickerCallback);

    builder.build().setVisible(true);
}
/**
 * Picker result handler.
 *
 * When a document has been picked, exports it from Drive as HTML, copies the
 * file name and the exported markup into the import form, and shows the form.
 * Export failures are logged to the console.
 *
 * @param {Object} data Response object passed in by the picker.
 */
function pickerCallback(data) {
    console.log(data);

    // The picker reports more than selections through this callback (for
    // example a cancel); only a PICKED action carries documents to read.
    if (data[google.picker.Response.ACTION] !== google.picker.Action.PICKED) {
        return;
    }
    var documents = data[google.picker.Response.DOCUMENTS];
    if (!documents || documents.length === 0) {
        return;
    }

    var picked = documents[0];
    var fileName = picked[google.picker.Document.NAME];
    var fileId = picked[google.picker.Document.ID];

    gapi.client.drive.files.export({
        'fileId': fileId,
        'mimeType': 'text/html',
        'fields': '*'
    }).then(function (response) {
        window.document.getElementById('dname').value = fileName;
        // A textarea's submitted content is its `value`; assigning innerText
        // stops updating the field once it has been edited and does not
        // preserve line breaks.
        window.document.getElementById('dcontent').value = response.body;
        form.style.display = "block";
    }, function (err) {
        console.log(err);
    });
}
</script>
<!-- The Google API Loader script. -->
<script async defer src="https://apis.google.com/js/api.js"
onload="this.onload=function(){};handleClientLoad()"
onreadystatechange="if (this.readyState === 'complete') this.onload()">
</script>
</body>
</html>
<?php
// The form page has been fully rendered; stop so that the import code below
// only ever runs for POST requests.
die;
endif;
// Remove PHP's execution time limit for the import that follows.
set_time_limit(0);
/**
 * Parse an inline CSS declaration list ("a:b; c:d") into a map.
 *
 * Each declaration is split on its FIRST colon only, so values that contain
 * colons themselves (e.g. url(http://...)) are kept intact. Fragments that are
 * blank, have no colon, or have an empty property name are skipped.
 *
 * @param string $style Contents of a style attribute.
 * @return array Map of trimmed property name => trimmed value.
 */
function parse_style($style)
{
    $ret = [];
    foreach (explode(';', $style) as $declaration) {
        if (trim($declaration) === '') {
            continue;
        }
        $parts = explode(':', $declaration, 2);
        if (count($parts) < 2) {
            // Not a "property: value" pair; nothing usable to record.
            continue;
        }
        $property = trim($parts[0]);
        if ($property === '') {
            continue;
        }
        $ret[$property] = trim($parts[1]);
    }
    return $ret;
}
/**
 * Serialise a property => value map back into an inline style string.
 * Every entry is written as "property:value;" with no separating whitespace.
 *
 * @param array $s Map of CSS property => value.
 * @return string Concatenated declarations, or '' for an empty map.
 */
function unparse_style($s)
{
    $declarations = [];
    foreach ($s as $property => $value) {
        $declarations[] = $property . ':' . $value . ';';
    }
    return implode('', $declarations);
}
/**
 * Tell whether a node has no visible text.
 *
 * Leading and trailing Unicode separators (\pZ) and control/other characters
 * (\pC) are stripped from the node's text content; the node counts as empty
 * only if nothing remains. The comparison is against the empty string, so a
 * node whose text is "0" is NOT considered empty.
 *
 * @param DOMNode $html Node to inspect.
 * @return bool True when the node's text content is blank.
 */
function element_empty(DOMNode $html)
{
    $text = (string) $html->textContent;
    $stripped = preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $text);
    if ($stripped === null) {
        // The regex engine rejected the input (e.g. invalid UTF-8). Fall back
        // to a byte-wise trim rather than reporting real content as empty.
        $stripped = trim($text);
    }
    return $stripped === '';
}
// Clean style, attributes, replace style with specific tag, etc.
/**
 * First cleaning pass over the exported document tree (recursive).
 *
 * For the given node and all of its descendants this pass:
 *  - removes the <head> element entirely;
 *  - strips class and id attributes;
 *  - rewrites links that go through https://www.google.com/url?q=... so they
 *    point at the URL carried in the q parameter;
 *  - strips style, title and alt from <img>;
 *  - removes layout-only declarations and declarations that restate default
 *    values from style attributes;
 *  - wraps elements styled font-weight:700 in <strong> and font-style:italic
 *    in <em> (headings are never wrapped in <strong>).
 *
 * @param DOMDocument $doc  Owner document, used to create wrapper elements.
 * @param DOMNode     $html Node to clean; modified in place.
 * @return void
 */
function parse_and_clean_1(DOMDocument $doc, DOMNode $html)
{
    // Remove node;
    if ($html->nodeName === 'head') {
        $html->parentNode->removeChild($html);
        return;
    }

    if ($html instanceof DOMElement) {
        $headings = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'];

        // Remove all class
        $html->removeAttribute('class');
        $html->removeAttribute('id');

        // Filter out Google Doc comment
        // TODO

        // Filter out google redirect from link
        if ($html->tagName === 'a' && $html->hasAttribute('href')) {
            $href = $html->getAttribute('href');
            $G_URL = 'https://www.google.com/url?q=';
            if (strncmp($href, $G_URL, strlen($G_URL)) === 0) {
                $g_qs = [];
                parse_str((string) parse_url($href, PHP_URL_QUERY), $g_qs);
                // Only rewrite when a usable target was actually recovered.
                if (isset($g_qs['q']) && is_string($g_qs['q']) && $g_qs['q'] !== '') {
                    $html->setAttribute('href', $g_qs['q']);
                }
            }
        }

        // Clean img tag
        if ($html->tagName === 'img') {
            $html->removeAttribute('style');
            $html->removeAttribute('title');
            $html->removeAttribute('alt');
        }

        // Filter style
        if ($html->hasAttribute('style')) {
            $is_bold = false;
            $is_italic = false;
            $style = parse_style($html->getAttribute('style'));

            // Properties that are always dropped.
            $to_removed = [
                'font-family', 'line-height', 'orphans', 'widows', 'height', 'padding', 'margin',
                'vertical-align', 'margin-left', 'margin-right', 'margin-top', 'margin-bottom',
                'padding-left', 'padding-right', 'padding-top', 'padding-bottom',
                'text-decoration-skip-ink', '-webkit-text-decoration-skip', 'page-break-after'];

            // Property values that are treated as defaults and dropped.
            $default_values = [
                'color' => ['#000000'],
                'font-weight' => ['400'],
                'font-style' => ['normal'],
                'font-size' => ['11pt', '11.5pt', '12pt'],
                'text-align' => ['left'],
                'text-decoration' => ['none'],
                'background-color' => ['#ffffff'],
            ];

            // Per-declaration decisions.
            foreach ($style as $k => $v) {
                if ($v === 'inherit') {
                    $to_removed[] = $k;
                }
                if (isset($default_values[$k]) && in_array($v, $default_values[$k], true)) {
                    $to_removed[] = $k;
                }
                if ($k === 'font-weight' && $v === '700') {
                    $to_removed[] = $k;
                    $is_bold = true;
                }
                if ($k === 'font-style' && $v === 'italic') {
                    $to_removed[] = $k;
                    $is_italic = true;
                }
            }

            // Per-element decisions. These do not depend on any individual
            // declaration, so they are evaluated once instead of once per
            // declaration.
            if (in_array($html->tagName, $headings, true)) {
                $to_removed[] = 'color';
                $to_removed[] = 'font-weight';
                $to_removed[] = 'font-size';
                // Headings are not additionally wrapped in <strong>.
                $is_bold = false;
            }
            // For span with a inside/inside header
            if ($html->tagName === 'span') {
                // a inside
                if ($html->childNodes->length === 1 && $html->childNodes->item(0)->nodeName === 'a') {
                    $to_removed[] = 'color';
                    $to_removed[] = 'text-decoration';
                }
                // inside header
                $parent = $html->parentNode;
                if ($parent instanceof DOMElement && in_array($parent->tagName, $headings, true)) {
                    $to_removed[] = 'color';
                    $to_removed[] = 'text-decoration';
                    $to_removed[] = 'font-size';
                }
            }
            // For empty p or span
            if (in_array($html->tagName, ['p', 'span'], true) && element_empty($html)) {
                $to_removed = array_merge($to_removed, ['color', 'text-decoration', 'text-align']);
            }

            foreach ($to_removed as $k) {
                unset($style[$k]);
            }
            $s = unparse_style($style);
            if ($s === '') {
                $html->removeAttribute('style');
            } else {
                $html->setAttribute('style', $s);
            }

            // Bold: put <strong> where the element was, then move the element inside it.
            if ($is_bold) {
                $strong = $doc->createElement('strong');
                $html->parentNode->replaceChild($strong, $html);
                $strong->appendChild($html);
            }
            // Italic: same wrapping with <em> (nested inside <strong> when both apply).
            if ($is_italic) {
                $em = $doc->createElement('em');
                $html->parentNode->replaceChild($em, $html);
                $em->appendChild($html);
            }
        }
    }

    // Parse children. The child list is copied first because the recursive
    // calls may remove or re-parent nodes while it is being walked.
    if ($html->childNodes && $html->childNodes->length > 0) {
        $arr = [];
        foreach ($html->childNodes as $n) {
            $arr[] = $n;
        }
        foreach ($arr as $n) {
            parse_and_clean_1($doc, $n);
        }
    }
}
// Parse and fix newline and new paragraph
/**
 * Second cleaning pass: collapse runs of plain paragraphs (recursive).
 *
 * Walks the direct children of $html. Empty <p> elements are removed. A <p>
 * without attributes starts a run; each following attribute-less <p> is
 * detached and its children are appended to the run's first paragraph after a
 * <br>. A run ends at an empty paragraph, a paragraph that has attributes, or
 * any child that is not a <p> (such children are processed recursively).
 *
 * @param DOMDocument $doc  Owner document, used to create <br> elements.
 * @param DOMNode     $html Node whose children are processed; modified in place.
 * @return void
 */
function parse_and_clean_2(DOMDocument $doc, DOMNode $html)
{
    if (!$html->childNodes || $html->childNodes->length <= 0) {
        return;
    }

    // Work on a copy of the child list; the live list changes as nodes are removed.
    $snapshot = [];
    foreach ($html->childNodes as $node) {
        $snapshot[] = $node;
    }

    // First paragraph of the current run, or null when no run is open.
    $anchor = null;
    foreach ($snapshot as $node) {
        $is_paragraph = $node instanceof DOMElement && $node->tagName == 'p';

        if (!$is_paragraph) {
            // Recursive parsing
            parse_and_clean_2($doc, $node);
            $anchor = null;
            continue;
        }

        if (element_empty($node)) {
            $html->removeChild($node);
            $anchor = null;
            continue;
        }

        // Don't merge if style is different
        if ($node->attributes->length != 0) {
            $anchor = null;
            continue;
        }

        if ($anchor == null) {
            $anchor = $node;
            continue;
        }

        // Fold this paragraph into the run: <br>, then all of its children in order.
        $html->removeChild($node);
        $anchor->appendChild($doc->createElement('br'));
        while ($node->firstChild) {
            $anchor->appendChild($node->firstChild);
        }
    }
}
// Remove unnecessary tags
/**
 * Third cleaning pass: drop or unwrap elements that carry nothing (recursive).
 *
 * For the given node, after its children have been processed:
 *  - a leading <br> child is removed;
 *  - span/p/strong/em/h1-h6 elements with no children are removed;
 *  - span/strong/em elements whose text is blank are unwrapped (children kept);
 *  - span elements with no attributes are unwrapped (children kept).
 *
 * Blank text is tested against the empty string, so an element whose text is
 * "0" keeps its wrapper.
 *
 * @param DOMDocument $doc  Owner document (not used directly by this pass).
 * @param DOMNode     $html Node to clean; modified in place.
 * @return void
 */
function parse_and_clean_3(DOMDocument $doc, DOMNode $html)
{
    // <br> as first child
    if ($html->childNodes && $html->childNodes->length > 0 && $html->childNodes->item(0)->nodeName == 'br') {
        $html->removeChild($html->childNodes->item(0));
    }

    // Parse children (on a copy of the list, since children may remove themselves)
    if ($html->childNodes && $html->childNodes->length > 0) {
        $arr = [];
        foreach ($html->childNodes as $n) {
            $arr[] = $n;
        }
        foreach ($arr as $n) {
            parse_and_clean_3($doc, $n);
        }
    }

    // Elements left without any child are removed outright
    if (in_array($html->nodeName, ['span', 'p', 'strong', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'], true)) {
        if (!$html->childNodes || $html->childNodes->length == 0) {
            $html->parentNode->removeChild($html);
            return;
        }
    }

    // Textually empty span/strong/em: remove the element, keeping the children
    $textless_inline = in_array($html->nodeName, ['span', 'strong', 'em'], true)
        && trim((string) $html->textContent) === '';
    // Span without any attribute: the wrapper adds nothing
    $bare_span = $html instanceof DOMElement
        && $html->tagName == 'span'
        && $html->attributes->length == 0;

    if ($textless_inline || $bare_span) {
        $parent = $html->parentNode;
        // Move every child, in order, to just before the wrapper, then drop it.
        while ($html->firstChild !== null) {
            $parent->insertBefore($html->firstChild, $html);
        }
        $parent->removeChild($html);
    }
}
/**
 * Lift every <img> out of enclosing p/span/h1-h6 elements (recursive).
 *
 * Children are handled first. An <img> is then moved to sit immediately
 * before its parent, repeatedly, until its parent is no longer one of those
 * wrapper elements.
 *
 * @param DOMDocument $doc  Owner document (passed through to recursive calls).
 * @param DOMNode     $html Node to process; modified in place.
 * @return void
 */
function parse_and_clean_img(DOMDocument $doc, DOMNode $html)
{
    // Descend into a copy of the child list, because images move while we walk.
    $kids = $html->childNodes;
    if ($kids && $kids->length > 0) {
        $snapshot = [];
        for ($i = 0; $i < $kids->length; $i++) {
            $snapshot[] = $kids->item($i);
        }
        foreach ($snapshot as $node) {
            parse_and_clean_img($doc, $node);
        }
    }

    if ($html->nodeName != 'img') {
        return;
    }

    // Burst the image out of every wrapper; this only repairs bad formatting.
    $wrappers = ['p', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
    for ($holder = $html->parentNode; in_array($holder->nodeName, $wrappers); $holder = $html->parentNode) {
        $outer = $holder->parentNode;
        $holder->removeChild($html);
        $outer->insertBefore($html, $holder);
    }
}
// WordPress adds slashes to request data while it boots, so they are stripped
// here to recover the text exactly as it was submitted. Missing or non-string
// fields are treated as empty instead of raising notices.
$title = (isset($_POST['dname']) && is_string($_POST['dname'])) ? stripslashes($_POST['dname']) : '';
$content = (isset($_POST['dcontents']) && is_string($_POST['dcontents'])) ? stripslashes($_POST['dcontents']) : '';

// DOMDocument::loadHTML() cannot parse an empty string, so stop early.
if (trim($content) === '') {
    header('HTTP/1.1 400 Bad Request');
    die('<h1>Nothing to import</h1>');
}

$dom = new DOMDocument();
// Collect libxml parse warnings internally rather than printing them into the
// response, then restore the previous setting.
$previous_libxml_setting = libxml_use_internal_errors(true);
$loaded = $dom->loadHTML($content);
libxml_clear_errors();
libxml_use_internal_errors($previous_libxml_setting);
if (!$loaded) {
    header('HTTP/1.1 400 Bad Request');
    die('<h1>Could not parse the submitted document</h1>');
}

// Cleaning passes, in order: attributes/styles, image placement,
// paragraph merging, removal of empty/redundant elements.
parse_and_clean_1($dom, $dom);
parse_and_clean_img($dom, $dom);
parse_and_clean_2($dom, $dom);
parse_and_clean_3($dom, $dom);
// Initial WordPress Import: create an empty draft first so that the images
// uploaded afterwards can be attached to it as their parent post.
$wp_post_arr = [
    'post_author' => $user_id,
    'post_content' => '',
    // wp_insert_post() expects slashed input and unslashes it itself; without
    // wp_slash() any backslash in the title would be lost.
    'post_title' => wp_slash($title),
    'post_status' => 'draft',
    'post_type' => 'post',
];
$post_id = wp_insert_post($wp_post_arr, true);
if ($post_id == 0 || $post_id instanceof WP_Error) {
    echo '<h1>Wordpress Error!</h1>';
    var_dump($post_id);
    die;
}
// Handle image attachment: every <img> in the cleaned document is downloaded,
// stored in the WordPress media library as an attachment of the new post, and
// replaced by the image tag WordPress generates for that attachment.
$dom_imgs = $dom->getElementsByTagName('img');
// Copy the live node list, because the loop replaces the nodes it visits.
$imgs = [];
foreach ($dom_imgs as $d) {
    $imgs[] = $d;
}

// File extension per reported MIME type; anything else falls back to .jpg.
$extension_by_type = [
    'image/png' => '.png',
    'image/gif' => '.gif',
    'image/bmp' => '.bmp',
    'image/vnd.microsoft.icon' => '.ico',
    'image/svg+xml' => '.svg',
    'image/tiff' => '.tif',
    'image/webp' => '.webp',
];

foreach ($imgs as $img) {
    $href = $img->getAttribute('src');
    // Random file name
    $new_name = sha1($href . microtime());

    // Download image through the WordPress HTTP API. The src value comes from
    // submitted HTML, so the "safe" variant is used: it only performs HTTP(S)
    // requests and validates the target, unlike file_get_contents(), which
    // would also read local paths.
    $response = wp_safe_remote_get($href, ['timeout' => 30]);
    if (is_wp_error($response) || (int) wp_remote_retrieve_response_code($response) !== 200) {
        die('<h1>Image Download Error</h1>' . esc_html($href));
    }
    $file_contents = wp_remote_retrieve_body($response);

    // Get content type and file extension. Parameters such as "; charset=..."
    // are dropped before the lookup.
    $content_type = wp_remote_retrieve_header($response, 'content-type');
    if (is_array($content_type)) {
        $content_type = end($content_type);
    }
    $type_parts = explode(';', (string) $content_type);
    $content_type = strtolower(trim($type_parts[0]));
    $new_name .= isset($extension_by_type[$content_type]) ? $extension_by_type[$content_type] : '.jpg';

    // Upload to wordpress
    $uploaded = wp_upload_bits($new_name, null, $file_contents);
    if (!empty($uploaded['error'])) {
        die('<h1>Image Upload Error</h1>' . $uploaded['error']);
    }

    // Create attachment with previously created post as parent
    $attachment = [
        'guid' => $uploaded['url'],
        'post_mime_type' => $uploaded['type'],
        'post_parent' => $post_id,
        'post_title' => preg_replace('/\.[^.]+$/', '', $new_name),
        'post_content' => '',
        'post_status' => 'inherit',
    ];
    $image_id = wp_insert_attachment($attachment, $uploaded['file'], $post_id, true);
    if ($image_id == 0 || $image_id instanceof WP_Error) {
        echo '<h1>Wordpress Error!</h1>';
        // Dump the failing attachment result (previously the post id was dumped).
        var_dump($image_id);
        die;
    }

    // Generate thumbnail & metadata
    $attachment_data = wp_generate_attachment_metadata($image_id, $uploaded['file']);
    wp_update_attachment_metadata($image_id, $attachment_data);

    // Get the image tag
    $img_tag = get_image_tag($image_id, '', '', 'center', 'medium');

    // Replace image tag. appendXML() needs well-formed XML; if the generated
    // tag is rejected, keep the existing element and just point it at the
    // uploaded file.
    $template = $dom->createDocumentFragment();
    if ($template->appendXML($img_tag)) {
        $img->parentNode->replaceChild($template, $img);
    } else {
        $img->setAttribute('src', $uploaded['url']);
    }
}
// Update the post with proper image: serialise the children of <body> and
// store them as the content of the draft created earlier.
$output = '';
$body = $dom->getElementsByTagName('body')->item(0);
// item(0) is null when the parsed document has no <body>.
if ($body !== null) {
    foreach ($body->childNodes as $item) {
        $output .= $dom->saveHTML($item);
    }
}

// wp_insert_post() expects slashed input and unslashes it itself, so both
// fields are slashed from their raw values to keep backslashes intact.
$wp_post_arr['post_title'] = wp_slash($title);
$wp_post_arr['post_content'] = wp_slash($output);
// Supplying the ID makes wp_insert_post() update the existing draft.
$wp_post_arr['ID'] = $post_id;
$post_id = wp_insert_post($wp_post_arr, true);
if ($post_id == 0 || $post_id instanceof WP_Error) {
    echo '<h1>Wordpress Error!</h1>';
    var_dump($post_id);
    die;
}
echo '<h1>Post imported</h1>';
// Show the imported markup as a preview.
echo $output;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment