Skip to content

Instantly share code, notes, and snippets.

@westonruter
Created January 23, 2018 07:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save westonruter/04d479e809409e1f12a5944701f6f24f to your computer and use it in GitHub Desktop.
Save westonruter/04d479e809409e1f12a5944701f6f24f to your computer and use it in GitHub Desktop.
diff --git a/amp.php b/amp.php
index 449a814..731e4c6 100644
--- a/amp.php
+++ b/amp.php
@@ -283,6 +283,7 @@ function amp_render_post( $post ) {
}
$post_id = $post->ID;
+
/*
* If amp_render_post is called directly outside of the standard endpoint, is_amp_endpoint() will return false,
* which is not ideal for any code that expects to run in an AMP context.
@@ -292,6 +293,12 @@ function amp_render_post( $post ) {
if ( ! $was_set ) {
$wp_query->query_vars[ AMP_QUERY_VAR ] = true;
}
+//
+// $is_utf8 = 'utf-8' === strtolower( get_bloginfo( 'charset' ) );
+// if ( ! $is_utf8 ) {
+// header( 'Content-Type: text/html; charset=utf-8' );
+// ob_start();
+// }
/**
* Fires before rendering a post in AMP.
@@ -304,8 +311,22 @@ function amp_render_post( $post ) {
amp_add_post_template_actions();
$template = new AMP_Post_Template( $post );
+ header( 'content-type: text/html; charset=' . get_bloginfo( 'charset' ) );
+ print_r( $template );
+ exit;
+
+// header( 'content-type: text/html; charset=' . get_bloginfo( 'charset' ) );
+// print_r( $template );exit;
$template->load();
+
+ // AMP requires UTF-8.
+ if ( ! $is_utf8 ) {
+ $buffer = ob_get_clean();
+ $buffer = mb_convert_encoding( $buffer, 'utf-8', get_bloginfo( 'charset' ) );
+ echo $buffer; // WPCS: xss ok.
+ }
+
if ( ! $was_set ) {
unset( $wp_query->query_vars[ AMP_QUERY_VAR ] );
}
diff --git a/includes/templates/class-amp-content-sanitizer.php b/includes/templates/class-amp-content-sanitizer.php
index ddf5330..1b3508d 100644
--- a/includes/templates/class-amp-content-sanitizer.php
+++ b/includes/templates/class-amp-content-sanitizer.php
@@ -20,14 +20,17 @@ class AMP_Content_Sanitizer {
* @param string $content HTML content string or DOM document.
* @param string[] $sanitizer_classes Sanitizer classes.
* @param array $global_args Global args.
- * @return array Tuple containing sanitized HTML, scripts array, and styles array.
+ * @return array Tuple containing sanitized HTML (in site's character encoding), scripts array, and styles array.
*/
public static function sanitize( $content, array $sanitizer_classes, $global_args = array() ) {
$dom = AMP_DOM_Utils::get_dom_from_content( $content );
$results = self::sanitize_document( $dom, $sanitizer_classes, $global_args );
+
+ $html = AMP_DOM_Utils::get_content_from_dom( $dom );
+
return array(
- AMP_DOM_Utils::get_content_from_dom( $dom ),
+ $html,
$results['scripts'],
$results['styles'],
);
diff --git a/includes/templates/class-amp-post-template.php b/includes/templates/class-amp-post-template.php
index 313fe1f..3effdae 100644
--- a/includes/templates/class-amp-post-template.php
+++ b/includes/templates/class-amp-post-template.php
@@ -363,6 +363,10 @@ class AMP_Post_Template {
)
);
+ # GOOD
+// header( 'content-type: text/html; charset=' . get_bloginfo( 'charset' ) );
+// print_r( $amp_content );exit;
+
$this->add_data_by_key( 'post_amp_content', $amp_content->get_amp_content() );
$this->merge_data_for_key( 'amp_component_scripts', $amp_content->get_amp_scripts() );
$this->merge_data_for_key( 'post_amp_styles', $amp_content->get_amp_styles() );
diff --git a/includes/utils/class-amp-dom-utils.php b/includes/utils/class-amp-dom-utils.php
index 9492dd4..db07e81 100644
--- a/includes/utils/class-amp-dom-utils.php
+++ b/includes/utils/class-amp-dom-utils.php
@@ -54,14 +54,9 @@ class AMP_DOM_Utils {
public static function get_dom( $document ) {
$libxml_previous_state = libxml_use_internal_errors( true );
- $dom = new DOMDocument();
+ $dom = new DOMDocument( '1.0', get_bloginfo( 'charset' ) );
+// $dom->substituteEntities = false;
- /*
- * Wrap in dummy tags, since XML needs one parent node.
- * It also makes it easier to loop through nodes.
- * We can later use this to extract our nodes.
- * Add charset so loadHTML does not have problems parsing it.
- */
$result = $dom->loadHTML( $document );
libxml_clear_errors();
@@ -86,6 +81,12 @@ class AMP_DOM_Utils {
* @return DOMDocument|false Returns DOMDocument, or false if conversion failed.
*/
public static function get_dom_from_content( $content ) {
+
+ // Make sure content is converted to UTF-8 first.
+ if ( function_exists( 'mb_convert_encoding' ) && 'utf-8' !== strtolower( get_bloginfo( 'charset' ) ) ) {
+ $content = mb_convert_encoding( $content, 'utf-8', get_bloginfo( 'charset' ) );
+ }
+
/*
* Wrap in dummy tags, since XML needs one parent node.
* It also makes it easier to loop through nodes.
@@ -94,8 +95,7 @@ class AMP_DOM_Utils {
* See: http://php.net/manual/en/domdocument.loadhtml.php#78243
*/
$document = sprintf(
- '<html><head><meta http-equiv="content-type" content="text/html; charset=%s"></head><body>%s</body></html>',
- get_bloginfo( 'charset' ),
+ '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body>%s</body></html>',
$content
);
@@ -112,7 +112,7 @@ class AMP_DOM_Utils {
*
* @param DOMDocument $dom Represents an HTML document from which to extract HTML content.
*
- * @return string Returns the HTML content represented in the DOMDocument
+ * @return string Returns the HTML content represented in the DOMDocument in UTF-8.
*/
public static function get_content_from_dom( $dom ) {
@@ -136,6 +136,25 @@ class AMP_DOM_Utils {
$out .= self::get_content_from_dom_node( $dom, $child_node );
}
+ /*
+ * Since AMP_DOM_Utils::get_content_from_dom() always returns markup as UTF-8 HTML,
+ * we must convert the content back to the blog's encoding before finally
+ * converting everything back to UTF-8 at the end, since AMP mandates UTF-8.
+ */
+ if ( function_exists( 'mb_convert_encoding' ) && 'utf-8' !== strtolower( get_bloginfo( 'charset' ) ) ) {
+// header( 'content-type: text/html; charset=utf-8' );
+// echo $html;exit;
+
+// header( 'content-type: text/html; charset=UTF-8' );
+ header( 'content-type: text/html; charset=' . get_bloginfo( 'charset' ) );
+ $out = mb_convert_encoding( $out, get_bloginfo( 'charset' ), 'UTF-8' );
+ echo $out;exit;
+
+ // GOOD: Converted to blog charset properly.
+// header( 'content-type: text/html; charset=' . get_bloginfo( 'charset' ) );
+// echo $html;exit;
+ }
+
return $out;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment