Created
January 23, 2018 07:33
-
-
Save westonruter/04d479e809409e1f12a5944701f6f24f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/amp.php b/amp.php | |
index 449a814..731e4c6 100644 | |
--- a/amp.php | |
+++ b/amp.php | |
@@ -283,6 +283,7 @@ function amp_render_post( $post ) { | |
} | |
$post_id = $post->ID; | |
+ | |
/* | |
* If amp_render_post is called directly outside of the standard endpoint, is_amp_endpoint() will return false, | |
* which is not ideal for any code that expects to run in an AMP context. | |
@@ -292,6 +293,12 @@ function amp_render_post( $post ) { | |
if ( ! $was_set ) { | |
$wp_query->query_vars[ AMP_QUERY_VAR ] = true; | |
} | |
+// | |
+// $is_utf8 = 'utf-8' === strtolower( get_bloginfo( 'charset' ) ); | |
+// if ( ! $is_utf8 ) { | |
+// header( 'Content-Type: text/html; charset=utf-8' ); | |
+// ob_start(); | |
+// } | |
/** | |
* Fires before rendering a post in AMP. | |
@@ -304,8 +311,22 @@ function amp_render_post( $post ) { | |
amp_add_post_template_actions(); | |
$template = new AMP_Post_Template( $post ); | |
+ header( 'content-type: text/html; charset=' . get_bloginfo( 'charset' ) ); | |
+ print_r( $template ); | |
+ exit; | |
+ | |
+// header( 'content-type: text/html; charset=' . get_bloginfo( 'charset' ) ); | |
+// print_r( $template );exit; | |
$template->load(); | |
+ | |
+ // AMP requires UTF-8. | |
+ if ( ! $is_utf8 ) { | |
+ $buffer = ob_get_clean(); | |
+ $buffer = mb_convert_encoding( $buffer, 'utf-8', get_bloginfo( 'charset' ) ); | |
+ echo $buffer; // WPCS: xss ok. | |
+ } | |
+ | |
if ( ! $was_set ) { | |
unset( $wp_query->query_vars[ AMP_QUERY_VAR ] ); | |
} | |
diff --git a/includes/templates/class-amp-content-sanitizer.php b/includes/templates/class-amp-content-sanitizer.php | |
index ddf5330..1b3508d 100644 | |
--- a/includes/templates/class-amp-content-sanitizer.php | |
+++ b/includes/templates/class-amp-content-sanitizer.php | |
@@ -20,14 +20,17 @@ class AMP_Content_Sanitizer { | |
* @param string $content HTML content string or DOM document. | |
* @param string[] $sanitizer_classes Sanitizer classes. | |
* @param array $global_args Global args. | |
- * @return array Tuple containing sanitized HTML, scripts array, and styles array. | |
+ * @return array Tuple containing sanitized HTML (in site's character encoding), scripts array, and styles array. | |
*/ | |
public static function sanitize( $content, array $sanitizer_classes, $global_args = array() ) { | |
$dom = AMP_DOM_Utils::get_dom_from_content( $content ); | |
$results = self::sanitize_document( $dom, $sanitizer_classes, $global_args ); | |
+ | |
+ $html = AMP_DOM_Utils::get_content_from_dom( $dom ); | |
+ | |
return array( | |
- AMP_DOM_Utils::get_content_from_dom( $dom ), | |
+ $html, | |
$results['scripts'], | |
$results['styles'], | |
); | |
diff --git a/includes/templates/class-amp-post-template.php b/includes/templates/class-amp-post-template.php | |
index 313fe1f..3effdae 100644 | |
--- a/includes/templates/class-amp-post-template.php | |
+++ b/includes/templates/class-amp-post-template.php | |
@@ -363,6 +363,10 @@ class AMP_Post_Template { | |
) | |
); | |
+ # GOOD | |
+// header( 'content-type: text/html; charset=' . get_bloginfo( 'charset' ) ); | |
+// print_r( $amp_content );exit; | |
+ | |
$this->add_data_by_key( 'post_amp_content', $amp_content->get_amp_content() ); | |
$this->merge_data_for_key( 'amp_component_scripts', $amp_content->get_amp_scripts() ); | |
$this->merge_data_for_key( 'post_amp_styles', $amp_content->get_amp_styles() ); | |
diff --git a/includes/utils/class-amp-dom-utils.php b/includes/utils/class-amp-dom-utils.php | |
index 9492dd4..db07e81 100644 | |
--- a/includes/utils/class-amp-dom-utils.php | |
+++ b/includes/utils/class-amp-dom-utils.php | |
@@ -54,14 +54,9 @@ class AMP_DOM_Utils { | |
public static function get_dom( $document ) { | |
$libxml_previous_state = libxml_use_internal_errors( true ); | |
- $dom = new DOMDocument(); | |
+ $dom = new DOMDocument( '1.0', get_bloginfo( 'charset' ) ); | |
+// $dom->substituteEntities = false; | |
- /* | |
- * Wrap in dummy tags, since XML needs one parent node. | |
- * It also makes it easier to loop through nodes. | |
- * We can later use this to extract our nodes. | |
- * Add charset so loadHTML does not have problems parsing it. | |
- */ | |
$result = $dom->loadHTML( $document ); | |
libxml_clear_errors(); | |
@@ -86,6 +81,12 @@ class AMP_DOM_Utils { | |
* @return DOMDocument|false Returns DOMDocument, or false if conversion failed. | |
*/ | |
public static function get_dom_from_content( $content ) { | |
+ | |
+ // Make sure content is converted to UTF-8 first. | |
+ if ( function_exists( 'mb_convert_encoding' ) && 'utf-8' !== strtolower( get_bloginfo( 'charset' ) ) ) { | |
+ $content = mb_convert_encoding( $content, 'utf-8', get_bloginfo( 'charset' ) ); | |
+ } | |
+ | |
/* | |
* Wrap in dummy tags, since XML needs one parent node. | |
* It also makes it easier to loop through nodes. | |
@@ -94,8 +95,7 @@ class AMP_DOM_Utils { | |
* See: http://php.net/manual/en/domdocument.loadhtml.php#78243 | |
*/ | |
$document = sprintf( | |
- '<html><head><meta http-equiv="content-type" content="text/html; charset=%s"></head><body>%s</body></html>', | |
- get_bloginfo( 'charset' ), | |
+ '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body>%s</body></html>', | |
$content | |
); | |
@@ -112,7 +112,7 @@ class AMP_DOM_Utils { | |
* | |
* @param DOMDocument $dom Represents an HTML document from which to extract HTML content. | |
* | |
- * @return string Returns the HTML content represented in the DOMDocument | |
+ * @return string Returns the HTML content represented in the DOMDocument in UTF-8. | |
*/ | |
public static function get_content_from_dom( $dom ) { | |
@@ -136,6 +136,25 @@ class AMP_DOM_Utils { | |
$out .= self::get_content_from_dom_node( $dom, $child_node ); | |
} | |
+ /* | |
+ * Since AMP_DOM_Utils::get_content_from_dom() always returns markup as UTF-8 HTML, | |
+ * we must convert the content back to the blog's encoding prior to finally | |
+ * converting everything back to UTF-8 in the end, since AMP mandates UTF-8. | |
+ */ | |
+ if ( function_exists( 'mb_convert_encoding' ) && 'utf-8' !== strtolower( get_bloginfo( 'charset' ) ) ) { | |
+// header( 'content-type: text/html; charset=utf-8' ); | |
+// echo $html;exit; | |
+ | |
+// header( 'content-type: text/html; charset=UTF-8' ); | |
+ header( 'content-type: text/html; charset=' . get_bloginfo( 'charset' ) ); | |
+ $out = mb_convert_encoding( $out, get_bloginfo( 'charset' ), 'UTF-8' ); | |
+ echo $out;exit; | |
+ | |
+ // GOOD: Converted to blog charset properly. | |
+// header( 'content-type: text/html; charset=' . get_bloginfo( 'charset' ) ); | |
+// echo $html;exit; | |
+ } | |
+ | |
return $out; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment