Skip to content

Instantly share code, notes, and snippets.

@saagarjha
Created February 17, 2023 09:36
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save saagarjha/07e897fd72b370027ce280480761cf1e to your computer and use it in GitHub Desktop.
Save saagarjha/07e897fd72b370027ce280480761cf1e to your computer and use it in GitHub Desktop.
Apple's metadata extraction code for link previews in Messages, taken from macOS Ventura 13.3 Beta (22E5219e)
//
// LinkPresentation
// Copyright © 2015-2020 Apple Inc. All rights reserved.
//
// FIXME: Twitter equivalents?
(function () {
// State of one extraction run plus the DOM helpers that do not depend on the
// <meta> property table.
//   doc         - document to inspect; assigned before extraction starts.
//   responseURL - base URL used to absolutize relative URLs.
//   result      - dictionary that the extraction fills in and finally returns.
var MetadataExtractor = {
    doc: undefined,
    responseURL: undefined,
    result: {},

    // Returns URLString resolved against responseURL. Empty or missing input is
    // returned unchanged. An unparseable URL makes the URL constructor throw;
    // the exception propagates to the caller.
    resolveURL: function (URLString) {
        // Don't absolutize empty strings.
        if (!URLString)
            return URLString;
        return new window.URL(URLString, this.responseURL).toString();
    },

    // Returns the <link> elements under <head> that match `selector`, best
    // candidate first: Apple touch icons before favicons, then wider icons
    // before narrower ones, then precomposed touch icons before plain ones.
    // Returns [] when the document has no <head> or nothing matches.
    extractAndSortIconsWithSelector: function (selector) {
        if (!this.doc.head)
            return [];
        const extractedIcons = this.doc.head.querySelectorAll(selector);
        if (!extractedIcons.length)
            return [];

        const IconTypeFavicon = 0;
        const IconTypeAppleTouchIcon = 1;
        const IconTypeAppleTouchIconPrecomposed = 2;
        const AppleTouchIconDefaultWidth = 60;

        function getIconType(icon) {
            const iconTypeString = icon.getAttribute("rel");
            if (iconTypeString === "apple-touch-icon")
                return IconTypeAppleTouchIcon;
            if (iconTypeString === "apple-touch-icon-precomposed")
                return IconTypeAppleTouchIconPrecomposed;
            return IconTypeFavicon;
        }

        // Width taken from the leading number of the `sizes` attribute. A missing
        // or unparseable value (e.g. "any") falls back to the touch icon default
        // width, or to 0 for favicons, so the result is never NaN and the
        // comparator below stays a consistent ordering.
        function getIconWidth(icon, iconType) {
            const parsedWidth = parseInt(icon.getAttribute("sizes"), 10);
            if (!Number.isNaN(parsedWidth))
                return parsedWidth;
            return iconType === IconTypeFavicon ? 0 : AppleTouchIconDefaultWidth;
        }

        function compareIconsDescending(icon1, icon2) {
            const icon1Type = getIconType(icon1);
            const icon2Type = getIconType(icon2);

            // Apple Touch icons always come first.
            const icon1IsFavicon = icon1Type === IconTypeFavicon;
            const icon2IsFavicon = icon2Type === IconTypeFavicon;
            if (icon1IsFavicon !== icon2IsFavicon)
                return icon1IsFavicon ? 1 : -1;

            // Wider icons come first; only the sign of the difference matters.
            const widthDifference = getIconWidth(icon2, icon2Type) - getIconWidth(icon1, icon1Type);
            if (widthDifference !== 0)
                return widthDifference;

            // A Precomposed icon should come first if both icons have the same size.
            const icon1IsPrecomposed = icon1Type === IconTypeAppleTouchIconPrecomposed;
            const icon2IsPrecomposed = icon2Type === IconTypeAppleTouchIconPrecomposed;
            if (icon1IsPrecomposed !== icon2IsPrecomposed)
                return icon1IsPrecomposed ? -1 : 1;
            return 0;
        }

        return Array.from(extractedIcons).sort(compareIconsDescending);
    },

    // Shared implementation of the two icon URL extractors: sorted icons for
    // `selector`, mapped to absolute URLs, skipping icons without an href.
    extractIconURLsWithSelector: function (selector) {
        const urls = [];
        for (const icon of this.extractAndSortIconsWithSelector(selector)) {
            const url = icon.href;
            if (url)
                urls.push(this.resolveURL(url));
        }
        return urls;
    },

    // Absolute URLs of the Apple touch icons, best candidate first.
    extractTouchIconURLs: function () {
        return this.extractIconURLsWithSelector("link[rel=apple-touch-icon], link[rel=apple-touch-icon-precomposed]");
    },

    // Absolute URLs of the favicons, best candidate first.
    extractFavIconURLs: function () {
        return this.extractIconURLsWithSelector("link[rel=icon], link[rel='shortcut icon']");
    },

    // True when <head> contains a <link> whose type is application/activity+json.
    usesActivityPub: function () {
        return !!this.doc.head?.querySelector("link[type='application/activity+json']");
    },

    // Parses a comma-separated list of `key=value` pairs into a dictionary with
    // trimmed keys and values. Each pair is split at its FIRST "=" so values
    // that themselves contain "=" (such as a URL with a query string) are kept
    // whole. Pairs without any "=" are ignored. Missing or empty input yields {}.
    parseKeyValueString: function (s) {
        const pairs = {};
        if (!s)
            return pairs;
        for (const pair of s.split(",")) {
            const separatorIndex = pair.indexOf("=");
            if (separatorIndex === -1)
                continue;
            const key = pair.substring(0, separatorIndex).trim();
            pairs[key] = pair.substring(separatorIndex + 1).trim();
        }
        return pairs;
    },
};
// Value transformations shared by several entries of the table below.
const absolutizeURL = (val) => MetadataExtractor.resolveURL(val);
const parseDecimalInteger = (val) => parseInt(val, 10);
// Flags carried by every free-text entry.
const freeTextFlags = { stripLeadingAndTrailingWhitespace: true, decodeHTMLEntities: true };

// Maps each output key to the <meta> keys that feed it. Entry order matters:
// a <meta> key is attributed to the first entry whose `properties` lists it.
//   properties             - accepted <meta> keys, in order of preference.
//   transformationFunction - optional; applied to the chosen value (which may
//                            be undefined when no key was present).
//   stripLeadingAndTrailingWhitespace / decodeHTMLEntities - optional text
//                            clean-up flags.
//   repeatProperties       - marks a repeating entry; lists the keys that
//                            start a new item.
//   children               - per-item sub-properties of a repeating entry.
//   equivalencyProperty    - child key used to recognise duplicate items.
var propertiesDefinitions = {
    LPMetadataURL: {
        properties: ["og:url"],
        transformationFunction: (val) => (val ? MetadataExtractor.resolveURL(val) : MetadataExtractor.responseURL),
    },
    LPMetadataTitle: {
        properties: ["apple:title", "og:title", "twitter:title"],
        transformationFunction: (val) => val || MetadataExtractor.doc.title,
        ...freeTextFlags,
    },
    LPMetadataDescription: {
        properties: ["og:description", "twitter:description", "Description", "description"],
        ...freeTextFlags,
    },
    LPMetadataAppleDescription: {
        properties: ["apple:description"],
        ...freeTextFlags,
    },
    LPMetadataSiteName: {
        properties: ["og:site_name"],
        ...freeTextFlags,
    },
    LPMetadataItemType: {
        properties: ["og:type"],
        ...freeTextFlags,
    },
    LPMetadataRelatedURL: {
        properties: ["og:see_also"],
        transformationFunction: absolutizeURL,
    },
    LPMetadataCreator: {
        properties: ["author"],
        ...freeTextFlags,
    },
    LPMetadataCreatorFacebookProfile: {
        properties: ["article:author"],
        ...freeTextFlags,
    },
    LPMetadataCreatorTwitterUsername: {
        properties: ["twitter:creator", "twitter:creator:id"],
        ...freeTextFlags,
    },
    LPMetadataTwitterCard: {
        properties: ["twitter:card"],
        ...freeTextFlags,
    },
    LPMetadataAppleContentID: {
        properties: ["apple:content_id"],
    },
    LPMetadataImages: {
        repeatProperties: ["og:image:secure_url", "og:image", "og:image:url", "twitter:image", "image"],
        properties: ["og:image:secure_url", "og:image", "og:image:url", "og:image:width", "og:image:height", "og:image:type", "twitter:image:alt", "twitter:image", "image"],
        children: {
            LPMetadataImageURL: {
                properties: ["og:image:secure_url", "og:image", "og:image:url", "twitter:image", "image"],
                transformationFunction: absolutizeURL,
            },
            LPMetadataImageWidth: {
                properties: ["og:image:width"],
                transformationFunction: parseDecimalInteger,
            },
            LPMetadataImageHeight: {
                properties: ["og:image:height"],
                transformationFunction: parseDecimalInteger,
            },
            LPMetadataImageType: {
                properties: ["og:image:type"],
            },
            LPMetadataImageAccessibilityText: {
                properties: ["twitter:image:alt"],
            },
        },
        equivalencyProperty: "LPMetadataImageURL",
    },
    LPMetadataVideos: {
        repeatProperties: ["og:video:secure_url", "og:video", "og:video:url"],
        properties: ["og:video:secure_url", "og:video", "og:video:url", "og:video:width", "og:video:height", "og:video:type"],
        children: {
            LPMetadataVideoURL: {
                properties: ["og:video:secure_url", "og:video", "og:video:url"],
                transformationFunction: absolutizeURL,
            },
            LPMetadataVideoWidth: {
                properties: ["og:video:width"],
                transformationFunction: parseDecimalInteger,
            },
            LPMetadataVideoHeight: {
                properties: ["og:video:height"],
                transformationFunction: parseDecimalInteger,
            },
            LPMetadataVideoType: {
                properties: ["og:video:type"],
            },
        },
        equivalencyProperty: "LPMetadataVideoURL",
    },
    LPMetadataStreamingVideos: {
        repeatProperties: ["twitter:player:stream"],
        properties: ["twitter:player:stream", "twitter:player:stream:content_type"],
        children: {
            LPMetadataVideoURL: {
                properties: ["twitter:player:stream"],
                transformationFunction: absolutizeURL,
            },
            LPMetadataVideoType: {
                properties: ["twitter:player:stream:content_type"],
            },
        },
        equivalencyProperty: "LPMetadataVideoURL",
    },
    LPMetadataAudios: {
        repeatProperties: ["og:audio:secure_url", "og:audio", "og:audio:url"],
        properties: ["og:audio:secure_url", "og:audio", "og:audio:url", "og:audio:type"],
        children: {
            LPMetadataAudioURL: {
                properties: ["og:audio:secure_url", "og:audio", "og:audio:url"],
                transformationFunction: absolutizeURL,
            },
            LPMetadataAudioType: {
                properties: ["og:audio:type"],
            },
        },
        equivalencyProperty: "LPMetadataAudioURL",
    },
    LPMetadataAssociatedApplication: {
        properties: ["apple-itunes-app"],
        transformationFunction: (val) => MetadataExtractor.parseKeyValueString(val),
    },
};
// Returns the name of the first entry in propertiesDefinitions whose
// `properties` list contains `key`, or undefined when no entry accepts it.
function propertyDefinitionNameForProperty(key)
{
    for (const candidateName in propertiesDefinitions) {
        const { properties: acceptedKeys } = propertiesDefinitions[candidateName];
        if (acceptedKeys.includes(key))
            return candidateName;
    }
    return undefined;
}
// Returns the lower-cased text before the first ":" of `key` (for example
// "og" for "og:image"), or "" when the key contains no colon.
function namespaceForMetadataKey(key)
{
    const separatorPosition = key.indexOf(":");
    return separatorPosition < 0 ? "" : key.slice(0, separatorPosition).toLowerCase();
}
// Resolves a single output property and stores it in `result` under
// `propertyDefinitionName`.
//   values             - collected <meta> values keyed by <meta> key.
//   propertyDefinition - supplies `properties` plus the optional
//                        `transformationFunction` and whitespace flag.
// The first truthy value among `properties` is chosen, then transformed, then
// trimmed when it is a string and trimming is requested. Nothing is stored
// when the final value is falsy.
function resolvePropertyWithDefinition(result, values, propertyDefinition, propertyDefinitionName)
{
    let resolved;
    for (const candidateKey of propertyDefinition.properties) {
        resolved = values[candidateKey];
        if (resolved)
            break;
    }

    // The transformation runs even when no value was found, so it can supply a fallback.
    const { transformationFunction: transform, stripLeadingAndTrailingWhitespace: shouldTrim } = propertyDefinition;
    if (transform)
        resolved = transform(resolved);

    if (shouldTrim && typeof resolved == "string")
        resolved = resolved.trim();

    if (!resolved)
        return;
    result[propertyDefinitionName] = resolved;
}
// Resolves every entry of `definitions` against `collectedValues`, writing the
// outcomes into `result`. Entries whose definition is falsy are skipped.
function resolveProperties(result, collectedValues, definitions)
{
    for (const name in definitions) {
        const definition = definitions[name];
        if (definition)
            resolvePropertyWithDefinition(result, collectedValues, definition, name);
    }
}
// Turns one group of collected <meta> values into an item of the repeating
// property `repeatedPropertyName` and records it in MetadataExtractor.result.
// The item is tagged with the namespace of the first collected key. When the
// definition names an `equivalencyProperty` and an earlier item carries the
// same value for it, the new item is not appended; instead the child
// properties the earlier item lacks are copied over.
function resolveRepeatedProperty(collectedValues, repeatedPropertyName)
{
    const definition = propertiesDefinitions[repeatedPropertyName];

    const item = {};
    resolveProperties(item, collectedValues, definition.children);
    item.LPMetadataNamespace = namespaceForMetadataKey(Object.keys(collectedValues)[0]);

    const allResults = MetadataExtractor.result;
    if (!allResults.hasOwnProperty(repeatedPropertyName))
        allResults[repeatedPropertyName] = [];
    const existingItems = allResults[repeatedPropertyName];

    if (definition.hasOwnProperty("equivalencyProperty")) {
        const identityKey = definition.equivalencyProperty;
        // Only items that both carry the identity key can be considered equal.
        const duplicate = item.hasOwnProperty(identityKey)
            ? existingItems.find((other) => other.hasOwnProperty(identityKey) && other[identityKey] == item[identityKey])
            : undefined;
        if (duplicate) {
            // Merge missing child properties into the existing item.
            for (const childName of Object.keys(definition.children)) {
                if (item.hasOwnProperty(childName) && !duplicate.hasOwnProperty(childName))
                    duplicate[childName] = item[childName];
            }
            return;
        }
    }

    existingItems.push(item);
}
// Dictionary whose missing entries are created on first read: reading a key
// the backing object does not own stores `new type()` under that key and
// returns it. The constructor returns the Proxy itself, not a class instance.
class ObjectWithDefaultValues
{
    constructor(type) {
        const createOnRead = {
            get: (backing, key) => {
                if (backing.hasOwnProperty(key))
                    return backing[key];
                const created = new type();
                backing[key] = created;
                return created;
            },
        };
        return new Proxy({}, createOnRead);
    }
}
// For every entry of MetadataExtractor.result (each expected to be an array of
// items tagged with "LPMetadataNamespace"), keeps only the items belonging to
// the namespace that contributed the most items. Ties go to the namespace seen
// first; an empty array stays empty.
// The empty-string namespace (keys without a colon, such as "image") takes
// part in the count like any other namespace; it is not treated as
// "no namespace chosen yet".
function removeRepeatedPropertiesFromLessPopulousNamespaces()
{
    for (const [rootProperty, children] of Object.entries(MetadataExtractor.result)) {
        // Group the items by namespace, preserving first-seen order.
        const childrenByNamespace = new Map();
        for (const child of children) {
            const namespace = child["LPMetadataNamespace"];
            if (!childrenByNamespace.has(namespace))
                childrenByNamespace.set(namespace, []);
            childrenByNamespace.get(namespace).push(child);
        }

        // Every group holds at least one item, so the first group always
        // replaces the empty initial choice; later groups must be strictly larger.
        let mostPopulousChildren = [];
        for (const childrenOfNamespace of childrenByNamespace.values()) {
            if (childrenOfNamespace.length > mostPopulousChildren.length)
                mostPopulousChildren = childrenOfNamespace;
        }
        MetadataExtractor.result[rootProperty] = mostPopulousChildren;
    }
}
// Returns true when `dict` has at least one enumerable key that is absent from
// the array `arr`; false otherwise (including for an empty `dict`).
function hasAnyKeysNotInArray(dict, arr)
{
    for (const candidate in dict) {
        if (arr.includes(candidate))
            continue;
        return true;
    }
    return false;
}
// Decodes HTML character references in `rawString` by assigning it as the
// innerHTML of a detached <textarea> and reading the element's text value back.
function decodeHTMLEntities(rawString)
{
    const scratchArea = document.createElement("textarea");
    scratchArea.innerHTML = rawString;
    const { value: decodedText } = scratchArea;
    return decodedText;
}
// Walks every <meta> element of MetadataExtractor.doc, collects the values whose
// key (property, name or itemprop attribute) is listed in propertiesDefinitions,
// and writes the resolved metadata into MetadataExtractor.result.
// Non-repeating properties keep the first value seen for each key. Repeating
// properties (definitions that have `repeatProperties`) are accumulated one item
// at a time and handed to resolveRepeatedProperty whenever an item is complete.
function parseMetaTags()
{
var allMetaElements = MetadataExtractor.doc.getElementsByTagName("meta");
// Values of non-repeating properties, keyed by <meta> key.
var rootCollectedValues = {};
// Values of the item currently being assembled for each repeating property,
// keyed by definition name. Reading a missing key creates an empty object for it.
var childCollectedValues = new ObjectWithDefaultValues(Object);
for (var i = 0; i < allMetaElements.length; i++) {
var el = allMetaElements[i];
// The key is the first non-empty attribute among property, name and itemprop.
var property = el.getAttribute("property") || el.getAttribute("name") || el.getAttribute("itemprop");
var content = el.content;
// Tags without a key or without content carry nothing to collect.
if (!property || !content)
continue;
var propertyDefinitionName = propertyDefinitionNameForProperty(property);
if (!propertyDefinitionName)
continue;
var propertyDefinition = propertiesDefinitions[propertyDefinitionName];
if (!propertyDefinition)
continue;
// Decoding is applied to the content before it is collected, and only for
// definitions that request it.
if (propertyDefinition.decodeHTMLEntities)
content = decodeHTMLEntities(content);
if (!propertyDefinition.hasOwnProperty("repeatProperties")) {
// Not a repeating property, so take the first value.
if (!rootCollectedValues.hasOwnProperty(property))
rootCollectedValues[property] = content;
} else {
// A repeating property.
// Keys listed in repeatProperties can start a new item; other keys
// (width, height, type, ...) only add detail to the current item.
if (propertyDefinition.repeatProperties.includes(property)) {
var collectedValuesForProperty = childCollectedValues[propertyDefinitionName];
// The current item is finished when it already holds a detail key or
// already holds this very key: flush it and start an empty item.
if (collectedValuesForProperty && (hasAnyKeysNotInArray(collectedValuesForProperty, propertyDefinition.repeatProperties) || Object.keys(collectedValuesForProperty).includes(property))) {
resolveRepeatedProperty(collectedValuesForProperty, propertyDefinitionName);
childCollectedValues[propertyDefinitionName] = {};
}
}
// Record the value in the current item, whichever kind of key it is.
childCollectedValues[propertyDefinitionName][property] = content;
}
}
// Flush the items that were still being assembled when the tags ran out.
for (var propertyDefinitionName in childCollectedValues)
resolveRepeatedProperty(childCollectedValues[propertyDefinitionName], propertyDefinitionName);
// Runs before the non-repeating properties are resolved: it iterates every
// entry of MetadataExtractor.result as a list of items, and at this point the
// only entries written by this function are the repeated-property arrays.
removeRepeatedPropertiesFromLessPopulousNamespaces();
resolveProperties(MetadataExtractor.result, rootCollectedValues, propertiesDefinitions);
}
MetadataExtractor.responseURL = window.location.href;
MetadataExtractor.doc = document;
try {
parseMetaTags();
const touchIcons = MetadataExtractor.extractTouchIconURLs();
const favIcons = MetadataExtractor.extractFavIconURLs();
const usesActivityPub = MetadataExtractor.usesActivityPub();
MetadataExtractor.result["LPMetadataIcons"] = touchIcons.concat(favIcons);
MetadataExtractor.result["LPMetadataSelectedText"] = window.getSelection().toString();
MetadataExtractor.result["LPMetadataUsesActivityPub"] = usesActivityPub;
return MetadataExtractor.result;
} catch(e) {
return { error: "Exception: " + e };
}
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment