Metadata in PDF files can be stored in at least two places:
- the Info Dictionary, a limited set of key/value pairs
- XMP packets, which contain RDF statements expressed as XML
<!doctype html> | |
<meta charset="utf-8"> | |
<title>Render the text of a PDF with PDF.js</title> | |
<style> | |
.page-container { | |
box-shadow: 0 1px 3px #444; | |
position: relative; | |
font-size: 1px; | |
line-height: 1; |
<?php | |
if ($_SERVER['REQUEST_METHOD'] == 'OPTIONS') { | |
header('Access-Control-Allow-Origin: *'); | |
header('Access-Control-Allow-Methods: GET, OPTIONS'); | |
header('Access-Control-Allow-Headers: accept, x-requested-with, content-type'); | |
exit(); | |
} | |
$url = $_GET['url']; |
<?php | |
$url = 'http://en.wikipedia.org/wiki/1,1,1-Trichloroethane'; // example | |
$config = HTMLPurifier_Config::createDefault(); | |
$config->set('URI.Base', $url); // set the base URL (overrides a <base element in the HTML head?) | |
$config->set('URI.MakeAbsolute', true); // make all URLs absolute using the base URL set above | |
$config->set('AutoFormat.RemoveEmpty', true); // remove empty elements | |
$config->set('HTML.Doctype', 'XHTML 1.0 Strict'); // valid XML output (?) | |
$config->set('HTML.AllowedElements', array('p', 'div', 'a', 'br', 'table', 'thead', 'tbody', 'tr', 'th', 'td', 'ul', 'ol', 'li', 'b', 'i')); |
function listFilesInFolder() { | |
var folder = DocsList.getFolder("Maudesley Debates"); | |
var contents = folder.getFiles(); | |
var file; | |
var data; | |
var sheet = SpreadsheetApp.getActiveSheet(); | |
sheet.clear(); | |
import { FieldProps } from 'formik' | |
import React from 'react' | |
import Select, { Option, ReactSelectProps } from 'react-select' | |
export const SelectField: React.SFC<ReactSelectProps & FieldProps> = ({ | |
options, | |
field, | |
form, | |
}) => ( | |
<Select |
[50, 100, 3].toSorted(Intl.Collator('en', { numeric: true }).compare) |
const { JSDOM } = require('jsdom') | |
const { compact, expand, frame } = require('jsonld') | |
const url = 'https://www.bbc.co.uk/schedules/p00fzl6p/2020/06/14' | |
// fetch and parse HTML | |
const { window: { document } } = await JSDOM.fromURL(url) | |
// select the script elements containing JSON-LD | |
const elements = document.querySelectorAll('script[type="application/ld+json"]') |
/*global PDFJS:false, console:false, Promise:false */ | |
document.addEventListener('WebComponentsReady', function() { | |
'use strict'; | |
//PDFJS.workerSrc = ''; | |
PDFJS.disableWorker = true; | |
PDFJS.disableRange = true; | |
PDFJS.openExternalLinksInNewWindow = true; |
import RSSParser from 'npm:rss-parser' | |
await Deno.mkdir('audio', { recursive: true }) | |
const feedURL = 'https://anchor.fm/s/1252b450/podcast/rss' | |
const feed = await new RSSParser().parseURL(feedURL) | |
for (const item of feed.items) { | |
const { url } = item.enclosure | |
console.log(url) |