Skip to content

Instantly share code, notes, and snippets.

"""Match URLs from WARC records according to regular expression.
The WARC records to process are those selected by a SQL query on the columnar URL index.
"""
import re
from bs4 import BeautifulSoup
from bs4.dammit import EncodingDetector
from pyspark.sql import SparkSession
_recordWrite: function(id, fnName, totalTime, args) {
// TODO: totalTime isn't that useful since it doesn't count paints/reflows
var writes =
ReactDefaultPerf
._allMeasurements[ReactDefaultPerf._allMeasurements.length - 1]
.writes;
writes[id] = writes[id] || [];
writes[id].push({
type: fnName,
time: totalTime,