John Berlin N0taN3rd

## chromeCrawling.js
const CDP = require('chrome-remote-interface')
const Promise = require('bluebird')
const util = require('util')
const path = require('path')
const fs = require('fs-extra')
const filenameify = require('filenamify-url')
const JsDetector = require('./lib/jsDetector')
const detectLib = require('./lib/detectlibs')

// Load the app and category definitions from lib/apps.json.
// The path is anchored to __dirname (the same base the ./lib requires above use)
// instead of the process working directory, so the script can be launched from anywhere.
const {apps, categories} = fs.readJsonSync(path.join(__dirname, 'lib', 'apps.json'))

## html_rewriter.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import sys

from six.moves.html_parser import HTMLParser
from six.moves.urllib.parse import urljoin, urlsplit, urlunsplit


## typescript-snippets.ts
/**
 * Extracts the type of the items in type T which is an array/set of type U - we want the type U.
 * In other words, it un-boxes the type U from TypeScript's generic array type T = Array<U>.
 * If the supplied type is not an array then that type is returned as is
 * See the example for more details
 *
 * @example
 *  type ArrayItemType = Unboxed<string[]>
 *  //   ArrayItemType = string
 *

## yt.IA.20170727020348.html

  <!DOCTYPE html><html lang="en" data-cast-api-enabled="true"><head><script type="text/javascript" src="/static/js/analytics.js?v=1500596387.0" charset="utf-8"></script>

<script type="text/javascript">archive_analytics.values.service='wb';archive_analytics.values.server_name='wwwb-app16.us.archive.org';archive_analytics.values.server_ms=1692;</script><script type="text/javascript" src="/static/js/wbhack.js?v=1500596387.0" charset="utf-8"></script>

<script type="text/javascript">
__wbhack.init('https://web.archive.org/web');
</script>
<link rel="stylesheet" type="text/css" href="/static/css/banner-styles.css?v=1500596387.0" />
<link rel="stylesheet" type="text/css" href="/static/css/iconochive.css?v=1500596387.0" />

## index.html
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Still Drinking Gin And Juice</title>
  <style>
    html, body {
      height: 100%;
      margin: 0;
      padding: 0;
      border: 0;

## fetchTest.js
// Fetch https://www.google.com through the `proxy-fetch` path under
// `wbinfo.proxy_magic` and print the response body as text.
// NOTE(review): `wbinfo` and `location` are not defined in this snippet — presumably
// globals supplied by the page this is run in; confirm.
// NOTE(review): only a single '/' follows `location.protocol` in the template — confirm
// that is intentional.
// NOTE(review): relies on top-level `await`, so it must run where that is permitted.
var url = `${location.protocol}/${wbinfo.proxy_magic}/proxy-fetch/https://www.google.com`
var res = await fetch(url)
console.log(await res.text())

## README.md

      
              4 files
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                N0taN3rd
                / README.md
            
            
              Last active
              August 19, 2018 13:37
            
              
                Demo Puppeteer And Node.js Chrome Headless Control For Alexander Nwala
              
          
    Requires

Node.js download
Usage


npm install
npm run go

Other Info

See the comments in the JS file, and have fun!

  
## gevent-import.txt
# /home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__pycache__/__init__.cpython-36.pyc matches /home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__init__.py
# code object from '/home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__pycache__/__init__.cpython-36.pyc'
# /home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__pycache__/_config.cpython-36.pyc matches /home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/_config.py
# code object from '/home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__pycache__/_config.cpython-36.pyc'
# /home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__pycache__/_compat.cpython-36.pyc matches /home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/_compat.py
# code object from '/home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__pycache__/_compat.cpython-36.pyc'
import 'gevent._compat' # <_

## dumpWarc.js
const Parser = require('node-warc')
const filenamifyURL = require('filenamify-url')
const fs = require('fs-extra')
const path = require('path')

// NOTE(review): '<path-to-warcfile>' looks like a placeholder — presumably it must be
// replaced with the path of the WARC file to read; confirm before running.
const parser = new Parser('<path-to-warcfile>')

class WARCMap {
  constructor () {
    this._requests = new Map()

## read_cdxj.py
from pywb.warcserver.index.cdxobject import CDXObject

def read_cdxj(path):
    """Print the URL of every record in a CDXJ file whose mime contains 'html'
    and whose status is "200".

    :param path: path of the CDXJ file to read
    """
    # Opened in binary mode: each raw line is handed to CDXObject unchanged.
    with open(path, 'rb') as cdxjin:
        for line in cdxjin:
            cdx = CDXObject(line)
            # .get() returns None when the record has no mime field; fall back to ''
            # so the substring test skips the record instead of raising TypeError.
            mime = cdx.get('mime') or ''
            if 'html' in mime and "200" == cdx.get('status'):
                print(cdx.get('url'))
	const CDP = require('chrome-remote-interface')
	const Promise = require('bluebird')
	const util = require('util')
	const path = require('path')
	const fs = require('fs-extra')
	const filenameify = require('filenamify-url')
	const JsDetector = require('./lib/jsDetector')
	const detectLib = require('./lib/detectlibs')

	const {apps, categories} = fs.readJsonSync('./lib/apps.json')
	#!/usr/bin/env python
	# -- coding: utf-8 --

	import re
	import sys

	from six.moves.html_parser import HTMLParser
	from six.moves.urllib.parse import urljoin, urlsplit, urlunsplit
	/**
	* Extracts the type of the items in type T which is an array/set of type U - we want the type U.
	* Aka un-boxes the type U in typescripts generic array T = Array<U>.
	* If the supplied type is not an array then that type is returned as is
	* See the example for more details
	*
	* @example
	* type ArrayItemType = Unboxed<string[]>
	* // ArrayItemType = string
	*

	<!DOCTYPE html><html lang="en" data-cast-api-enabled="true"><head><script type="text/javascript" src="/static/js/analytics.js?v=1500596387.0" charset="utf-8"></script>

	<script type="text/javascript">archive_analytics.values.service='wb';archive_analytics.values.server_name='wwwb-app16.us.archive.org';archive_analytics.values.server_ms=1692;</script><script type="text/javascript" src="/static/js/wbhack.js?v=1500596387.0" charset="utf-8"></script>

	<script type="text/javascript">
	__wbhack.init('https://web.archive.org/web');
	</script>
	<link rel="stylesheet" type="text/css" href="/static/css/banner-styles.css?v=1500596387.0" />
	<link rel="stylesheet" type="text/css" href="/static/css/iconochive.css?v=1500596387.0" />
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<title>Still Drinking Gin And Juice</title>
	<style>
	html, body {
	height: 100%;
	margin: 0;
	padding: 0;
	border: 0;
	var url = `${location.protocol}/${wbinfo.proxy_magic}/proxy-fetch/https://www.google.com`
	var res = await fetch(url)
	console.log(await res.text())
	# /home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__pycache__/__init__.cpython-36.pyc matches /home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__init__.py
	# code object from '/home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__pycache__/__init__.cpython-36.pyc'
	# /home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__pycache__/_config.cpython-36.pyc matches /home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/_config.py
	# code object from '/home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__pycache__/_config.cpython-36.pyc'
	# /home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__pycache__/_compat.cpython-36.pyc matches /home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/_compat.py
	# code object from '/home/john/PycharmProjects/pywb/.venv/lib/python3.6/site-packages/gevent/__pycache__/_compat.cpython-36.pyc'
	import 'gevent._compat' # <_
	const Parser = require('node-warc')
	const filenamifyURL = require('filenamify-url')
	const fs = require('fs-extra')
	const path = require('path')

	const parser = new Parser('<path-to-warcfile>')

	class WARCMap {
	constructor () {
	this._requests = new Map()
	from pywb.warcserver.index.cdxobject import CDXObject

	def read_cdxj(path):
	with open(path, 'rb') as cdxjin:
	for line in cdxjin:
	cdx = CDXObject(line)
	if 'html' in cdx.get('mime') and "200" == cdx.get('status'):
	print(cdx.get('url'))