Stephen Merity Smerity

## part-r-00000
0	48
0000	6
0l	1
0xdc00	13
1	69
10	11
100	3
1001	1
100154	1
1004	1

## knn.cpp
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <sstream>
#include <unordered_map>
#include <vector>

## bad_rpc.go
package main

import (
	"encoding/gob"
	"fmt"
	"log"
	"net"
	"net/rpc"
)

## AM207_StephenMerity_HM6.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                Smerity
                / AM207_StephenMerity_HM6.ipynb
            
            
              Created
              April 10, 2014 16:21
            
              
                AM207 Project Proposal
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## fastest.results
smerity@pegasus:~/Coding/montelight/python$ time ~/Coding/Reference/pypy-2.2.1-linux64/bin/pypy -m cProfile minilight.py roomfront-n-1000.ml.txt

  MiniLight 1.6 Python - http://www.hxa.name/minilight

iteration: 3^C
interrupted
         1155613811 function calls (1062023566 primitive calls) in 89.591 seconds

   Ordered by: standard name

## just_text.py
# To run: python just_text.py > text
###
from glob import glob
#
import warc

# List any of the WARC files found in the data folder
warc_files = glob('data/*.wet.gz')

# Process each of the WARC files we found

## keybase.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                Smerity
                / keybase.md
            
            
              Created
              July 9, 2014 00:35
            
              
                Keybase Proof
              
          
    Keybase proof

I hereby claim:

I am smerity on github.
I am smerity (https://keybase.io/smerity) on keybase.
I have a public key whose fingerprint is 56A2 5996 3078 B205 1053  883A 6615 0186 B74F 858B

To claim this, I am signing this object:

  
## warc
common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00000-ip-10-147-4-33.ec2.internal.warc.gz
common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00001-ip-10-147-4-33.ec2.internal.warc.gz
common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00002-ip-10-147-4-33.ec2.internal.warc.gz
common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00003-ip-10-147-4-33.ec2.internal.warc.gz
common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00004-ip-10-147-4-33.ec2.internal.warc.gz
common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00005-ip-10-147-4-33.ec2.internal.warc.gz
common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00006-ip-10-147-4-33.ec2.internal.warc.gz
common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/C

## intro.py
import re
#
from collections import Counter
from glob import glob
from urlparse import urlparse
#
import warc


# Extract the names and total usage count of all the opening HTML tags in the document

## stream_warc.py
import boto
from boto.s3.key import Key
import zlib


def stream_decompress_multi(stream):
  dec = zlib.decompressobj(16 + zlib.MAX_WBITS)
  while True:
    chunk = stream.read(1024 * 8)
    if not chunk:
	#include <algorithm>
	#include <fstream>
	#include <iostream>
	#include <iterator>
	#include <map>
	#include <set>
	#include <sstream>
	#include <unordered_map>
	#include <vector>
	smerity@pegasus:~/Coding/montelight/python$ time ~/Coding/Reference/pypy-2.2.1-linux64/bin/pypy -m cProfile minilight.py roomfront-n-1000.ml.txt

	MiniLight 1.6 Python - http://www.hxa.name/minilight

	iteration: 3^C
	interrupted
	1155613811 function calls (1062023566 primitive calls) in 89.591 seconds

	Ordered by: standard name
	# To run: python just_text.py > text
	###
	from glob import glob
	#
	import warc

	# List any of the WARC files found in the data folder
	warc_files = glob('data/*.wet.gz')

	# Process each of the WARC files we found
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00000-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00001-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00002-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00003-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00004-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00005-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/CC-MAIN-20140416005201-00006-ip-10-147-4-33.ec2.internal.warc.gz
	common-crawl/crawl-data/CC-MAIN-2014-15/segments/1397609521512.15/warc/C
	import re
	#
	from collections import Counter
	from glob import glob
	from urlparse import urlparse
	#
	import warc


	# Extract the names and total usage count of all the opening HTML tags in the document
	import boto
	from boto.s3.key import Key
	import zlib


	def stream_decompress_multi(stream):
	dec = zlib.decompressobj(16 + zlib.MAX_WBITS)
	while True:
	chunk = stream.read(1024 * 8)
	if not chunk: