Skip to content

Instantly share code, notes, and snippets.

View Smerity's full-sized avatar

Stephen Merity Smerity

View GitHub Profile
@Smerity
Smerity / fetch_page.py
Created August 7, 2015 21:30
An example of fetching a page from Common Crawl using the Common Crawl Index
import gzip
import json
import requests
try:
from cStringIO import StringIO
except:
from StringIO import StringIO
# Let's fetch the Common Crawl FAQ using the CC index
resp = requests.get('http://index.commoncrawl.org/CC-MAIN-2015-27-index?url=http%3A%2F%2Fcommoncrawl.org%2Ffaqs%2F&output=json')
@Smerity
Smerity / Factors.java
Last active November 2, 2023 20:55
Go vs GCCGO vs Java vs Python vs PyPy for naive factorisation
import java.math.BigInteger;
class Factors {
public static void main (String [] args)
{
// 157 bit n = pq with p ~= 78 bits
BigInteger n = new BigInteger("273966616513101251352941655302036077733021013991");
// Set i to be p - 10e6
BigInteger i = new BigInteger("496968652506233112158689");
@Smerity
Smerity / cupy-pytorch-ptx.py
Created May 21, 2017 23:21 — forked from szagoruyko/cupy-pytorch-ptx.py
CuPy example for PyTorch updated to support Python 3
import torch
from cupy.cuda import function
from pynvrtc.compiler import Program
from collections import namedtuple
a = torch.randn(1,4,4).cuda()
b = torch.zeros(a.size()).cuda()
kernel = '''
extern "C"
{
"Envelope" : {
"WARC-Header-Length" : "578",
"Block-Digest" : "sha1:YHKQUSBOS4CLYFEKQDVGJ457OAPD6IJO",
"Format" : "WARC",
"Actual-Content-Length" : "43428",
"WARC-Header-Metadata" : {
"WARC-Record-ID" : "<urn:uuid:ffbfb0c0-6456-42b0-af03-3867be6fc09f>",
"WARC-Warcinfo-ID" : "<urn:uuid:3169ca8e-39a6-42e9-a4e3-9f001f067bdf>",
"Content-Length" : "43428",
@Smerity
Smerity / just_text.py
Created July 8, 2014 01:38
Extract just the text from Common Crawl WARC WET files
# To run: python just_text.py > text
###
from glob import glob
#
import warc
# List any of the WARC files found in the data folder
warc_files = glob('data/*.wet.gz')
# Process each of the WARC files we found
@Smerity
Smerity / gist:ab4c2e8e87cb715e1f0b
Created August 14, 2014 18:02
Open Graph tags
<meta name="description" content="How do you design a property so that it compliments its surrounding so perfectly that it&#039;s virtually invisible to the naked eye? 2014&#039;s Australian House of the Year did just that."/>
<link rel="canonical" href="http://www.realestate.com.au/blog/invisible-house-takes-australian-house-year-australian-house-year/" />
<link rel="publisher" href="https://plus.google.com/+realestatecomau"/>
<meta property="og:locale" content="en_US" />
<meta property="og:type" content="article" />
<meta property="og:title" content="Australian house of the year is ... invisible?" />
<meta property="og:description" content="How do you design a property so that it compliments its surrounding so perfectly that it&#039;s virtually invisible to the naked eye? 2014&#039;s Australian House of the Year did just that." />
<meta property="og:url" content="http://www.realestate.com.au/blog/invisible-house-takes-australian-house-year-australian-house-year/" />
<meta property="og:site_name" content="Th
@Smerity
Smerity / version.bad
Created May 17, 2020 00:31
Difference between high performing and low performing autovectorization
 fn main() {
 push  rbp
 mov  rbp, rsp
 push  r15
 push  r14
 push  r12
 push  rbx
 sub  rsp, 3344
 let mut rng = rand::thread_rng();
 call  rand::rngs::thread::thread_rng
@Smerity
Smerity / post_process.py
Created November 19, 2017 20:55
WikiText: Python 2 post processing used on Moses tokenized input
# encoding=utf8
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import re
number_match_re = re.compile(r'^([0-9]+[,.]?)+$')
number_split_re = re.compile(r'([,.])')
@Smerity
Smerity / send_message
Created January 10, 2019 23:18
Send a message using the Slack postMessage API
import requests
AUTH_TOKEN = 'xoxb-...'
channel = '#locked-out'
USER_ID = 'UDAS0J04S' # A user ID obtained by `list_members.py`
text = f'<@{USER_ID}>, Smerity locked himself out'
params = {
'token': AUTH_TOKEN,
@Smerity
Smerity / list_members.py
Created January 10, 2019 10:27
List members of a bot's Slack
import pprint
import requests
AUTH_TOKEN = 'xoxb-...'
params = {'token': AUTH_TOKEN}
r = requests.post('https://slack.com/api/users.list', params=params)
for member in r.json()['members']: