Skip to content

Instantly share code, notes, and snippets.

@quicklystarfish
Created May 20, 2011 14:21
Show Gist options
  • Save quicklystarfish/982993 to your computer and use it in GitHub Desktop.
Save quicklystarfish/982993 to your computer and use it in GitHub Desktop.
Torrent creation and seeding with Amazon S3.
._makefile_state
.Python
bin
lib
include
*.torrent
#!bin/python
from __future__ import unicode_literals
# based off of http://wiki.theory.org/BitTorrentSpecification#bencoding
class CircularValueError(ValueError):
    """Raised when an object being bencoded contains a reference to itself."""
def dumps(o, check_circular=True, _circular_data=None):
    """Serialize ``o`` to a bencoded byte string.

    Supported values are byte strings, integers, lists, and dicts whose
    keys are byte strings.  Dict entries are emitted in sorted key order.

    Parameters:
        o: the object to encode.
        check_circular: when true, raise instead of recursing forever on a
            container that (directly or indirectly) contains itself.
        _circular_data: internal; ids of the containers enclosing ``o``.

    Returns:
        The bencoded representation as a byte string.

    Raises:
        CircularValueError: a container contains itself and
            ``check_circular`` is true.
        TypeError: ``o`` (or something inside it) has an unsupported type,
            or a dict key is not a byte string.
    """
    # Only containers can take part in a cycle, so only they are tracked.
    if check_circular and isinstance(o, (list, dict)):
        ancestors = set() if _circular_data is None else _circular_data
        if id(o) in ancestors:
            raise CircularValueError("Circular reference detected.")
        # Build a new set so that siblings do not see each other's ids.
        _circular_data = ancestors | {id(o)}

    # ``bytes`` is ``str`` on Python 2, so this matches the dict-key check
    # below and also accepts byte strings on Python 3.
    if isinstance(o, bytes):
        return str(len(o)).encode("ascii") + b":" + o

    if isinstance(o, int):
        # int() turns True/False into 1/0 rather than the invalid "iTruee".
        return b"i" + str(int(o)).encode("ascii") + b"e"

    if isinstance(o, list):
        parts = [b"l"]
        for element in o:
            parts.append(dumps(element,
                               check_circular=check_circular,
                               _circular_data=_circular_data))
        parts.append(b"e")
        return b"".join(parts)

    if isinstance(o, dict):
        # Validate before sorting: mixed key types cannot be ordered on
        # Python 3 and would hide this more helpful message.
        for key in o:
            if not isinstance(key, bytes):
                raise TypeError("bencoded dict key must be strings")
        parts = [b"d"]
        for key in sorted(o):
            parts.append(dumps(key,
                               check_circular=check_circular,
                               _circular_data=_circular_data))
            parts.append(dumps(o[key],
                               check_circular=check_circular,
                               _circular_data=_circular_data))
        parts.append(b"e")
        return b"".join(parts)

    raise TypeError("Cannot bencode object of type {}"
                    .format(type(o)))
def dump(o, f, *a, **kw):
    """Bencode ``o`` and write the result to the file-like object ``f``.

    Any extra positional or keyword arguments are forwarded to ``dumps``.
    Returns whatever ``f.write`` returns.
    """
    encoded = dumps(o, *a, **kw)
    return f.write(encoded)
def loads(s, _state=None):
    """Decode the first bencoded value found in the byte string ``s``.

    Parameters:
        s: a byte string containing bencoded data.
        _state: internal; a dict whose ``"i"`` entry is the offset at which
            decoding starts.  It is updated to the offset just past the
            decoded value.

    Returns:
        A byte string, int, list or dict.  Data following the first
        complete value is ignored.

    Raises:
        ValueError: the input is truncated or malformed.
    """
    _state = _state or {"i": 0}
    value, _state["i"] = _decode_at(s, _state["i"])
    return value


def _decode_at(s, i):
    """Decode the bencoded value starting at offset ``i``.

    Returns a ``(value, next_offset)`` pair.
    """
    # One-byte slices are byte strings on both Python 2 and Python 3, and
    # yield b"" (instead of raising IndexError) at the end of the input.
    head = s[i:i + 1]
    if not head:
        raise ValueError("unexpected end of bencoded string")

    if head.isdigit():
        # Byte string: <decimal length>:<raw bytes>
        colon = s.find(b":", i)
        length_digits = s[i:colon] if colon >= 0 else b""
        if not length_digits.isdigit():
            raise ValueError("malformed string length in bencoded string")
        start = colon + 1
        end = start + int(length_digits.decode("ascii"))
        if end > len(s):
            raise ValueError("bencoded string is shorter than its declared length")
        return s[start:end], end

    if head == b"i":
        # Integer: i<optional minus sign><decimal digits>e
        end = s.find(b"e", i)
        if end < 0:
            raise ValueError("unterminated integer in bencoded string")
        literal = s[i + 1:end]
        unsigned = literal[1:] if literal[:1] == b"-" else literal
        if not unsigned.isdigit():
            raise ValueError("malformed integer in bencoded string")
        return int(literal.decode("ascii")), end + 1

    if head == b"l":
        i += 1
        items = []
        while s[i:i + 1] != b"e":
            item, i = _decode_at(s, i)
            items.append(item)
        return items, i + 1

    if head == b"d":
        i += 1
        mapping = {}
        while s[i:i + 1] != b"e":
            key, i = _decode_at(s, i)
            value, i = _decode_at(s, i)
            mapping[key] = value
        return mapping, i + 1

    raise ValueError("unexpected character in bencoded string",
                     head.decode("latin-1"))
def load(f, *a, **kw):
    """Read everything from the file-like object ``f`` and decode it.

    Any extra positional or keyword arguments are forwarded to ``loads``.
    """
    payload = f.read()
    return loads(payload, *a, **kw)
def test():
    """Self-test: round trips, type checking, key ordering, cycle detection.

    Raises AssertionError when a check fails; an unexpected exception
    raised by the code under test propagates unchanged.
    """
    def assertRaises(f, exception_type):
        """Call ``f`` and require that it raises ``exception_type``.

        Passing ``None`` means ``f`` must complete without raising.
        """
        if exception_type is None:
            f()
            return
        try:
            f()
        except exception_type:
            return True
        raise AssertionError("assertRaises: failed expecting {}"
                             .format(exception_type))

    listless_sanity_check_subjects = [
        0,
        1,
        3,
        b"foo",
        b"bar",
        {b"x": {b"x": b"y"}, b"fo": 0},
        {b"foo": b"BAR", b"n": {b"bee": 9}},
    ]
    for o in listless_sanity_check_subjects:
        assert loads(dumps(o)) == o, "sanity check failed on " + str(o)

    # Text (non-byte) strings and floats are not bencodable.
    assertRaises(lambda: dumps({"foo": "BAR"}), TypeError)
    assertRaises(lambda: dumps({b"foo": "BAR"}), TypeError)
    assertRaises(lambda: dumps({"foo": b"BAR"}), TypeError)
    assertRaises(lambda: dumps({b"foo": 3.5}), TypeError)
    assertRaises(lambda: dumps({b"foo": [b"bar", 3.0]}), TypeError)
    assertRaises(lambda: dumps({b"foo": b"BAR"}), None)

    # Dict entries are written in sorted key order.
    assert dumps({b"FOO": 2, b"BAR": 1}) == b"d3:BARi1e3:FOOi2ee"
    assert dumps({b"!FOO": 2, b"BAR!": 1}) == b"d4:!FOOi2e4:BAR!i1ee"

    o = {b"bar": b"baz"}
    o[b"foo"] = o
    assertRaises(lambda: dumps(o), CircularValueError)

    torrent = (b"d"
               b"8:announce"
               b"42:http://tracker.amazonaws.com:6969/announce"
               b"13:announce-listll"
               b"42:http://tracker.amazonaws.com:6969/announceee"
               b"4:info"
               b"d"
               b"6:length"
               b"i190149e"
               b"4:name"
               b"11:anvatar.png"
               b"12:piece length"
               b"i262144e"
               b"6:pieces"
               b"20:/\xff\x1a:\x9e!k\xde\x88\xbe\xcd7\xf6\x12\xf1\xee!\x91\x95\xf6"
               b"12:x-amz-bucket"
               b"17:s3.jeremybanks.ca"
               b"9:x-amz-key"
               b"11:anvatar.png"
               b"e"
               b"e")
    # The fixture's keys are already in sorted order, so decoding and
    # re-encoding must reproduce it byte for byte.
    assert dumps(loads(torrent)) == torrent, "torrent round trip failed"
def main(*argv):
    """Run the module's self-test.

    The entry point below forwards the command-line arguments, so they
    are accepted (and ignored) here instead of raising TypeError.
    Returns None, which ``sys.exit`` treats as success.
    """
    test()


if __name__ == "__main__":
    import sys
    sys.exit(main(*sys.argv[1:]))
# AWS credentials are read from the environment so that no secret is stored
# in source control.  A key pair that has ever been committed or published
# must be treated as compromised: revoke it and issue a new one.
# Each value is None when the corresponding variable is not set.
import os

access_key = os.environ.get("AWS_ACCESS_KEY_ID")
secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
# Python version used for the virtualenv and its tools.
pyv=2.7
# Directory of empty marker files recording which setup steps have run.
S=._makefile_state

# `initialize` and `clean` name actions, not files.
.PHONY : initialize clean

initialize: $S/simples3_installed

$S/simples3_installed: $S/virtualenv_initialized
	#
	# Installing simples3 module for python${pyv}.
	#
	bin/pip-${pyv} install "simples3>=1.0"
	#
	touch $S/simples3_installed

$S/virtualenv_initialized: $S/virtualenv_installed
	#
	# Initializing virtualenv with python${pyv}.
	#
	virtualenv --python=python${pyv} --no-site-packages .
	#
	touch $S/virtualenv_initialized

# The state directory is an order-only prerequisite (after the `|`): its
# modification time changes every time a marker file is created inside it,
# which would otherwise make this target look out of date and re-run the
# sudo installs.
$S/virtualenv_installed: | $S/
	#
	# Installing pip and virtualenv for python${pyv} if neccessary.
	#
	sudo easy_install-${pyv} "pip>=1.0.1"
	sudo pip-${pyv} install "virtualenv>=1.6.1"
	#
	touch $S/virtualenv_installed

$S/:
	mkdir -p $S/

# Removes everything listed in .gitignore (virtualenv, state, torrents).
clean: .gitignore
	rm -rf `cat .gitignore`
#!bin/python
from __future__ import unicode_literals
import base64
import collections
import copy
import hashlib
import itertools
import logging
import math
import mimetypes
import os
import os.path
import random
import urllib2
import simples3
import bencoding
# Give the root logger a default handler, then let this module's own
# logger emit everything from DEBUG upwards.
logging.basicConfig()
logger = logging.getLogger(name="s3seed")
logger.setLevel(level="DEBUG")

# Result record returned by s3_seeded_torrent().
S3SeededTorrent = collections.namedtuple(
    typename="S3SeededTorrent",
    field_names="data data_uri magnet_uri url",
)
def s3_seeded_torrent(f, access_key, secret_key,
                      torrent_name=None, file_name=None,
                      mime_type=None, bucket_name=None):
    """Upload a file to S3 and build a torrent of it seeded by S3.

    The torrent that S3 generates for the uploaded object is extended with
    a few open trackers and uploaded next to the object.

    Parameters:
        f: readable file-like object.  Its ``mode`` (or, failing that, the
            presence of an ``encoding`` attribute) decides whether it is
            treated as text or binary.
        access_key, secret_key: credentials passed to ``simples3.S3Bucket``.
        torrent_name: accepted for compatibility; currently unused.
        file_name: S3 key for the upload.  Defaults to the base name of
            ``f.name``, then to ``data.bin`` / ``data.txt``.
        mime_type: content type of the upload.  Defaults to a guess from
            ``file_name``, then to a generic binary or text type.
        bucket_name: bucket to use; a random ``torrent-...`` name is
            generated when omitted.

    Returns:
        An S3SeededTorrent with the torrent file as ``.data``, the same
        file encoded as ``.data_uri``, a ``.magnet_uri`` and the URL of
        the uploaded torrent as ``.url``.
    """
    if not bucket_name:
        bucket_name = "torrent-" + generate_name()
        logger.debug("Generated bucket name: %s", bucket_name)

    bucket = simples3.S3Bucket(bucket_name,
                               access_key=access_key,
                               secret_key=secret_key)
    bucket.put_bucket()
    logger.debug("Bucket (re)initialized.")

    mode = getattr(f, "mode",
                   "rt" if hasattr(f, "encoding") else "rb")
    binary = "b" in mode

    # Honour an explicit file_name; only fall back when none was given.
    if not file_name:
        file_name = os.path.basename(getattr(f, "name", ""))
    if not file_name:
        file_name = "data.bin" if binary else "data.txt"

    # Explicit type first, then a guess from the name, then a generic
    # default chosen by the file mode.
    if not mime_type:
        mime_type = mimetypes.guess_type(file_name)[0]
    if not mime_type:
        mime_type = "application/octet-stream" if binary else "text/plain"

    key = file_name

    logger.debug("Reading file.")
    data = f.read()
    if not binary:
        data = data.encode(getattr(f, "encoding", None) or "utf-8")

    logger.debug("Uploading file to s3 as %s.", key)
    bucket.put(key, data, mimetype=mime_type, acl="public-read")

    logger.debug("Loading Amazon torrent.")
    amazon_torrent_url = bucket.make_url(key) + "?torrent"
    response = urllib2.urlopen(amazon_torrent_url)
    try:
        amazon_torrent_data = response.read()
    finally:
        response.close()
    amazon_torrent = bencoding.loads(amazon_torrent_data)

    logger.debug("Generating modified torrent.")
    modified_torrent = copy.deepcopy(amazon_torrent)

    # Each inner list is one tier of the announce-list.
    open_trackers = [[b"udp://tracker.openbittorrent.com:80/announce"],
                     [b"udp://tracker.publicbt.com:80/announce"],
                     [b"udp://tracker.ccc.de:80/announce"]]
    random.shuffle(open_trackers)

    # "announce" holds a single tracker URL, not a tier.
    modified_torrent.setdefault(b"announce", open_trackers[0][0])
    announce_list = modified_torrent.setdefault(b"announce-list", [])
    announce_list[:0] = open_trackers
    modified_torrent[b"created by"] = b"https://gist.github.com/982993"
    announce_tracker = modified_torrent[b"announce"]

    # Collect every tracker once, keeping first-seen order.
    seen_trackers = set()
    all_trackers = []
    for tracker in itertools.chain.from_iterable(announce_list):
        if tracker not in seen_trackers:
            all_trackers.append(tracker)
            seen_trackers.add(tracker)
    if announce_tracker not in seen_trackers:
        all_trackers.append(announce_tracker)
        seen_trackers.add(announce_tracker)
        # Added as a tier of its own, like every other announce-list entry.
        announce_list.append([announce_tracker])

    modified_torrent_data = bencoding.dumps(modified_torrent)
    s3_torrent_key = key + ".torrent"

    logger.debug("Uploading torrent file as %s.", s3_torrent_key)
    bucket.put(s3_torrent_key, modified_torrent_data,
               mimetype="application/x-bittorrent",
               acl="public-read")
    s3_torrent_file_url = bucket.make_url(s3_torrent_key)

    logger.debug("Generating magnet URI.")
    info_hash = hashlib.sha1(bencoding.dumps(modified_torrent[b"info"])).hexdigest()
    magnet_uri = b"magnet:?dn=" + percent_encode(file_name, "")
    magnet_uri += b"&xt=urn:btih:" + info_hash
    for tracker in all_trackers:
        magnet_uri += b"&tr=" + percent_encode(tracker, "")

    logger.debug("Generating data URI.")
    data_uri_percent = b"data:application/x-bittorrent," + percent_encode(modified_torrent_data)
    data_uri_base64 = b"data:application/x-bittorrent;base64," + base64.b64encode(modified_torrent_data)
    # Keep whichever encoding is shorter (percent-encoding wins on a tie).
    if len(data_uri_base64) < len(data_uri_percent):
        data_uri = data_uri_base64
    else:
        data_uri = data_uri_percent

    return S3SeededTorrent(modified_torrent_data, data_uri, magnet_uri, s3_torrent_file_url)
def generate_name():
    """Return a random lowercase base32 name built from 30 random bytes.

    30 bytes encode to exactly 48 base32 characters, so there is normally
    no ``=`` padding to strip; the strip is kept as a safeguard.  The value
    is decoded to text explicitly rather than relying on implicit
    bytes/unicode coercion.
    """
    encoded = base64.b32encode(os.urandom(30))
    return encoded.decode("ascii").lower().strip("=")
def percent_encode(data, safe=""):
    """Simple binary percent encoding.

    Parameters:
        data: the bytes to encode.  Text is encoded as UTF-8 first.
        safe: extra characters (bytes or text) to leave unescaped, in
            addition to the unreserved set ``A-Z a-z 0-9 - _ . ~``.

    Returns:
        A byte string in which every other byte is written as ``%XX``
        with upper-case hexadecimal digits.
    """
    unreserved_characters = (b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                             b"abcdefghijklmnopqrstuvwxyz"
                             b"0123456789-_.~")
    if not isinstance(data, (bytes, bytearray)):
        data = data.encode("utf-8")
    if not isinstance(safe, (bytes, bytearray)):
        safe = safe.encode("utf-8")

    # bytearray iterates as integers on both Python 2 and Python 3.
    keep = frozenset(bytearray(unreserved_characters + bytes(safe)))
    # Appending to a bytearray avoids quadratic ``+=`` on immutable strings.
    encoded = bytearray()
    for code in bytearray(data):
        if code in keep:
            encoded.append(code)
        else:
            encoded.extend(("%%%02X" % code).encode("ascii"))
    return bytes(encoded)
def main(filename=None):
    """Seed ``filename`` through S3 and write ``filename + ".torrent"``.

    Credentials are taken from the local ``config`` module.  The torrent's
    data URI is printed on standard output.

    Returns:
        2 when no file name was given (used as the exit status),
        otherwise None.
    """
    import sys

    if not filename:
        sys.stderr.write("usage: %s FILE\n" % sys.argv[0])
        return 2

    import config

    with open(filename, "rb") as in_file:
        torrent = s3_seeded_torrent(in_file,
                                    access_key=config.access_key,
                                    secret_key=config.secret_key)

    # Open the output only after the torrent exists, so that a failed
    # upload does not leave an empty .torrent file behind.
    with open(filename + ".torrent", "wb") as out_file:
        out_file.write(torrent.data)

    print(torrent.data_uri)


if __name__ == "__main__":
    import sys
    sys.exit(main(*sys.argv[1:]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment