Created
March 12, 2012 12:08
-
-
Save osa1/2021424 to your computer and use it in GitHub Desktop.
bencode decoder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;; Bencode decoder. Converts bencoded strings/streams to CL data
;;; structures.
;;; Usage:
;;;   (bencode:decode stream-or-string)
;;; Bencode dictionaries are converted to association lists whose entries
;;; are two-element lists (key value); bencode lists are converted to CL lists.
;;; TODO: error handling.
;; Start from CL-USER so the DEFPACKAGE form is read in a known package.
(in-package #:cl-user)

;; The BENCODE package exposes a single entry point, DECODE.
(defpackage #:bencode
  (:use #:cl)
  (:export #:decode))

(in-package #:bencode)

;; Intended representation for decoded dictionaries.
;; NOTE(review): nothing in the visible code reads this variable; the
;; dictionary reader always builds a list of entries -- confirm before relying on it.
(defparameter *parse-dictionaries-as* 'alist)
(defun read-until (stream char &optional unread-p)
  "Collect characters from STREAM into a fresh string until CHAR is read.
The delimiter CHAR is not part of the returned string. It is consumed
from STREAM unless UNREAD-P is true, in which case it is pushed back.
READ-CHAR is called with its default EOF behaviour, so END-OF-FILE is
signalled if STREAM runs out before CHAR is seen."
  (with-output-to-string (collected)
    (loop
      (let ((next (read-char stream)))
        (when (eql next char)
          (return))
        (write-char next collected)))
    ;; Push the delimiter back so the caller can read it again.
    (when unread-p
      (unread-char char stream))))
(defun read-string-length (stream)
  "Read the decimal length prefix of a bencoded string from STREAM.
Consumes characters up to and including the #\\: separator and returns
the result of PARSE-INTEGER on the text before it."
  (let ((digits (read-until stream #\:)))
    (parse-integer digits)))
(defun read-string (stream length)
  "Read exactly LENGTH characters from STREAM and return them as a string.
END-OF-FILE is signalled by READ-CHAR if STREAM ends early.
NOTE(review): LENGTH is counted in characters here, whereas bencode
length prefixes count bytes; on character streams with a multi-byte
encoding the two can differ -- confirm the stream's external format."
  (with-output-to-string (collected)
    (loop repeat length
          do (write-char (read-char stream) collected))))
(defun read-integer (stream)
  "Read the body of a bencoded integer from STREAM.
The leading #\\i is expected to have been consumed already. Characters
up to and including the terminating #\\e are consumed, and the text
before the terminator is handed to PARSE-INTEGER."
  (let ((digits (read-until stream #\e)))
    (parse-integer digits)))
(defun read-list (stream)
  "Read the elements of a bencoded list from STREAM and return them as a list.
The leading #\\l is expected to have been consumed already. Reading
stops when the terminating #\\e is consumed or when STREAM reaches end
of file; in the latter case the elements read so far are returned."
  (let ((items '()))
    (loop
      (let ((next (read-char stream nil)))
        (when (or (null next) (eql next #\e))
          (return (nreverse items)))
        ;; Not a terminator: hand the character back so READ-VALUE sees it.
        (unread-char next stream)
        (push (read-value stream) items)))))
(defun read-dict (stream)
  "Read the entries of a bencoded dictionary from STREAM.
The leading #\\d is expected to have been consumed already. Returns a
list of two-element lists (KEY VALUE), in the order they appear, where
KEY is read as a bencoded string and VALUE via READ-VALUE. Reading stops
when the terminating #\\e is consumed or when STREAM reaches end of file."
  (let ((entries '()))
    (loop
      (let ((next (read-char stream nil)))
        (when (or (null next) (eql next #\e))
          (return (nreverse entries)))
        ;; Not a terminator: it is the first digit of the key's length prefix.
        (unread-char next stream)
        (let* ((key (read-string stream (read-string-length stream)))
               (value (read-value stream)))
          (push (list key value) entries))))))
(defun read-value (stream)
  "Read one bencoded value from STREAM and return it.
Dispatches on the first character:
  #\\i        -> integer, via READ-INTEGER
  #\\0..#\\9  -> string, via READ-STRING-LENGTH and READ-STRING
  #\\l        -> list, via READ-LIST
  #\\d        -> dictionary, via READ-DICT
Signals END-OF-FILE (from READ-CHAR) if STREAM is empty, and an ERROR if
the first character does not start any bencode value. Previously such
input silently produced NIL, which is indistinguishable from an empty
bencoded list."
  (let ((ch (read-char stream)))
    (case ch
      (#\i
       (read-integer stream))
      ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
       ;; The digit is part of the length prefix; give it back first.
       (unread-char ch stream)
       (read-string stream (read-string-length stream)))
      (#\l
       (read-list stream))
      (#\d
       (read-dict stream))
      (otherwise
       (error "Invalid bencode data: unexpected character ~S." ch)))))
(defgeneric decode (stream-or-string)
  (:documentation "Decode one bencoded value into CL data.
STREAM-OR-STRING is either a string holding bencoded text or a character
input stream positioned at the start of a value. Bencode dictionaries
come back as lists of (key value) entries and bencode lists as CL lists.")
  ;; Strings are wrapped in an input stream and re-dispatched.
  (:method ((string string))
    (with-input-from-string (input string)
      (decode input)))
  ;; Streams are handed straight to the value reader.
  (:method ((stream stream))
    (read-value stream)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It's not appropriate to decode torrent files with read-char. Some *.torrent files carry an encoding property (for example GBK) alongside path.utf-8 or name.utf-8 keys, so read-char might signal errors or produce wrong results. It's better to use read-byte.