Skip to content

Instantly share code, notes, and snippets.

@llibra
Created September 22, 2012 18:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save llibra/3767300 to your computer and use it in GitHub Desktop.
Save llibra/3767300 to your computer and use it in GitHub Desktop.
modified urlencode
;;;; urlencodeの実験
;; Benchmark driver: time PURI's escaper and URLENCODE on the same input,
;; a 1 MiB string of characters with random codes in [0, 255).
(let* ((size (* 1024 1024))
       (str (map-into (make-string size)
                      (lambda () (code-char (random 255))))))
  ;; NULL reduces each (large) encoded result to NIL -- presumably so the
  ;; REPL prints only TIME's report and not the encoded string itself.
  (null (time (puri::encode-escaped-encoding str puri::*reserved-characters* t)))
  (null (time (urlencode:urlencode str))))
(in-package :urlencode)
;;; 最適化宣言だけ指定した変更前のバージョン
;; Baseline implementation: character-at-a-time encoding into a string
;; stream, compiled with aggressive optimization settings.
(locally (declare (optimize speed (debug 0) (safety 0)))
  (defun urlencode (string &key (queryp nil))
    "Return a percent-encoded copy of STRING.

Characters accepted by UNRESERVED-CHAR-P are copied unchanged.  A Return,
optionally followed by a Newline, is written as a single newline character.
When QUERYP is true a space is written as +.  Every other character is
written as one %XX group per octet of its UTF-8 encoding."
    (with-output-to-string (out)
      (let ((pos 0)
            (limit (length string)))
        (declare (type fixnum pos limit))
        (do ()
            ((>= pos limit))
          (let ((ch (aref string pos)))
            ;; POS now points at the character *after* CH, which is what the
            ;; CR/LF lookahead below relies on.
            (incf pos)
            (cond ((unreserved-char-p ch)
                   (write-char ch out))
                  ((char= ch #\Return)
                   (write-char #\Newline out)
                   ;; Swallow the LF of a CRLF pair.
                   (when (and (< pos limit)
                              (char= (aref string pos) #\Newline))
                     (incf pos)))
                  ((and queryp (char= ch #\Space))
                   (write-char #\+ out))
                  (t
                   (loop for octet across (string-to-octets (string ch)
                                                            :encoding :UTF-8)
                         do (write-char #\% out)
                            (format out "~2,'0x" octet))))))))))
;; CCL 1.8
;; (PURI::ENCODE-ESCAPED-ENCODING STR PURI::*RESERVED-CHARACTERS* T)
;; took 82,384 microseconds (0.082384 seconds) to run.
;; 13,839 microseconds (0.013839 seconds, 16.80%) of which was spent in GC.
;; During that period, and with 2 available CPU cores,
;; 64,990 microseconds (0.064990 seconds) were spent in user mode
;; 13,998 microseconds (0.013998 seconds) were spent in system mode
;; 18,647,856 bytes of memory allocated.
;; 40 minor page faults, 0 major page faults, 0 swaps.
;; (URLENCODE STR)
;; took 7,272,771 microseconds (7.272771 seconds) to run.
;; 1,337,318 microseconds (1.337318 seconds, 18.39%) of which was spent in GC.
;; During that period, and with 2 available CPU cores,
;; 7,079,924 microseconds (7.079924 seconds) were spent in user mode
;; 171,974 microseconds (0.171974 seconds) were spent in system mode
;; 1,178,938,384 bytes of memory allocated.
;; 264 minor page faults, 0 major page faults, 0 swaps.
;; SBCL 1.0.58
;; Evaluation took:
;; 0.053 seconds of real time
;; 0.052993 seconds of total run time (0.046993 user, 0.006000 system)
;; 100.00% CPU
;; 124,622,974 processor cycles
;; 18,647,216 bytes consed
;;
;; Evaluation took:
;; 3.412 seconds of real time
;; 3.395483 seconds of total run time (3.268503 user, 0.126980 system)
;; [ Run times consist of 0.404 seconds GC time, and 2.992 seconds non-GC time. ]
;; 99.50% CPU
;; 7,962,815,308 processor cycles
;; 770,256,208 bytes consed
;;; エスケープするごとのアロケーションを一回にまとめたバージョン
(defun urlencode (string &key (queryp nil))
  "Return a percent-encoded copy of STRING.

STRING is converted to UTF-8 octets once, up front, and the octets are then
encoded one by one into a string stream.  Octets whose character is accepted
by UNRESERVED-CHAR-P are copied unchanged.  A CR, optionally followed by LF,
is written as a single newline character.  When QUERYP is true a space is
written as +.  Every other octet is written as %XX (uppercase hex)."
  (declare (optimize speed (debug 0) (safety 0))
           (type simple-string string))
  (with-output-to-string (stream)
    (let* ((octets (string-to-octets string :encoding :UTF-8))
           ;; The loop walks OCTETS, so the bound must be the octet count.
           ;; Using (length string) here truncated the output whenever STRING
           ;; contained characters that need more than one UTF-8 octet.
           (length (length octets))
           (i 0)
           (o 0))
      (declare (type fixnum i length o)
               (type (simple-array (unsigned-byte 8) (*)) octets))
      (tagbody
       start
         (unless (< i length)
           (go end))
         (setq o (aref octets i))
         ;; I now indexes the octet after O (used by the CRLF lookahead).
         (incf i)
         (cond
           ;; Unreserved characters are all ASCII; octets >= #x80 are parts of
           ;; a multi-octet sequence and must always be escaped, so they are
           ;; never handed to the character predicate.
           ((and (< o #x80) (unreserved-char-p (code-char o)))
            (write-char (code-char o) stream))
           ((= o #.(char-int #\Return))
            ;; Emit a newline, matching the stream-based baseline version.
            (write-char #\Newline stream)
            ;; Swallow the LF of a CRLF pair.
            (when (and (< i length) (= (aref octets i) #.(char-int #\Newline)))
              (incf i)))
           ((and queryp (= o #.(char-int #\Space)))
            (write-char #\+ stream))
           (t
            ;; H and L are the high and low hex nibbles of the octet.
            (multiple-value-bind (h l) (truncate o #x10)
              (write-char #\% stream)
              (write-char (digit-char h 16) stream)
              (write-char (digit-char l 16) stream))))
         (go start)
       end))))
;; CCL 1.8
;; (PURI::ENCODE-ESCAPED-ENCODING STR PURI::*RESERVED-CHARACTERS* T)
;; took 79,479 microseconds (0.079479 seconds) to run.
;; 13,040 microseconds (0.013040 seconds, 16.41%) of which was spent in GC.
;; During that period, and with 2 available CPU cores,
;; 72,989 microseconds (0.072989 seconds) were spent in user mode
;; 0 microseconds (0.000000 seconds) were spent in system mode
;; 18,647,856 bytes of memory allocated.
;; (URLENCODE STR)
;; took 588,420 microseconds (0.588420 seconds) to run.
;; 172,066 microseconds (0.172066 seconds, 29.24%) of which was spent in GC.
;; During that period, and with 2 available CPU cores,
;; 575,912 microseconds (0.575912 seconds) were spent in user mode
;; 6,999 microseconds (0.006999 seconds) were spent in system mode
;; 54,629,328 bytes of memory allocated.
;; SBCL 1.0.58
;; Evaluation took:
;; 0.057 seconds of real time
;; 0.054992 seconds of total run time (0.054992 user, 0.000000 system)
;; 96.49% CPU
;; 131,566,498 processor cycles
;; 18,645,040 bytes consed
;;
;; Evaluation took:
;; 0.468 seconds of real time
;; 0.466929 seconds of total run time (0.451931 user, 0.014998 system)
;; 99.79% CPU
;; 1,092,137,270 processor cycles
;; 29,411,200 bytes consed
;;; 文字列ストリームの代わりに文字列をバッファに使ったバージョン
(defun urlencode (string &key (queryp nil))
  "Return a percent-encoded copy of STRING.

STRING is converted to UTF-8 octets and encoded into a preallocated string
buffer of three characters per octet (the worst case, %XX for every octet);
the used prefix of that buffer is returned.  Octets whose character is
accepted by UNRESERVED-CHAR-P are copied unchanged.  A CR, optionally
followed by LF, is written as a single newline character.  When QUERYP is
true a space is written as +.  Every other octet is written as %XX."
  (declare (optimize speed (debug 0) (safety 0))
           (type simple-string string))
  (let* ((octets (string-to-octets string :encoding :UTF-8))
         ;; Both the loop bound and the buffer size must be derived from the
         ;; octet count.  Using (length string) truncated the output and
         ;; undersized the buffer for multi-octet characters.
         (length (length octets))
         (buffer (make-array (* length 3)
                             :element-type 'standard-char
                             :initial-element #\0))
         (i 0)                          ; read index into OCTETS
         (j 0)                          ; write index into BUFFER
         (o 0))
    (declare (type fixnum i j length o)
             (type (simple-array (unsigned-byte 8) (*)) octets))
    (tagbody
     start
       (unless (< i length)
         (go end))
       (setq o (aref octets i))
       ;; I now indexes the octet after O (used by the CRLF lookahead).
       (incf i)
       (cond
         ;; Unreserved characters are all ASCII; octets >= #x80 are parts of a
         ;; multi-octet sequence and must always be escaped.
         ((and (< o #x80) (unreserved-char-p (code-char o)))
          (setf (aref buffer j) (code-char o))
          (incf j))
         ((= o #.(char-int #\Return))
          ;; Emit a newline, matching the stream-based baseline version.
          (setf (aref buffer j) #\Newline)
          (incf j)
          ;; Swallow the LF of a CRLF pair.
          (when (and (< i length) (= (aref octets i) #.(char-int #\Newline)))
            (incf i)))
         ((and queryp (= o #.(char-int #\Space)))
          (setf (aref buffer j) #\+)
          (incf j))
         (t
          ;; H and L are the high and low hex nibbles of the octet.
          (multiple-value-bind (h l) (truncate o #x10)
            (setf (aref buffer j) #\%)
            (setf (aref buffer (incf j)) (digit-char h 16))
            (setf (aref buffer (incf j)) (digit-char l 16))
            (incf j))))
       (go start)
     end)
    ;; Return only the J characters actually written.  On SBCL the buffer is
    ;; shrunk in place; elsewhere the used prefix is copied out.  (The
    ;; previous (subseq buffer 0) returned the whole zero-padded buffer.)
    #+sbcl
    (sb-kernel:%shrink-vector buffer j)
    #-sbcl
    (subseq buffer 0 j)))
;; CCL 1.8
;; (PURI::ENCODE-ESCAPED-ENCODING STR PURI::*RESERVED-CHARACTERS* T)
;; took 131,607 microseconds (0.131607 seconds) to run.
;; 70,512 microseconds (0.070512 seconds, 53.58%) of which was spent in GC.
;; During that period, and with 2 available CPU cores,
;; 124,981 microseconds (0.124981 seconds) were spent in user mode
;; 1,999 microseconds (0.001999 seconds) were spent in system mode
;; 18,647,856 bytes of memory allocated.
;; 10 minor page faults, 0 major page faults, 0 swaps.
;; (URLENCODE STR)
;; took 385,247 microseconds (0.385247 seconds) to run.
;; 71,201 microseconds (0.071201 seconds, 18.48%) of which was spent in GC.
;; During that period, and with 2 available CPU cores,
;; 376,943 microseconds (0.376943 seconds) were spent in user mode
;; 4,000 microseconds (0.004000 seconds) were spent in system mode
;; 26,737,056 bytes of memory allocated.
;; SBCL 1.0.58
;; Evaluation took:
;; 0.071 seconds of real time
;; 0.070989 seconds of total run time (0.054992 user, 0.015997 system)
;; 100.00% CPU
;; 164,967,264 processor cycles
;; 18,652,864 bytes consed
;;
;; Evaluation took:
;; 0.406 seconds of real time
;; 0.404938 seconds of total run time (0.404938 user, 0.000000 system)
;; 99.75% CPU
;; 948,373,048 processor cycles
;; 7,494,320 bytes consed
;;; Puriなども参考にしつつフルスクラッチで書いたバージョン
;;; 内部エンコーディングがASCII互換ということを仮定しないようにしているけど、
;;; 大半の処理系はUTF-16やUTF-32などのASCII互換のエンコーディングを使っているので、
;;; 気にし過ぎな気もする
(declaim (inline unreserved-p))
(defun unreserved-p (x)
  "Return true when the number X is the ASCII code of an unreserved URL
character: a letter, a digit, or one of - . _ ~."
  (cond ((<= #x41 x #x5a) t)            ; A-Z
        ((<= #x61 x #x7a) t)            ; a-z
        ((<= #x30 x #x39) t)            ; 0-9
        ((= x #x2d) t)                  ; -
        ((= x #x2e) t)                  ; .
        ((= x #x5f) t)                  ; _
        ((= x #x7e) t)                  ; ~
        (t nil)))
(defvar *ascii->char-table*
  ;; 128 entries, 16 per row, indexed by ASCII code.  The space character is
  ;; spelled #\Space so the table does not depend on exact whitespace.
  #(nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
    nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
    #\Space #\! #\" #\# #\$ #\% #\& #\' #\( #\) #\* #\+ #\, #\- #\. #\/
    #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\: #\; #\< #\= #\> #\?
    #\@ #\A #\B #\C #\D #\E #\F #\G #\H #\I #\J #\K #\L #\M #\N #\O
    #\P #\Q #\R #\S #\T #\U #\V #\W #\X #\Y #\Z #\[ #\\ #\] #\^ #\_
    #\` #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o
    #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\z #\{ #\| #\} #\~ nil)
  "Vector mapping an ASCII code (0-127) to the corresponding Lisp character.
Entries for the control codes 0-31 and for 127 (DEL) are NIL.")
(declaim (inline ascii->char))
(defun ascii->char (x)
  "Return the character whose ASCII code is X, or NIL when X is a control
code.  X must be an index into *ASCII->CHAR-TABLE* (0-127)."
  ;; A literal #(...) vector is a SIMPLE-VECTOR, so SVREF applies directly.
  (svref (the simple-vector *ascii->char-table*) x))
(defun urlencode (string &key (queryp nil) (encoding :utf-8))
  "Return a percent-encoded copy of STRING.

STRING is converted to octets using ENCODING (default :UTF-8) and encoded
into a preallocated buffer of three characters per octet (the worst case,
%XX for every octet); the used prefix of that buffer is returned.  Octets
accepted by UNRESERVED-P are written as their ASCII character.  A CR,
optionally followed by LF, is written as a single newline character.  When
QUERYP is true a space is written as +.  Every other octet is written as
%XX (uppercase hex)."
  (declare (optimize speed (debug 0) (safety 0)))
  (do* ((octets (string-to-octets string :encoding encoding))
        (end (length octets))
        (length (* end 3))
        (buffer (make-string length :initial-element #\0))
        (i 0 (1+ i))                    ; read index into OCTETS
        (j 0))                          ; write index into BUFFER
       ((= i end)
        ;; Return only the J characters actually written.  On SBCL the
        ;; buffer is shrunk in place; elsewhere the used prefix is copied
        ;; out.  (The previous (subseq buffer 0) returned the whole
        ;; zero-padded buffer.)
        #+sbcl
        (sb-kernel:%shrink-vector buffer j)
        #-sbcl
        (subseq buffer 0 j))
    (declare (type fixnum i j end)
             (type (simple-array (unsigned-byte 8) (*)) octets))
    (let ((octet (aref octets i)))
      (declare (type (unsigned-byte 8) octet))
      (cond ((unreserved-p octet)
             (setf (aref buffer j) (ascii->char octet))
             (incf j))
            ((= octet #x0d)             ; CR
             (setf (aref buffer j) #\newline)
             (incf j)
             ;; Swallow the LF of a CRLF pair.  The lookahead index is
             ;; (1+ i), so that is what must be bounds-checked; the old
             ;; guard (< i end) was always true inside the loop and read
             ;; past the end of OCTETS when CR was the last octet.
             (when (and (< (1+ i) end) (= (aref octets (1+ i)) #x0a))
               (incf i)))
            ((and queryp (= octet #x20)) ; SP
             (setf (aref buffer j) #\+)
             (incf j))
            (t
             ;; H and L are the high and low hex nibbles of the octet.
             (multiple-value-bind (h l) (truncate octet #x10)
               (setf (aref buffer j) #\%)
               (setf (aref buffer (incf j)) (digit-char h 16))
               (setf (aref buffer (incf j)) (digit-char l 16))
               (incf j)))))))
;; SBCL 1.0.58
;; Evaluation took:
;; 0.069 seconds of real time
;; 0.068990 seconds of total run time (0.049993 user, 0.018997 system)
;; 100.00% CPU
;; 161,759,836 processor cycles
;; 18,645,040 bytes consed
;;
;; Evaluation took:
;; 0.107 seconds of real time
;; 0.106984 seconds of total run time (0.106984 user, 0.000000 system)
;; 100.00% CPU
;; 250,093,522 processor cycles
;; 20,427,856 bytes consed
;; CCL 1.8
;; (PURI::ENCODE-ESCAPED-ENCODING STR PURI::*RESERVED-CHARACTERS* T)
;; took 133,942 microseconds (0.133942 seconds) to run.
;; 72,516 microseconds (0.072516 seconds, 54.14%) of which was spent in GC.
;; During that period, and with 2 available CPU cores,
;; 132,979 microseconds (0.132979 seconds) were spent in user mode
;; 1,000 microseconds (0.001000 seconds) were spent in system mode
;; 18,647,856 bytes of memory allocated.
;; 10 minor page faults, 0 major page faults, 0 swaps.
;; (URLENCODE STR)
;; took 303,147 microseconds (0.303147 seconds) to run.
;; 81,035 microseconds (0.081035 seconds, 26.73%) of which was spent in GC.
;; During that period, and with 2 available CPU cores,
;; 293,956 microseconds (0.293956 seconds) were spent in user mode
;; 2,999 microseconds (0.002999 seconds) were spent in system mode
;; 39,279,680 bytes of memory allocated.
;; 98 minor page faults, 0 major page faults, 0 swaps.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment