Skip to content

Instantly share code, notes, and snippets.

@rgchris
Last active January 4, 2022 19:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rgchris/ea6e1edbfd316032e28c995cdc3cfefc to your computer and use it in GitHub Desktop.
Save rgchris/ea6e1edbfd316032e28c995cdc3cfefc to your computer and use it in GitHub Desktop.
Unzip for Rebol 2
Rebol [
Title: "Unzip for Rebol 2"
Date: 3-Jan-2022
Author: "Christopher Ross-Gill"
]
do %tiny-inflate.r
; obtain from
; https://gist.github.com/rgchris/d3fb5f6a6ea6d27ea3817c0e697ac25d
unzip: make object! [
_: none
entry-marker: #{504B0102} ; "PK^A^B"
local-marker: #{504B0304} ; "PK^C^D"
index-marker: #{504B0506} ; "PK^E^F"
advance: func [
'series [word!]
offset [integer!]
/local source
][
either all [
binary? source: get :series
offset <= length? source
][
set :series skip source offset
yes
][
no
]
]
consume: func [
'series [word!]
type [word! integer! binary!]
/local source part value length
][
assert [
binary? source: get :series
]
switch type?/word type [
integer! [
length: type
type: 'part
]
binary! [
length: length? type
part: type
type: 'match
]
word! [
length: select [
char 1
ishort 2
ilong 4
msdos-date 2
msdos-time 2
] type
]
]
assert [
integer? length
]
switch type [
char [
advance :series 1
source/1
]
ishort [
advance :series 2
(shift/left source/2 8) + source/1
]
ilong [
advance :series 4
; could be signed :-/
(shift/left source/4 24)
+ (shift/left source/3 16)
+ (shift/left source/2 8)
+ source/1
]
match [
if find/match source part [
advance :series length
part
]
]
part [
advance :series length
copy/part source length
]
msdos-date [
advance :series 2
part: source/1 or shift/left source/2 8
value: 30-Nov-1979
value/day: value/day + (31 and part)
value/month: value/month + (15 and shift part 5)
value/year: value/year + shift part 9
value
]
msdos-time [
advance :series 2
part: source/1 or shift/left source/2 8
to time! reduce [
shift part 11
63 and shift part 5
31 and part * 2
]
]
]
]
prototype-index: make object! [
type: 'index
count:
size:
offset:
comment:
entries: _
]
prototype-entry: make object! [
type: 'entry
version:
system:
needs:
flags:
method:
time:
date:
checksum:
compressed:
uncompressed:
file-name-length:
extra-field-length:
file-comment-length:
internal-attributes:
external-attributes:
offset:
file-name:
extra-field:
file-comment:
mark: _
]
init: func [
archive [binary!]
/local mark
][
case [
not mark: find/last archive index-marker [
make error! "Not a ZIP file"
]
; sanity check: room for core footer
;
22 > length? mark [
make error! "ZIP index truncated/corrupted"
]
not all [
archive: make prototype-index []
consume mark index-marker
advance mark 6 ; three entries related to a multi-file ZIP archive
archive/count: consume mark 'ishort
archive/size: consume mark 'ilong
archive/offset: consume mark 'ilong
archive/comment: consume mark 'ishort
][
make error! "ZIP index corrupt/invalid"
]
; sanity check: sizes/offsets match up
;
not all [
equal? archive/offset + archive/size + 23 index? mark
tail? skip mark archive/comment
][
make error! "ZIP index sanity check failure"
]
<else> [
if not tail? mark [
archive/comment: as-string copy mark
]
archive/entries: skip head mark archive/offset
archive
]
]
]
step: func [
archive [object!]
/local mark entry
][
case [
zero? archive/count [
none
]
not binary? mark: archive/entries [
make error! "Invalid ZIP archive object"
]
not all [
entry: make prototype-entry []
consume mark entry-marker
entry/version: consume mark 'char
entry/system: consume mark 'char
entry/needs: consume mark 'ishort
entry/flags: consume mark 'ishort
entry/method: consume mark 'ishort
entry/time: consume mark 'msdos-time
entry/date: consume mark 'msdos-date
entry/checksum: reverse consume mark 4
entry/compressed: consume mark 'ilong
entry/uncompressed: consume mark 'ilong
entry/file-name-length: consume mark 'ishort
entry/extra-field-length: consume mark 'ishort
entry/file-comment-length: consume mark 'ishort
advance mark 2 ; multi-file ZIP feature unsupported
entry/internal-attributes: consume mark 'ishort
entry/external-attributes: consume mark 'ilong
entry/offset: consume mark 'ilong
entry/file-name: consume mark entry/file-name-length
entry/file-name: to file! entry/file-name
entry/extra-field: consume mark entry/extra-field-length
entry/file-comment: consume mark entry/file-comment-length
][
make error! "Invalid ZIP directory entry"
]
<else> [
archive/entries: mark
archive/count: archive/count - 1
entry/mark: skip head archive/entries entry/offset
entry
]
]
]
unpack: func [
entry [object!]
/local mark part warnings
][
case [
not binary? mark: entry/mark [
make error! "Invalid ZIP archive object"
]
not consume mark local-marker [
make error! "Invalid ZIP entry"
]
find "/\" last entry/file-name [
either zero? entry/uncompressed [
none
][
make error! "Empty ZIP folder entry expected"
]
]
<else> [
if not empty? warnings: collect [
case/all [
entry/needs <> consume mark 'ishort [
keep "Entry NEEDS field does not match directory record"
]
entry/flags <> consume mark 'ishort [
keep "Entry FLAGS field does not match directory record"
]
entry/method <> consume mark 'ishort [
keep "Entry METHOD field does not match directory record"
]
entry/time <> consume mark 'msdos-time [
keep "Entry TIME field does not match directory record"
]
entry/date <> consume mark 'msdos-date [
keep "Entry DATE field does not match directory record"
]
entry/checksum <> reverse consume mark 4 [
keep "Entry CHECKSUM field does not match directory record"
]
entry/compressed <> consume mark 'ilong [
keep "Entry COMPRESSED field does not match directory record"
]
entry/uncompressed <> consume mark 'ilong [
keep "Entry UNCOMPRESSED field does not match directory record"
]
entry/file-name-length <> consume mark 'ishort [
keep "Entry FILE-NAME-LENGTH field does not match directory record"
]
entry/extra-field-length <> consume mark 'ishort [
keep "Entry EXTRA-FIELD-LENGTH field does not match directory record"
]
entry/file-name <> to file! consume mark entry/file-name-length [
keep "Entry FILE-NAME field does not match directory record"
]
entry/extra-field <> consume mark entry/extra-field-length [
keep "Entry EXTRA-FIELD field does not match directory record"
]
]
][
; probe warnings
]
if value: switch entry/method [
0 [
copy/part mark entry/uncompressed
]
8 [
either zero? entry/uncompressed [
make binary! 0
][
inflate mark make binary! entry/uncompressed
]
]
][
if not zero? 1 and entry/internal-attributes [
value: as-string value
]
value
]
]
]
]
to-block: func [
archive [binary!]
/local index entry
][
index: init archive
new-line/all/skip collect [
while [
entry: step index
][
keep entry/file-name
keep unzip/unpack entry
]
] true 2
]
]

Versatile handler of ZIP archives.

Basic Usage

Unzip an archive to a block

unzip/to-block read/binary %some-archive.zip

Fine-Grained Usage

Retrieve a single entry

index: unzip/init read/binary %some-archive.zip

while [
    entry: unzip/step index
][
    if entry/file-name = %mimetype [
        break/return to string! unzip/unpack entry
    ]
]

Implementation Notes

Rebol 2 has a non-standard DECOMPRESS function that can only inflate a deflate stream if it knows both the length and the Adler32 checksum of the uncompressed data. While the ZIP specification provides for the uncompressed length, it only supplies a CRC32 checksum thus cannot be used to decompress data from within ZIP files.

This version of Unzip uses a Rebol 2 port of Tiny Inflate (from: tiny-inflate) as an alternative to perform the necessary decompressions. As it is a user-mode algorithm (as opposed to native C), it can be a bit slow decompressing larger files—not ideal but passible if this is a feature you must have.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment