Skip to content

Instantly share code, notes, and snippets.

@anatoly-scherbakov
Created August 26, 2019 17:51
Show Gist options
  • Save anatoly-scherbakov/9b532ebf790d91d142f459b54bb310d8 to your computer and use it in GitHub Desktop.
Save anatoly-scherbakov/9b532ebf790d91d142f459b54bb310d8 to your computer and use it in GitHub Desktop.
Parse UNRAR output with pyparsing
#!/usr/bin/env python3
import json
from pyparsing import (
ParserElement, Literal, Group, restOfLine,
ZeroOrMore, LineEnd, CharsNotIn
)
# Sample listing text (banner says UNRAR 5.71) used as the input to parse()
# by main(). It holds a banner line, an archive-info group (Archive/Details),
# and then one "Key: value" group per archive entry.
# NOTE(review): parse() assumes sections are separated by empty lines, but no
# empty lines are visible in this literal -- confirm against the real UNRAR
# output before relying on the sample output shown at the end of the file.
TEXT = """UNRAR 5.71 freeware Copyright (c) 1993-2019 Alexander Roshal
Archive: test_advarchs_direct_link.rar
Details: RAR 5
Name: PyCharmHotKeys.pdf
Type: File
Size: 350573
Packed size: 301085
Ratio: 85%
mtime: 2017-05-30 06:26:53,000000000
Attributes: ..A....
CRC32: 538D9621
Host OS: Windows
Compression: RAR 5.0(v50) -m3 -md=16M
Name: sublime_text_shortcuts.pdf
Type: File
Size: 67750
Packed size: 63559
Ratio: 93%
mtime: 2017-12-06 01:54:03,000000000
Attributes: ..A....
CRC32: F08BA6D1
Host OS: Windows
Compression: RAR 5.0(v50) -m3 -md=16M
Name: TextInMotion-VideoSample.mp4
Type: File
Size: 10454044
Packed size: 9955216
Ratio: 95%
mtime: 2019-08-06 13:34:27,207165800
Attributes: ..A....
CRC32: C799547F
Host OS: Windows
Compression: RAR 5.0(v50) -m3 -md=16M
Name: bensound-summer.mp3
Type: File
Size: 4349086
Packed size: 4329351
Ratio: 99%
mtime: 2019-08-06 13:32:47,950623500
Attributes: ..A....
CRC32: A6203CE2
Host OS: Windows
Compression: RAR 5.0(v50) -m3 -md=16M
Name: Financial Sample.xlsx
Type: File
Size: 83418
Packed size: 77547
Ratio: 92%
mtime: 2019-08-06 13:23:29,329002900
Attributes: ..A....
CRC32: 84D57D24
Host OS: Windows
Compression: RAR 5.0(v50) -m3 -md=16M
Name: QO
Type: Service
Size: 351
Packed size: 351
Ratio: 100%
Attributes: .B
Host OS: Windows
Compression: RAR 5.0(v50) -m0 -md=128K
Service: EOF"""
def parse(value: str):
    """Parse UNRAR listing text into a list of sections.

    The grammar matches, in order: a banner line starting with ``UNRAR``
    (suppressed), one section holding the archive info (suppressed), and
    then any number of further sections, which are returned.

    Each returned section is a list of two-item ``[key, value]`` lists.
    The key is every character up to the first colon or newline, so it may
    carry surrounding whitespace; the value is the rest of the line after
    the ``': '`` separator.
    """
    # NOTE(review): kept exactly as in the original. pyparsing's documented
    # hook for this is ParserElement.setDefaultWhitespaceChars(); confirm
    # that this plain attribute assignment actually has any effect.
    ParserElement.defaultWhitespaceChars = " \t"

    key_value_separator = Literal(': ').suppress()
    line_end = LineEnd().suppress()

    banner = Literal('UNRAR') + restOfLine

    key = CharsNotIn('\n:')
    row = Group(key + key_value_separator + restOfLine + line_end)

    # Sections are separated from each other by empty lines
    section = Group(line_end[1, 2] + ZeroOrMore(row))
    file_sections = ZeroOrMore(section)

    # Software info, then archive info (both dropped), then files info.
    grammar = banner.suppress() + section.suppress() + file_sections
    return grammar.parseString(value).asList()
def main():
    """Parse ``TEXT`` and return one ``{key: value}`` dict per section.

    Keys are stripped of surrounding whitespace; values are kept as parsed.
    """
    # TODO we have to do `.strip()` here because we grab everything
    sections = parse(value=TEXT)
    return [
        {name.strip(): content for name, content in rows}
        for rows in sections
    ]
# Script entry point: print the parsed sections as indented JSON.
if __name__ == '__main__':
    parsed_sections = main()
    print(json.dumps(parsed_sections, indent=2))
'''
[
{
"Name": "PyCharmHotKeys.pdf",
"Type": "File",
"Size": "350573",
"Packed size": "301085",
"Ratio": "85%",
"mtime": "2017-05-30 06:26:53,000000000",
"Attributes": "..A....",
"CRC32": "538D9621",
"Host OS": "Windows",
"Compression": "RAR 5.0(v50) -m3 -md=16M"
},
{
"Name": "sublime_text_shortcuts.pdf",
"Type": "File",
"Size": "67750",
"Packed size": "63559",
"Ratio": "93%",
"mtime": "2017-12-06 01:54:03,000000000",
"Attributes": "..A....",
"CRC32": "F08BA6D1",
"Host OS": "Windows",
"Compression": "RAR 5.0(v50) -m3 -md=16M"
},
{
"Name": "TextInMotion-VideoSample.mp4",
"Type": "File",
"Size": "10454044",
"Packed size": "9955216",
"Ratio": "95%",
"mtime": "2019-08-06 13:34:27,207165800",
"Attributes": "..A....",
"CRC32": "C799547F",
"Host OS": "Windows",
"Compression": "RAR 5.0(v50) -m3 -md=16M"
},
{
"Name": "bensound-summer.mp3",
"Type": "File",
"Size": "4349086",
"Packed size": "4329351",
"Ratio": "99%",
"mtime": "2019-08-06 13:32:47,950623500",
"Attributes": "..A....",
"CRC32": "A6203CE2",
"Host OS": "Windows",
"Compression": "RAR 5.0(v50) -m3 -md=16M"
},
{
"Name": "Financial Sample.xlsx",
"Type": "File",
"Size": "83418",
"Packed size": "77547",
"Ratio": "92%",
"mtime": "2019-08-06 13:23:29,329002900",
"Attributes": "..A....",
"CRC32": "84D57D24",
"Host OS": "Windows",
"Compression": "RAR 5.0(v50) -m3 -md=16M"
},
{
"Name": "QO",
"Type": "Service",
"Size": "351",
"Packed size": "351",
"Ratio": "100%",
"Attributes": ".B",
"Host OS": "Windows",
"Compression": "RAR 5.0(v50) -m0 -md=128K"
},
{
"Service": "EOF"
}
]
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment