Created
April 26, 2017 19:21
-
-
Save wesyoung/483756f102d6f8e16c18b70465f471d8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import magic | |
import sys | |
from pprint import pprint | |
# Path of the file to classify, taken from the first command-line argument.
# The script body at the bottom of the file reads this module-level name.
f = sys.argv[1]
def _is_ascii(f, mime): | |
if mime == 'ASCII text': | |
return True | |
def _is_rss(f, mime):
    """Placeholder RSS detector; it never reports a match.

    The XML check is run first, but no RSS-specific inspection follows it,
    so the result is ``None`` for every input.

    Args:
        f: File object, forwarded to ``_is_xml``.
        mime: Description string for the file, forwarded to ``_is_xml``.

    Returns:
        Always ``None``.
    """
    looks_like_xml = _is_xml(f, mime)
    if looks_like_xml:
        # TODO: inspect the document for RSS markers; nothing is implemented.
        return None
    return None
def _is_xml(f, mime): | |
if mime == 'XML document text': | |
return 'xml' | |
def _is_json(f, mime):
    """Guess whether an ASCII text file holds a JSON object or list of objects.

    This is a cheap heuristic, not a parse: only the first line and the last
    non-blank line are inspected. The first must open with ``{`` or ``[{``
    and the last must close with ``}`` or ``}]``. A payload wrapped in
    single quotes (``'{...}'``) is accepted as well.

    Args:
        f: Open text-mode file object. It is rewound to the start when it is
            seekable, and is read through to the end.
        mime: Description string for the file; the check is skipped unless
            ``_is_ascii`` accepts it.

    Returns:
        ``'json'`` when both lines match, otherwise ``None``.
    """
    if not _is_ascii(f, mime):
        return None

    # Another detector may already have consumed part of this handle;
    # without rewinding, "first" would not be the real first line.
    if f.seekable():
        f.seek(0)

    first = f.readline().strip()
    last = first  # a single-line file is both the first and the last line
    # Stream the remainder rather than loading every line with readlines(),
    # and ignore trailing blank lines so they cannot hide the closing brace.
    for line in f:
        stripped = line.strip()
        if stripped:
            last = stripped

    # Drop an optional single-quote wrapper so both quoted and bare JSON
    # payloads are recognised.
    if first.startswith("'"):
        first = first[1:]
    if last.endswith("'"):
        last = last[:-1]

    if not first.startswith(('[{', '{')):
        return None
    if not last.endswith(('}]', '}')):
        return None
    return 'json'
def _is_delimited(f, mime):
    """Guess the delimiter of an ASCII text file from its first two data lines.

    Leading lines that start with ``#`` are skipped. The next line is taken
    as the header and the line after it as the first row. A delimiter
    matches when it occurs at least once in the header and exactly as many
    times in the row. Candidates are tried in the order tab, comma, pipe,
    semicolon.

    Args:
        f: Open text-mode file object; lines are consumed from its current
            position.
        mime: Description string for the file; the check is skipped unless
            ``_is_ascii`` accepts it.

    Returns:
        ``'tsv'``, ``'csv'``, ``'pipe'`` or ``'semicolon'`` on a match,
        ``False`` when no delimiter matches, and ``None`` when the file is
        not ASCII text.
    """
    if not _is_ascii(f, mime):
        return None

    candidates = (
        ("\t", 'tsv'),
        (',', 'csv'),
        ('|', 'pipe'),
        (';', 'semicolon'),
    )

    header = f.readline().rstrip("\n")
    # Skip leading comment lines; readline() yields '' at EOF, which ends the loop.
    while header.startswith('#'):
        header = f.readline().rstrip("\n")
    row = f.readline().rstrip("\n")

    for delimiter, label in candidates:
        expected = header.count(delimiter)
        if expected != 0 and row.count(delimiter) == expected:
            return label
    return False
# Detectors tried by feed_type, in this order; the first truthy result wins.
TESTS = [_is_delimited, _is_json, _is_xml]
def feed_type(f, mime, tests=None):
    """Return the first feed type reported by the registered detectors.

    Args:
        f: Open file object handed to each detector.
        mime: Description string for the file, handed to each detector.
        tests: Optional iterable of detector callables taking ``(f, mime)``.
            Defaults to the module-level ``TESTS`` list.

    Returns:
        The first truthy value returned by a detector, or ``None`` when no
        detector matches.
    """
    detectors = TESTS if tests is None else tests
    can_rewind = f.seekable()
    for detect in detectors:
        # Detectors read from the handle; rewind so each one starts at the
        # top of the file instead of wherever the previous one stopped.
        if can_rewind:
            f.seek(0)
        kind = detect(f, mime)
        if kind:
            return kind
    return None
# Classify the file named on the command line: print its description first,
# then the feed type detected from the open handle (None if nothing matched).
with open(f) as FILE:
    mime_type = magic.from_file(f)
    print(mime_type)
    _feed_kind = feed_type(FILE, mime_type)
    print(_feed_kind)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment