Created
January 12, 2012 15:22
-
-
Save svolle/1601090 to your computer and use it in GitHub Desktop.
PyYAML parsing benchmark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--[[
This script loads every regular file located in the PATH directory into memory
and parses each of them as YAML.
It prints how long the parsing phase took (CPU time, as reported by os.clock).
--]]
local lfs = require("lfs")
local yaml = require("yaml")

local PATH = "yaml_data"

local yaml_data = {}
local yaml_parsed_data = {}

-- Phase 1: read the raw documents up front so that disk I/O is not part of
-- the measured time.
for file in lfs.dir(PATH) do
  if file ~= "." and file ~= ".." then
    local path = string.format("%s/%s", PATH, file)
    -- Only regular files are read; sub-directories and other entries are
    -- skipped so they cannot leave holes in yaml_data.
    if lfs.attributes(path, "mode") == "file" then
      local f = assert(io.open(path, "r"))
      yaml_data[#yaml_data + 1] = assert(f:read("*a"))
      assert(f:close())
    end
  end
end

print("YAML data loaded. Commencing parsing...")

-- Phase 2: parse. Each raw document is released as soon as it is consumed.
-- The table is walked from the end and slots are cleared by index: calling
-- table.remove(t, i) inside an ipairs loop shifts the remaining elements and
-- silently skips every other document.
local start = os.clock()
for i = #yaml_data, 1, -1 do
  local doc = yaml_data[i]
  yaml_data[i] = nil
  yaml_parsed_data[#yaml_parsed_data + 1] = yaml.load(doc)
end
print(string.format("elapsed time: %.2f\n", os.clock() - start))

-- Keep the console window open until the user confirms.
io.write("Press enter to continue ")
io.flush()
io.read()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This module reads all the files from the PATH subdirectory of the current working directory, | |
# and store their content in memory, assuming they contain YAML data. | |
# It then parses all the YAML documents from memory storing the result in memory and outputs | |
# the duration of the whole parsing operation. | |
from __future__ import print_function | |
import yaml | |
import getopt | |
import time, sys, os | |
if sys.hexversion > 0x03000000: | |
get_input = input | |
else: | |
get_input = raw_input | |
PATH = "yaml_data" | |
yaml_data = [] | |
parsed_data = [] | |
use_libyaml = False | |
store_data = False | |
opts, args = getopt.getopt(sys.argv[1:], "", ["use-libyaml"]) | |
for o, a in opts: | |
if o == "--use-libyaml": | |
use_libyaml = True | |
if use_libyaml: | |
try: | |
from yaml import CLoader as Loader, CDumper as Dumper | |
print("Using C-based parser (libyaml)") | |
except ImportError: | |
print("Cannot import C-based parser (libyaml). Falling back to pure Python parser.", | |
file=sys.stderr | |
) | |
from yaml import Loader, Dumper | |
else: | |
print("Using pure Python parser") | |
from yaml import Loader, Dumper | |
for f_name in os.listdir(PATH): | |
with open(os.path.join(PATH, f_name)) as f: | |
yaml_data.append(f.read()) | |
print("YAML data loaded. Commencing parsing...") | |
t = time.clock() | |
while yaml_data: | |
parsed_data.append(yaml.load(yaml_data.pop(), Loader=Loader)) | |
print("Elapsed time: {0:.2f}\n".format(time.clock() - t)) | |
get_input("Press enter to exit\n") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This module generates NB_FILES YAML documents with random content and random length
# and writes them into the PATH directory under the current working directory.
import yaml | |
import string | |
import random | |
import os | |
# Number of YAML documents to generate (per the module header comment).
NB_FILES = 1000
# Output directory, relative to the current working directory.
PATH = "yaml_data"
def string_generator(size=6, chars=string.ascii_letters + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    Each character is picked independently with ``random.choice``; by default
    the alphabet is ASCII letters plus digits.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
def random_length_string():
    """Return a random string whose length is drawn by ``random.randrange(10, 20)``."""
    length = random.randrange(10, 20)
    return string_generator(length)
def random_tuple():
    """Return a 2-tuple of independently generated random-length strings."""
    first = random_length_string()
    second = random_length_string()
    return first, second
def random_length_list_of_tuple():
    """Return a list of random 2-tuples; its length comes from ``random.randrange(10, 20)``."""
    count = random.randrange(10, 20)
    pairs = []
    for _ in range(count):
        pairs.append(random_tuple())
    return pairs
def random_length_list_of_string():
    """Return a list of random strings; its length comes from ``random.randrange(10, 20)``."""
    count = random.randrange(10, 20)
    items = []
    for _ in range(count):
        items.append(random_length_string())
    return items
def random_dict():
    """Return a dict built from a random list of (key, value) string pairs.

    If the same key is generated twice, the later pair wins.
    """
    return dict(random_length_list_of_tuple())
def random_sub_dict():
    """Return a single-entry dict mapping a random string to a random dict."""
    nested = {random_length_string(): random_dict()}
    return nested
def random_sub_list():
    """Return a single-entry dict mapping a random string to a random list of strings."""
    nested = {random_length_string(): random_length_list_of_string()}
    return nested
# Generators for top-level document items; one is picked at random per item.
functions = [random_length_string, random_dict, random_sub_dict, random_sub_list]

if __name__ == "__main__":
    # Create the output directory on first run.
    if not os.path.exists(PATH):
        os.makedirs(PATH)
    # The loop bound was previously the undefined name FILES, which raised a
    # NameError before any document was written; the constant is NB_FILES.
    for i in range(NB_FILES):
        # Each document is a list of 100 to 299 randomly chosen items.
        result = [
            random.choice(functions)()
            for _ in range(random.randrange(100, 300))
        ]
        with open(os.path.join(PATH, "test_{0}.yaml".format(i)), "w") as stream:
            yaml.dump(result, stream)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment