Skip to content

Instantly share code, notes, and snippets.

@svolle
Created January 12, 2012 15:22
Show Gist options
  • Save svolle/1601090 to your computer and use it in GitHub Desktop.
Save svolle/1601090 to your computer and use it in GitHub Desktop.
PyYAML parsing benchmark
--[[
This script loads every file found in the PATH directory into memory and parses each one as YAML.
It then prints how long the parsing step took.
--]]
require("lfs")
require("yaml")
-- Directory (relative to the current working directory) holding the YAML files.
local PATH = "yaml_data"
-- Raw file contents, one string per file, in the order lfs.dir yields them.
local yaml_data = {}
-- Parsed documents; yaml_parsed_data[i] is the result of parsing yaml_data[i].
local yaml_parsed_data = {}

-- Phase 1: read every regular file of PATH into memory so that disk I/O is
-- not included in the timed section below.
for file in lfs.dir(PATH) do
  local path = string.format("%s/%s", PATH, file)
  -- lfs.dir also yields ".", ".." and sub-directories; only regular files
  -- can be read as YAML documents, so everything else is skipped.
  if lfs.attributes(path, "mode") == "file" then
    local f = assert(io.open(path, "r"))
    yaml_data[#yaml_data + 1] = assert(f:read("*all"))
    assert(f:close())
  end
end

print("YAML data loaded. Commencing parsing...")

-- Phase 2: parse every document and measure the CPU time spent doing so.
local start_time = os.clock()
local document_count = #yaml_data
for i = 1, document_count do
  local document = yaml_data[i]
  -- Drop the raw text once it has been picked up so it can be collected.
  -- Assigning nil (instead of table.remove inside an ipairs loop) does not
  -- shift the remaining entries, so no document is skipped.
  yaml_data[i] = nil
  yaml_parsed_data[i] = yaml.load(document)
end
print(string.format("elapsed time: %.2f\n", os.clock() - start_time))

-- Keep the process alive until the user confirms, e.g. so that memory usage
-- can be inspected from outside.
io.write("Press enter to continue ")
io.flush()
io.read()
# This script reads every file from the PATH subdirectory of the current working directory
# and stores its content in memory, assuming it contains YAML data.
# It then parses all the YAML documents from memory, keeps the results in memory, and prints
# the duration of the whole parsing operation.
from __future__ import print_function
import yaml
import getopt
import time, sys, os
# The interactive prompt function is raw_input on Python 2 and input on Python 3.
if sys.hexversion > 0x03000000:
    get_input = input
else:
    get_input = raw_input

# time.clock() was removed in Python 3.8. Prefer time.process_time() (CPU time,
# available since Python 3.3) and fall back to time.clock() on older interpreters.
try:
    timer = time.process_time
except AttributeError:
    timer = time.clock

# Directory (relative to the current working directory) holding the YAML files.
PATH = "yaml_data"
# Raw contents of every file, consumed (popped) while parsing.
yaml_data = []
# Parsed documents, in the order they were parsed.
parsed_data = []
use_libyaml = False
# NOTE(review): store_data is never read in this script; kept for compatibility.
store_data = False

# Command line: the only recognised option is --use-libyaml.
opts, args = getopt.getopt(sys.argv[1:], "", ["use-libyaml"])
for o, a in opts:
    if o == "--use-libyaml":
        use_libyaml = True

# Select the parser implementation: the libyaml binding when requested and
# available, the pure Python one otherwise.
if use_libyaml:
    try:
        from yaml import CLoader as Loader, CDumper as Dumper
        print("Using C-based parser (libyaml)")
    except ImportError:
        print("Cannot import C-based parser (libyaml). Falling back to pure Python parser.",
              file=sys.stderr)
        from yaml import Loader, Dumper
else:
    print("Using pure Python parser")
    from yaml import Loader, Dumper

# Phase 1: read every regular file of PATH into memory so that disk I/O is
# not part of the timed section.
for f_name in os.listdir(PATH):
    f_path = os.path.join(PATH, f_name)
    # os.listdir also returns sub-directories, which cannot be opened as files.
    if not os.path.isfile(f_path):
        continue
    with open(f_path) as f:
        yaml_data.append(f.read())

print("YAML data loaded. Commencing parsing...")

# Phase 2: parse every document, releasing the raw text as soon as it is parsed.
# SECURITY: yaml.load with the full Loader/CLoader can construct arbitrary
# Python objects; only run this benchmark on trusted YAML files.
t = timer()
while yaml_data:
    parsed_data.append(yaml.load(yaml_data.pop(), Loader=Loader))
print("Elapsed time: {0:.2f}\n".format(timer() - t))

# Keep the process alive until the user confirms.
get_input("Press enter to exit\n")
# This script generates NB_FILES YAML documents with random content and random length
# and writes them to the PATH directory inside the current working directory.
import yaml
import string
import random
import os
# Number of YAML documents to generate.
NB_FILES = 1000
# Output directory for the generated documents, relative to the current working directory.
PATH = "yaml_data"
def string_generator(size=6, chars=string.ascii_letters + string.digits):
    """Build a random string.

    size  -- number of characters in the result.
    chars -- sequence the characters are drawn from (with replacement).

    Returns the concatenation of `size` independent random picks from `chars`.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
def random_length_string():
    """Return a random string whose length is drawn from 10 to 19 inclusive."""
    length = random.randrange(10, 20)
    return string_generator(length)
def random_tuple():
    """Return a 2-tuple of independently generated random-length strings."""
    first = random_length_string()
    second = random_length_string()
    return first, second
def random_length_list_of_tuple():
    """Return a list of 10 to 19 random string pairs (see random_tuple)."""
    count = random.randrange(10, 20)
    pairs = []
    for _ in range(0, count):
        pairs.append(random_tuple())
    return pairs
def random_length_list_of_string():
    """Return a list of 10 to 19 random-length strings."""
    count = random.randrange(10, 20)
    strings = []
    for _ in range(0, count):
        strings.append(random_length_string())
    return strings
def random_dict():
    """Return a flat dict mapping random strings to random strings.

    The entries come from random_length_list_of_tuple(); should two pairs
    share a key, the later pair wins.
    """
    return dict(random_length_list_of_tuple())
def random_sub_dict():
    """Return a single-entry dict whose value is itself a random dict."""
    # Built as one dict display so that the key/value evaluation order stays
    # whatever the running interpreter defines for it.
    nested = {random_length_string(): random_dict()}
    return nested
def random_sub_list():
    """Return a single-entry dict whose value is a random list of strings."""
    # Built as one dict display so that the key/value evaluation order stays
    # whatever the running interpreter defines for it.
    nested = {random_length_string(): random_length_list_of_string()}
    return nested
# Pool of element generators; every top-level element of a generated document
# is produced by one of these, chosen at random.
functions = [random_length_string, random_dict, random_sub_dict, random_sub_list]

if __name__ == "__main__":
    # Create the output directory on first use.
    if not os.path.exists(PATH):
        os.makedirs(PATH)
    # Fixed: the loop bound used to be the undefined name FILES, which raised
    # NameError before any document was written; the constant is NB_FILES.
    for i in range(0, NB_FILES):
        # Each document is a list of 100 to 299 randomly generated elements.
        element_count = random.randrange(100, 300)
        result = [random.choice(functions)() for _ in range(element_count)]
        with open("{0}/test_{1}.yaml".format(PATH, i), "w") as stream:
            yaml.dump(result, stream)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment