Skip to content

Instantly share code, notes, and snippets.

@cryocaustik
Last active September 23, 2021 01:21
Show Gist options
  • Save cryocaustik/00d6500f0f3d075a0730753596e72f1c to your computer and use it in GitHub Desktop.
Save cryocaustik/00d6500f0f3d075a0730753596e72f1c to your computer and use it in GitHub Desktop.
Python script to find Synth collections, generate them, and output the results to individual named files.

Synth Generate to dedicated files

Use the Python script generate.py to automatically walk a directory of collections, generate each one, and export the results into individual files named after each collection.

Execute the Python script

[21-09-22 20:03:39] ➜  python generate.py
path to collections dir: c:/dev/collections/elig
export directory (default C:\dev\synth-data-generator\exports): 

Results

[21-09-22 20:05:13] ➜ l ./exports/eligibility
total 361M
drwxrwxrwx 1 cryo cryo 4.0K Sep 22 16:35 .
drwxrwxrwx 1 cryo cryo 4.0K Sep 22 16:12 ..
-rwxrwxrwx 1 cryo cryo    0 Sep 22 18:02 clinic.json
-rwxrwxrwx 1 cryo cryo 131M Sep 22 16:34 member.json
-rwxrwxrwx 1 cryo cryo 230M Sep 22 16:34 member_elig_fact.json
-rwxrwxrwx 1 cryo cryo  21K Sep 22 16:35 mso.json
-rwxrwxrwx 1 cryo cryo 398K Sep 22 16:35 provider.json
from subprocess import PIPE, Popen
from pathlib import Path
# Directory that generated output files are written into. This is the default;
# main() rebinds it when the user supplies an existing directory at the prompt.
EXPORT_DIR = Path("./exports")
def find_collections(path: Path) -> list:
    """
    Find all collection files directly inside the given directory.

    A collection is any regular file whose suffix is ``.json``; sub-directories
    are not descended into.

    Args:
        path: Directory to scan. A plain string is accepted and converted to a
            ``Path``.

    Returns:
        The matching ``Path`` objects, sorted so that callers process
        collections in a stable order (``iterdir`` order is arbitrary).

    Raises:
        OSError: If ``path`` does not exist or is not a directory
            (propagated from ``Path.iterdir``).
    """
    # isinstance() rather than an exact type comparison: Path(...) actually
    # instantiates PosixPath/WindowsPath, so `type(path) is Path` never matches.
    if not isinstance(path, Path):
        path = Path(path)
    return sorted(
        entry
        for entry in path.iterdir()
        if entry.suffix == ".json" and entry.is_file()
    )
def generate_collection(collection: Path, seed: str = "1") -> None:
    """
    Generate one Synth collection and write the output to the export directory.

    Runs ``synth.exe generate <collection dir> --collection <name> --seed <seed>``
    with stdout redirected to ``EXPORT_DIR / <collection file name>``. Anything
    the process writes to stderr is captured and printed; a non-zero exit
    status with empty stderr is reported as well.

    Args:
        collection: Path to the collection's ``.json`` file. Its parent
            directory is passed to Synth and its stem is used as the
            collection name.
        seed: Seed forwarded to ``--seed``; converted with ``str()``.

    Raises:
        OSError: If the export file cannot be opened or the ``synth.exe``
            executable cannot be started.
    """
    export_path = (EXPORT_DIR / collection.name).resolve()
    # The export directory may not exist yet (e.g. the default on first run).
    export_path.parent.mkdir(parents=True, exist_ok=True)

    cmd = [
        "synth.exe",
        "generate",
        str(collection.parent.resolve()),
        "--collection",
        collection.stem,
        "--seed",
        str(seed),
    ]

    # `with` guarantees the export file is flushed and closed even if the
    # process fails to start.
    with open(export_path, "w") as export_file:
        proc = Popen(cmd, stdout=export_file, stderr=PIPE)
        # communicate() returns (stdout, stderr); stdout is None here because
        # it is redirected to the file, so only the second item carries data.
        _, stderr_bytes = proc.communicate()

    stderr_text = stderr_bytes.decode(errors="replace").strip() if stderr_bytes else ""
    if not stderr_text and proc.returncode != 0:
        stderr_text = f"synth exited with status {proc.returncode}"

    if stderr_text:
        err_msg = "generate_collection error: {collection}\n\terror: {errors}".format(
            errors="\n\t".join(stderr_text.splitlines()),
            collection=collection,
        )
        print(err_msg)
def main() -> None:
    """
    Prompt for directories, then generate every collection that is found.

    Asks for the collections directory and an optional export directory. The
    module-level ``EXPORT_DIR`` is rebound only when the supplied export path
    is an existing directory; otherwise the default is kept and a notice is
    printed so the fallback is not silent.

    Raises:
        SystemExit: If the collections path is not an existing directory.
    """
    global EXPORT_DIR

    # Strip once so the value that is validated is the value that is used.
    collections_path = input("path to collections dir: ").strip()
    export_dir = input(f"export directory (default {EXPORT_DIR.resolve()}): ").strip()

    if not Path(collections_path).is_dir():
        raise SystemExit(f"collections directory not found: {collections_path}")

    if export_dir:
        if Path(export_dir).is_dir():
            EXPORT_DIR = Path(export_dir)
        else:
            print(
                f"export directory not found: {export_dir}; "
                f"using default {EXPORT_DIR.resolve()}"
            )

    # The default export directory is not guaranteed to exist yet.
    EXPORT_DIR.mkdir(parents=True, exist_ok=True)

    for collection in find_collections(collections_path):
        generate_collection(collection)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment