Skip to content

Instantly share code, notes, and snippets.

@mando222
Last active July 24, 2024 01:32
Show Gist options
  • Save mando222/faa9ddee1e6bf37b717770971caf4aa9 to your computer and use it in GitHub Desktop.
Save mando222/faa9ddee1e6bf37b717770971caf4aa9 to your computer and use it in GitHub Desktop.
import re
import json
import argparse
def usfm_to_json(usfm_content):
    r"""Parse USFM text into a dictionary keyed by marker name.

    Each ``\marker`` found in *usfm_content* is paired with the text that
    follows it, up to (but not including) the next backslash or the end of
    the input. Leading and trailing whitespace is stripped from that text.
    Any text that appears before the first marker is ignored.

    Marker names are matched with ``\w+``, so the ``*`` of a closing marker
    such as ``\add*`` is not part of the name; it becomes the first character
    of that marker's content.

    Args:
        usfm_content: The USFM document as a single string.

    Returns:
        A dict mapping marker names (without the backslash) to their content.
        A marker that occurs once maps to a string; a marker that occurs
        several times maps to a list of strings in document order. A marker
        that carries no text (for example a bare ``\p``) maps to ``""``.
    """
    # The content group is "anything up to the next backslash" and may be
    # empty. Requiring at least one character (``.+?``) would let a marker
    # with no text consume the backslash of the marker that follows it and
    # swallow that whole marker as its content.
    pattern = r'\\(\w+)\s*([^\\]*)'

    parsed_content = {}
    for marker, content in re.findall(pattern, usfm_content):
        content = content.strip()
        if marker not in parsed_content:
            # First occurrence: store the plain string.
            parsed_content[marker] = content
        elif isinstance(parsed_content[marker], list):
            # Third or later occurrence: extend the existing list.
            parsed_content[marker].append(content)
        else:
            # Second occurrence: promote the stored string to a list.
            parsed_content[marker] = [parsed_content[marker], content]
    return parsed_content
def main(argv=None):
    """Command-line entry point: convert a USFM file to a JSON file.

    Reads the input file as UTF-8, converts it with ``usfm_to_json`` and
    writes the result to the output file as indented, non-ASCII-escaped JSON.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None``, argparse falls back to ``sys.argv[1:]``.

    Raises:
        SystemExit: With an error message (printed to stderr, exit status 1)
            when the input cannot be read or the output cannot be written.
            argparse also raises it for bad arguments, ``--help`` and
            ``--version``.
    """
    parser = argparse.ArgumentParser(
        description="Convert USFM (Unified Standard Format Markers) files to JSON format.",
        epilog="Example usage: python usfm_to_json.py input.usfm output.json",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("input_file",
                        help="Path to the input USFM file to be converted.")
    parser.add_argument("output_file",
                        help="Path where the output JSON file will be saved.")
    # Let users query the tool version from the command line.
    parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0')
    args = parser.parse_args(argv)

    # Reading and writing are guarded separately so that a failure on the
    # output path is never reported as a problem with the input file.
    try:
        with open(args.input_file, 'r', encoding='utf-8') as file:
            usfm_content = file.read()
    except FileNotFoundError:
        raise SystemExit(
            f"Error: The input file '{args.input_file}' was not found."
        ) from None
    except (OSError, UnicodeDecodeError) as e:
        raise SystemExit(
            f"Error: Could not read the input file '{args.input_file}': {e}"
        ) from None

    json_output = usfm_to_json(usfm_content)

    try:
        with open(args.output_file, 'w', encoding='utf-8') as file:
            json.dump(json_output, file, indent=2, ensure_ascii=False)
    except OSError as e:
        raise SystemExit(
            f"Error: Could not write the output file '{args.output_file}': {e}"
        ) from None

    print(f"Conversion successful. JSON output written to {args.output_file}")
# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment