Last active
July 24, 2024 01:32
-
-
Save mando222/faa9ddee1e6bf37b717770971caf4aa9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import json | |
import argparse | |
def usfm_to_json(usfm_content):
    """Parse USFM text into a dictionary keyed by marker name.

    The input is scanned for backslash-prefixed markers (a backslash
    followed by one or more word characters). The text between a marker
    and the next backslash (or the end of the input) becomes that marker's
    content, with surrounding whitespace stripped.

    Only word characters are treated as part of a marker name, so the
    ``*`` of a closing marker stays at the start of the content that
    follows it.

    Args:
        usfm_content: The full USFM document as a string.

    Returns:
        A dict mapping each marker name to its content. A marker that
        occurs once maps to a string; a marker that occurs several times
        maps to a list of strings in order of appearance. A marker with
        no text after it maps to an empty string. Empty input yields an
        empty dict.
    """
    # The content group is `.*?` rather than `.+?` on purpose: with `.+?`
    # a marker that has no text of its own (e.g. a paragraph marker
    # directly followed by a verse marker) is forced to consume the next
    # backslash and swallows the following marker as its content.
    marker_pattern = r'\\(\w+)\s*(.*?)(?=\\|\Z)'

    parsed_content = {}
    # re.DOTALL lets a marker's content span several lines.
    for marker, content in re.findall(marker_pattern, usfm_content, re.DOTALL):
        content = content.strip()
        if marker not in parsed_content:
            parsed_content[marker] = content
        elif isinstance(parsed_content[marker], list):
            parsed_content[marker].append(content)
        else:
            # Second occurrence: promote the single string to a list.
            parsed_content[marker] = [parsed_content[marker], content]
    return parsed_content
def main(argv=None):
    """Command-line entry point: convert a USFM file to a JSON file.

    Reads the input file as UTF-8, converts it with ``usfm_to_json`` and
    writes the result to the output file as indented, non-ASCII-escaped
    JSON. On success a confirmation line is printed to stdout.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None``, argparse falls back to ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1 if the input cannot be read or the
            output cannot be written (the message goes to stderr), and
            with argparse's own status for invalid arguments, ``--help``
            or ``--version``.
    """
    parser = argparse.ArgumentParser(
        description="Convert USFM (Unified Standard Format Markers) files to JSON format.",
        epilog="Example usage: python usfm_to_json.py input.usfm output.json",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("input_file",
                        help="Path to the input USFM file to be converted.")
    parser.add_argument("output_file",
                        help="Path where the output JSON file will be saved.")
    # Report the script version and exit.
    parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0')
    args = parser.parse_args(argv)

    # Reading and writing are guarded separately so that a missing output
    # directory is not misreported as a missing input file.
    # parser.exit() writes the message to stderr and raises SystemExit, so
    # failures produce a non-zero exit status instead of a silent 0.
    try:
        with open(args.input_file, 'r', encoding='utf-8') as file:
            usfm_content = file.read()
    except FileNotFoundError:
        parser.exit(1, f"Error: The input file '{args.input_file}' was not found.\n")
    except (OSError, UnicodeDecodeError) as e:
        parser.exit(1, f"Error: Could not read '{args.input_file}': {e}\n")

    json_output = usfm_to_json(usfm_content)

    try:
        with open(args.output_file, 'w', encoding='utf-8') as file:
            json.dump(json_output, file, indent=2, ensure_ascii=False)
    except OSError as e:
        parser.exit(1, f"Error: Could not write '{args.output_file}': {e}\n")

    print(f"Conversion successful. JSON output written to {args.output_file}")
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment