I'm working with a library called datamodel-code-generator, whose command-line tool `datamodel-codegen` exposes its internal `generate` function. The library has two main supported input formats, jsonschema and openapi, each with its own parser. After the 'raw' parsing done by these format-specific parsers, the base parser runs a series of intricate routines that I find quite inscrutable, and I was hoping to get some advice on them by showing them to you. These routines have method names beginning with two underscores (I call these 'private' methods). Please read the following code and explain it to me in summary.
def parse(
    self,
    with_import: Optional[bool] = True,
    format_: Optional[bool] = True,
    settings_path: Optional[Path] = None,
) -> Union[str, Dict[Tuple[str, ...], Result]]:
    """Parse the input and render the generated source for each output module.

    The method runs ``parse_raw()``, orders the collected models with
    ``sort_data_models``, groups them by module path, applies a fixed
    sequence of private post-processing passes to every group, and finally
    renders each group to text.

    Args:
        with_import: When truthy, the shared imports and the per-module
            imports are emitted at the top of every rendered body. It also
            gates appending ``IMPORT_ANNOTATIONS`` to ``self.imports`` for
            target versions other than ``PythonVersion.PY_36``.
        format_: When truthy, a ``CodeFormatter`` is built and every
            rendered body is passed through ``format_code``.
        settings_path: Forwarded to ``CodeFormatter`` as its second
            positional argument.

    Returns:
        The body string alone when the only result key is
        ``('__init__.py',)``; otherwise a mapping from output path tuples
        to ``Result`` objects.
    """
    self.parse_raw()

    if with_import and self.target_python_version != PythonVersion.PY_36:
        self.imports.append(IMPORT_ANNOTATIONS)

    code_formatter: Optional[CodeFormatter] = (
        CodeFormatter(
            self.target_python_version,
            settings_path,
            self.wrap_string_literal,
            skip_string_normalization=not self.use_double_quotes,
        )
        if format_
        else None
    )

    _, sorted_data_models, require_update_action_models = sort_data_models(
        self.results
    )

    def module_path_of(data_model: DataModel) -> Tuple[str, ...]:
        return tuple(data_model.module_path)

    # Process in reverse order to correctly establish module levels.
    ordered_models = sorted(
        sorted_data_models.values(), key=module_path_of, reverse=True
    )

    results: Dict[Tuple[str, ...], Result] = {}
    module_models: List[Tuple[Tuple[str, ...], List[DataModel]]] = []
    unused_models: List[DataModel] = []
    # Maps every model to the (shared, mutable) list of its module group.
    model_to_models: Dict[DataModel, List[DataModel]] = {}

    previous_module: Tuple[str, ...] = ()
    for module, group in groupby(ordered_models, key=module_path_of):
        models = list(group)
        model_to_models.update((model, models) for model in models)
        self.__delete_duplicate_models(models)
        self.__replace_duplicate_name_in_module(models)
        # Add empty entries for the intermediate prefixes of the previous
        # module path. The range is empty unless the previous path is more
        # than one level deeper than the current one.
        module_models.extend(
            (previous_module[:depth], [])
            for depth in range(len(previous_module) - 1, len(module), -1)
        )
        module_models.append((module, models))
        previous_module = module

    class Processed(NamedTuple):
        module: Tuple[str, ...]
        models: List[DataModel]
        init: bool
        imports: Imports

    processed_models: List[Processed] = []

    for module_path, models in module_models:
        init = False
        if not module_path:
            output_key = ('__init__.py',)
        else:
            # Make sure the containing package has an ``__init__.py`` entry.
            parent = (*module_path[:-1], '__init__.py')
            if parent not in results:
                results[parent] = Result(body='')
            package_init = (*module_path, '__init__.py')
            if package_init in results:
                # An ``__init__.py`` entry already exists under this path,
                # so the models are written into it.
                output_key = package_init
                init = True
            else:
                output_key = (*module_path[:-1], f'{module_path[-1]}.py')

        imports = Imports()
        scoped_model_resolver = ModelResolver()

        # The passes below run in this exact order on the same list object.
        self.__change_from_import(models, imports, scoped_model_resolver, init)
        self.__extract_inherited_enum(models)
        self.__set_reference_default_value_to_field(models)
        self.__reuse_model(models, require_update_action_models)
        self.__collapse_root_models(models, unused_models)
        self.__set_default_enum_member(models)
        self.__override_required_field(models)
        self.__sort_models(models, imports)
        self.__set_one_literal_on_default(models)

        processed_models.append(Processed(output_key, models, init, imports))

    # Drop models collected in ``unused_models`` from their module group
    # before anything is rendered.
    for unused_model in unused_models:
        owning_group = model_to_models[unused_model]
        if unused_model in owning_group:  # pragma: no cover
            owning_group.remove(unused_model)

    for processed in processed_models:
        models = processed.models
        chunks: List[str] = []
        if with_import:
            chunks.extend((str(self.imports), str(processed.imports), '\n'))

        chunks.append(dump_templates(models))

        if self.dump_resolve_reference_action is not None:
            chunks.append('\n')
            chunks.append(
                self.dump_resolve_reference_action(
                    m.reference.short_name
                    for m in models
                    if m.path in require_update_action_models
                )
            )

        body = '\n'.join(chunks)
        if code_formatter:
            body = code_formatter.format_code(body)

        results[processed.module] = Result(
            body=body, source=models[0].file_path if models else None
        )

    # Retain existing behaviour: a lone root module is returned as a string.
    if list(results) == [('__init__.py',)]:
        return results[('__init__.py',)].body

    return results