Skip to content

Instantly share code, notes, and snippets.

@tbescherer
Created June 12, 2019 19:42
Show Gist options
  • Save tbescherer/c9fe215c1787bb97a97a2e2c9a19cd12 to your computer and use it in GitHub Desktop.
Save tbescherer/c9fe215c1787bb97a97a2e2c9a19cd12 to your computer and use it in GitHub Desktop.
Rudimentary helper that checks whether the columns documented for each dbt model match the columns of the model as actually built
#!/usr/bin/env python3
import json
import os
import pandas as pd
# Directory one level above the folder containing this script; the dbt
# artifacts are looked up in its ``target/`` sub-directory.
abs_path = os.path.dirname(os.path.abspath(__file__ + '/../'))
catalog_path = os.path.join(abs_path, 'target/catalog.json')
manifest_path = os.path.join(abs_path, 'target/manifest.json')


def load_json(path):
    """Return the parsed contents of the JSON file at *path*.

    The file is opened in a ``with`` block so the handle is always closed.
    """
    with open(path, encoding='utf-8') as handle:
        return json.load(handle)


def catalog_columns_by_name(catalog):
    """Map each catalog node's ``metadata.name`` to the set of its column names.

    Only the ``nodes`` mapping of *catalog* is read; any other top-level keys
    are ignored. If two nodes share a name, the later one wins.
    """
    columns_by_name = {}
    for node in catalog['nodes'].values():
        columns_by_name[node['metadata']['name']] = set(node['columns'])
    return columns_by_name


def report_lines(manifest, catalog):
    """Return the report lines comparing manifest columns with catalog columns.

    Args:
        manifest: parsed ``manifest.json`` (a dict with a ``nodes`` mapping).
        catalog: parsed ``catalog.json`` (a dict with a ``nodes`` mapping).

    Returns:
        A list of strings, one per line to print. For every manifest node
        whose ``resource_type`` is ``'model'``:

        * nothing, when both column sets are equal;
        * a blank separator, the model name and the one-sided differences,
          when the sets differ;
        * ``"<name> failed to parse"``, when the catalog has no entry for
          the model name.
    """
    known_columns = catalog_columns_by_name(catalog)
    lines = []
    for node in manifest['nodes'].values():
        if node.get('resource_type') != 'model':
            continue
        name = node['name']
        # A null/missing ``columns`` entry is treated as "no documented columns".
        manifest_columns = set(node.get('columns') or {})
        try:
            catalog_columns = known_columns[name]
        except KeyError:
            # The only expected failure: the model is absent from the catalog.
            lines.append(f"{name} failed to parse")
            continue
        if manifest_columns == catalog_columns:
            continue
        manifest_difference = manifest_columns - catalog_columns
        catalog_difference = catalog_columns - manifest_columns
        lines.append("\n")
        lines.append(name)
        if manifest_difference:
            lines.append("In docs but not in sql:")
            lines.append(str(manifest_difference))
        if catalog_difference:
            lines.append("In sql but not in docs:")
            lines.append(str(catalog_difference))
    return lines


def main():
    """Load both artifacts from ``target/`` and print the mismatch report."""
    catalog = load_json(catalog_path)
    manifest = load_json(manifest_path)
    for line in report_lines(manifest, catalog):
        print(line)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment