Skip to content

Instantly share code, notes, and snippets.

@pablorecio
Created July 4, 2024 09:38
Show Gist options
  • Save pablorecio/59db2f0a2df6ef1f272422c831fc9406 to your computer and use it in GitHub Desktop.
Save pablorecio/59db2f0a2df6ef1f272422c831fc9406 to your computer and use it in GitHub Desktop.
Takes a log file with dbt's output and reports which models/tests take the longest to run
from decimal import Decimal
import re
# Matches one finished-node line of dbt console output, for example:
#   09:38:01 1 of 3 OK created sql view model analytics.orders .... [SUCCESS 1 in 1.25s]
# Groups: (1) OK/PASS marker, (2) node description, (3) bracketed status,
# (4) elapsed seconds. The gap after the timestamp is matched with \s+ so
# logs that put one or several spaces there are both accepted.
REGEX = r'^\d\d:\d\d:\d\d\s+\d+ of \d+ (OK|PASS) (.*) \.*\s+\[(PASS|SUCCESS \d+) in (\d+\.\d+)s\]$'

# Leading part of a model description; the group captures the
# materialization word (incremental, view, table, ...).
_MODEL_PREFIX = r'created sql (\w+) model '


def _classify(text: str) -> tuple[str, str]:
    """Split a node description into ``(name, kind)``."""
    prefix = re.match(_MODEL_PREFIX, text)
    if prefix is None:
        # Anything that is not a "created sql ... model" line is a test.
        return text, 'test'
    materialization = prefix.group(1)
    # Incremental models keep the historical 'model' label; every other
    # materialization is reported under its own name ('view', 'table', ...).
    kind = 'model' if materialization == 'incremental' else materialization
    return text[prefix.end():], kind


def parse_file(file_path: str) -> list[tuple[str, str, Decimal]]:
    """Extract timed node results from a dbt log, slowest first.

    Every line matching ``REGEX`` yields one ``(name, kind, seconds)`` tuple;
    all other lines are ignored.

    Args:
        file_path: Path of the text file holding dbt's output.

    Returns:
        The tuples sorted by ``seconds`` in descending order. ``kind`` is
        ``'model'`` for incremental models, the materialization name for
        other models (e.g. ``'view'``, ``'table'``) and ``'test'`` otherwise.

    Raises:
        OSError: If the file cannot be opened.
    """
    pattern = re.compile(REGEX)
    results = []
    with open(file_path) as f:
        # Stream the file instead of loading every line up front.
        for line in f:
            match = pattern.match(line)
            if match is None:
                continue
            name, kind = _classify(match.group(2))
            results.append((name, kind, Decimal(match.group(4))))
    return sorted(results, key=lambda x: x[2], reverse=True)
def main(file_path: str = 'log.log') -> None:
    """Print every timed dbt result found in *file_path*, slowest first.

    Args:
        file_path: Log file to analyse; defaults to ``log.log`` in the
            current working directory.
    """
    # ``kind`` rather than ``type`` so the builtin is not shadowed.
    for model, kind, timing in parse_file(file_path):
        print(f'{timing}s - \t{kind}:{model}')


if __name__ == '__main__':
    import sys

    # An optional first command-line argument overrides the default path.
    if len(sys.argv) > 1:
        main(sys.argv[1])
    else:
        main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment