Skip to content

Instantly share code, notes, and snippets.

@tacastillo
Last active April 28, 2023 17:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tacastillo/c4a7a1b55028fcdda1cc07961bd40ba8 to your computer and use it in GitHub Desktop.
Save tacastillo/c4a7a1b55028fcdda1cc07961bd40ba8 to your computer and use it in GitHub Desktop.
How to analyze materialization runtimes
from gql import Client, gql
from gql.transport.aiohttp import AIOHTTPTransport
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# URL of the GraphQL endpoint that run_query sends its requests to.
# NOTE(review): presumably a locally running Dagster webserver — confirm
# host/port before running against another deployment.
ENDPOINT = "http://localhost:3000/graphql"
async def run_query(query):
    """Execute a GraphQL query string against ENDPOINT and return the result.

    A fresh aiohttp-backed gql client is opened for the call and closed again
    when the ``async with`` block exits.

    Args:
        query: GraphQL document as a string; it is parsed with ``gql``.

    Returns:
        Whatever ``session.execute`` yields for the parsed query.
    """
    client = Client(
        transport=AIOHTTPTransport(url=ENDPOINT),
        # Ask the client to fetch the server schema through the transport.
        fetch_schema_from_transport=True,
    )
    async with client as session:
        return await session.execute(gql(query))
# GraphQL document used to collect materialization timing data: for every
# asset node it requests each materialization's asset key path, run id, the
# owning run's status and start time, and the step's start/end times.
get_materialization_runtimes_query = """
query GetMaterializationRuntimes {
assetsOrError {
__typename
... on AssetConnection {
nodes {
assetMaterializations {
assetKey {
path
}
runId
runOrError {
... on Run {
status
startTime
}
}
stepStats {
startTime
endTime
}
}
}
}
}
}
"""
result = await run_query(get_materialization_runtimes_query)
raw_runtimes = pd.json_normalize(result, record_path=['assetsOrError', 'nodes', 'assetMaterializations'], errors='ignore')
raw_runtimes["asset_key"] = raw_runtimes["assetKey.path"].apply(lambda x: '/'.join(x))
raw_runtimes["duration"] = raw_runtimes["stepStats.endTime"] - raw_runtimes["stepStats.startTime"]
raw_runtimes.rename(columns={"runOrError.startTime": "run_start_time"}, inplace=True)
raw_runtimes["run_start_time"] = pd.to_datetime(raw_runtimes["run_start_time"], unit="s")
runtimes = raw_runtimes[["asset_key", "duration", "run_start_time"]]
grouped_df = runtimes.groupby(['run_start_time', 'asset_key']).agg({'duration': 'sum'}).reset_index()
pivoted_df = grouped_df.pivot(index='run_start_time', columns='asset_key', values='duration')
pivoted_df.plot.area(stacked=True)
# Retrieves, for every asset node, the numeric (int and float) metadata
# entries attached to each of its materializations.
query GetMaterializationMetadata {
  assetsOrError {
    __typename
    ... on AssetConnection {
      nodes {
        assetMaterializations {
          # Identify the asset and the run that produced the materialization.
          assetKey {
            path
          }
          runId
          # Only integer and float entries select a value field.
          metadataEntries {
            ... on IntMetadataEntry {
              label
              description
              intValue
            }
            ... on FloatMetadataEntry {
              label
              description
              floatValue
            }
          }
        }
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment