Skip to content

Instantly share code, notes, and snippets.

@FurcyPin
Last active February 20, 2022 10:49
Show Gist options
  • Save FurcyPin/fdab5ab1e71c98c5507e9f2525356982 to your computer and use it in GitHub Desktop.
Save FurcyPin/fdab5ab1e71c98c5507e9f2525356982 to your computer and use it in GitHub Desktop.
from bigquery_frame import BigQueryBuilder
from bigquery_frame.auth import get_bq_client
from bigquery_frame.transformations import analyze
from bigquery_frame import functions as f
bigquery = BigQueryBuilder(get_bq_client())
query = """
SELECT
*
FROM UNNEST ([
STRUCT(
1 as id, "Bulbasaur" as name, ["Grass", "Poison"] as types,
STRUCT(TRUE as can_evolve, NULL as evolves_from) as evolution
),
STRUCT(
2 as id, "Ivysaur" as name, ["Grass", "Poison"] as types,
STRUCT(TRUE as can_evolve, 1 as evolves_from) as evolution
),
STRUCT(
3 as id, "Venusaur" as name, ["Grass", "Poison"] as types,
STRUCT(FALSE as can_evolve, 2 as evolves_from) as evolution
),
STRUCT(
4 as id, "Charmander" as name, ["Fire"] as types,
STRUCT(TRUE as can_evolve, NULL as evolves_from) as evolution
),
STRUCT(
5 as id, "Charmeleon" as name, ["Fire"] as types,
STRUCT(TRUE as can_evolve, 4 as evolves_from) as evolution
),
STRUCT(
6 as id, "Charizard" as name, ["Fire", "Flying"] as types,
STRUCT(FALSE as can_evolve, 5 as evolves_from) as evolution
),
STRUCT(
7 as id, "Squirtle" as name, ["Water"] as types,
STRUCT(TRUE as can_evolve, NULL as evolves_from) as evolution
),
STRUCT(
8 as id, "Wartortle" as name, ["Water"] as types,
STRUCT(TRUE as can_evolve, 7 as evolves_from) as evolution
),
STRUCT(
9 as id, "Blastoise" as name, ["Water"] as types,
STRUCT(FALSE as can_evolve, 8 as evolves_from) as evolution
)
])
"""
df = bigquery.sql(query)
df.show()
# +----+------------+---------------------+--------------------------------------------+
# | id | name | types | evolution |
# +----+------------+---------------------+--------------------------------------------+
# | 1 | Bulbasaur | ['Grass', 'Poison'] | {'can_evolve': True, 'evolves_from': None} |
# | 2 | Ivysaur | ['Grass', 'Poison'] | {'can_evolve': True, 'evolves_from': 1} |
# | 3 | Venusaur | ['Grass', 'Poison'] | {'can_evolve': False, 'evolves_from': 2} |
# | 4 | Charmander | ['Fire'] | {'can_evolve': True, 'evolves_from': None} |
# | 5 | Charmeleon | ['Fire'] | {'can_evolve': True, 'evolves_from': 4} |
# | 6 | Charizard | ['Fire', 'Flying'] | {'can_evolve': False, 'evolves_from': 5} |
# | 7 | Squirtle | ['Water'] | {'can_evolve': True, 'evolves_from': None} |
# | 8 | Wartortle | ['Water'] | {'can_evolve': True, 'evolves_from': 7} |
# | 9 | Blastoise | ['Water'] | {'can_evolve': False, 'evolves_from': 8} |
# +----+------------+---------------------+--------------------------------------------+
analyzed_df = analyze(df)
# Analyzing 5 columns ...
analyzed_df\
.withColumn("most_frequent_value", f.expr("approx_top_100[OFFSET(0)]"))\
.drop("approx_top_100").show()
# +------------------------+-------------+-------+----------------+------------+-----------+-----------+------------------------------------+
# | column_name | column_type | count | count_distinct | count_null | min | max | most_frequent_value |
# +------------------------+-------------+-------+----------------+------------+-----------+-----------+------------------------------------+
# | id | INTEGER | 9 | 9 | 0 | 1 | 9 | {'value': '1', 'count': 1} |
# | name | STRING | 9 | 9 | 0 | Blastoise | Wartortle | {'value': 'Bulbasaur', 'count': 1} |
# | types! | STRING | 13 | 5 | 0 | Fire | Water | {'value': 'Grass', 'count': 3} |
# | evolution.can_evolve | BOOLEAN | 9 | 2 | 0 | false | true | {'value': 'true', 'count': 6} |
# | evolution.evolves_from | INTEGER | 9 | 6 | 3 | 1 | 8 | {'value': 'NULL', 'count': 3} |
# +------------------------+-------------+-------+----------------+------------+-----------+-----------+------------------------------------+
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment