Skip to content

Instantly share code, notes, and snippets.

@jingningzhang1
Created June 24, 2022 00:29
Show Gist options
  • Save jingningzhang1/e808f5e02a5fd719514b94d018d1e565 to your computer and use it in GitHub Desktop.
Save jingningzhang1/e808f5e02a5fd719514b94d018d1e565 to your computer and use it in GitHub Desktop.
# Output columns of the inner SELECT in get_scatter_data; only these may be
# used as grouping columns. Stored lower-case so the check is case-insensitive.
_SCATTER_COLUMNS = frozenset(
    {
        "sex",
        "age",
        "height",
        "weight",
        "smoker",
        "cholesterol",
        "bloodpressure",
        "risk",
        "userid",
    }
)


def get_scatter_data(xaxis, comp):
    """
    Count distinct users grouped by two chosen columns and by risk.

    Runs an aggregate query against the table ``{DB_NAME}.{USER_TABLE}`` and
    returns the result as a pandas dataframe. The inner SELECT relabels
    ``gender`` as ``sex`` ('Female' when 'F', otherwise 'Male') and
    ``smoker`` as ``Smoker`` ('Non-smoker' when 'N', otherwise 'Smoker'),
    and exposes ``cholestlevs`` as ``cholesterol`` and ``bp`` as
    ``bloodpressure``.

    Parameters
    ----------
    xaxis : str
        Name of the first grouping column. Must be one of the inner SELECT's
        output columns (compared case-insensitively).
    comp : str
        Name of the second grouping column; same restriction as ``xaxis``.

    Returns
    -------
    df : pandas dataframe
        One row per (xaxis, comp, risk) combination, with a ``Total`` column
        holding ``Count(DISTINCT userid)`` for that combination.

    Raises
    ------
    ValueError
        If ``xaxis`` or ``comp`` is not a known column name. Raised before
        any connection is opened.
    """
    # Column names cannot be passed as bound query parameters, so they are
    # checked against an allowlist before being interpolated into the SQL.
    for label, column in (("xaxis", xaxis), ("comp", comp)):
        if not isinstance(column, str) or column.lower() not in _SCATTER_COLUMNS:
            raise ValueError(
                f"{label} must be one of {sorted(_SCATTER_COLUMNS)}, got {column!r}"
            )

    query = f"""SELECT {xaxis}, {comp}, risk, Count(DISTINCT userid) AS Total
        FROM(
            SELECT
                CASE WHEN gender='F' THEN 'Female' ELSE 'Male' END AS sex,
                age, height, weight,
                CASE WHEN smoker='N' THEN 'Non-smoker' ELSE 'Smoker' END AS Smoker,
                cholestlevs AS cholesterol, bp AS bloodpressure, risk, userid
            FROM {DB_NAME}.{USER_TABLE}
        )
        GROUP BY {xaxis}, {comp}, risk
        """

    connection = sql.connect(
        server_hostname=SERVER_HOSTNAME,
        http_path=HTTP_PATH,
        access_token=ACCESS_TOKEN,
    )
    # try/finally so the cursor and connection are released even when the
    # query or the Arrow-to-pandas conversion raises.
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(query)
            return cursor.fetchall_arrow().to_pandas()
        finally:
            cursor.close()
    finally:
        connection.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment