Skip to content

Instantly share code, notes, and snippets.

@jpigla
Created May 19, 2022 12:55
Show Gist options
  • Save jpigla/eb08b51ed0b2e8dcf2ea9457f1dee876 to your computer and use it in GitHub Desktop.
Save jpigla/eb08b51ed0b2e8dcf2ea9457f1dee876 to your computer and use it in GitHub Desktop.
Get CrUX Data from Google with Python
# --- Read the list of URLs (one per line) ------------------------------------------------------------------------------
# The context manager closes the file handle as soon as the lines are read,
# instead of leaving it open for the rest of the run.
with open('urls.txt', 'r') as urls_file:
    url_list_file = urls_file.readlines()
# Alternative input: first column of a header-less CSV, de-duplicated.
# url_list_file = pd.read_csv('urls.csv', header=None).drop_duplicates()[0].to_list()
# --- Run configuration and shared state --------------------------------------------------------------------------------
# Key under which the queried address is sent in the request body: 'url' or 'origin'
level = 'url'
# Form factors requested for every address: 'PHONE', 'TABLET' and/or 'DESKTOP'
lst_formFactor = ['PHONE', 'DESKTOP']
# Running count of processed requests
counter = 0
# Today's date as a dd.mm.YYYY string
date_today = f'{date.today():%d.%m.%Y}'
# Working dict for a result row, seeded with the two fields that are identical for every row
crux_data_dict = {'date': date.today(), 'level': level}
# Results table, empty until the first row is added
df_crux = pd.DataFrame()
# --- Helpers for flattening an API response ----------------------------------------------------------------------------
# Metrics read from result['record']['metrics']; each one yields a p75 column
# plus one density column per histogram bin.
_CRUX_METRICS = (
    'cumulative_layout_shift',
    'first_contentful_paint',
    'first_input_delay',
    'largest_contentful_paint',
)
# Column suffixes for histogram bins 0, 1 and 2, in that order.
_CRUX_BINS = ('good', 'ni', 'bad')


def _dig(obj, path, default):
    """Walk `path` (dict keys / list indexes) into `obj`.

    Returns the value found at the end of the path, or `default` as soon as
    a key or index along the way is missing. Other errors (e.g. indexing
    into a non-container) are deliberately not swallowed, so the caller can
    record them as a failed request.
    """
    try:
        for step in path:
            obj = obj[step]
    except (KeyError, IndexError):
        return default
    return obj


def _metric_columns(result):
    """Return the p75 and histogram-density columns for every metric.

    Values missing from `result` become NaN, so calling this with an empty
    dict produces a complete set of NaN-filled columns.
    """
    columns = {}
    for metric in _CRUX_METRICS:
        base = ('record', 'metrics', metric)
        columns[metric] = _dig(result, base + ('percentiles', 'p75'), np.nan)
        for bin_index, suffix in enumerate(_CRUX_BINS):
            columns[f'{metric}_{suffix}'] = _dig(
                result, base + ('histogram', bin_index, 'density'), np.nan
            )
    return columns


# --- Loop over the URLs ------------------------------------------------------------------------------------------------
# Request settings that are identical for every call
api_url = f'https://chromeuxreport.googleapis.com/v1/records:queryRecord?key={API_KEY}'
header_dic = {'Content-Type': 'application/json'}
# Fields shared by every row, taken from the seed dict prepared before the loop
base_row = {'date': crux_data_dict['date'], 'level': level}
# Rows are collected in a plain list and turned into a DataFrame only when a
# file is written: DataFrame.append was removed in pandas 2.0 and copied the
# whole frame on every call.
rows = []

for url in url_list_file:
    # Strip URL
    url = url.strip()
    # Skip blank lines (e.g. a trailing newline at the end of the file)
    if not url:
        continue
    # Get domain name
    url_netloc = urlparse(url).netloc

    for formFactor in lst_formFactor:
        data_dic = {'formFactor': formFactor, level: url}
        # Start from a fresh dict for every request so values from the
        # previous request can never leak into this row.
        crux_data_dict = dict(base_row)
        try:
            # The timeout keeps one stalled connection from hanging the run.
            response = requests.post(api_url, json=data_dic, headers=header_dic, timeout=30)
            result = response.json()
            # Prefer the values echoed back by the API; fall back to what was requested.
            crux_data_dict['url'] = _dig(result, ('record', 'key', 'url'), url)
            crux_data_dict['url_netloc'] = url_netloc
            crux_data_dict['device'] = _dig(result, ('record', 'key', 'formFactor'), formFactor)
            # An 'error' object in the response carries the failure status.
            crux_data_dict['status'] = _dig(result, ('error', 'status'), 'Success')
            crux_data_dict.update(_metric_columns(result))
        # If the request fails: record the failure in the row and carry on
        # with the remaining URLs instead of aborting the whole run.
        except Exception as e:
            message = str(e)
            # Connection errors can quote the request URL; keep the key out of the CSV.
            if API_KEY:
                message = message.replace(str(API_KEY), '<API_KEY>')
            crux_data_dict['url'] = url
            crux_data_dict['url_netloc'] = url_netloc
            crux_data_dict['device'] = formFactor
            crux_data_dict['status'] = 'ERROR - ' + message
            # Overwrite any partially collected metrics with NaN.
            crux_data_dict.update(_metric_columns({}))

        # After the request - add the row to the collected results
        rows.append(crux_data_dict)

        # Backup: write a work-in-progress file after every 10th request
        counter += 1
        if counter % 10 == 0:
            df_crux = pd.DataFrame(rows)
            df_crux.to_csv(f'crux_results_{date_today}_wip.csv', sep=';', index=True)

        # Pause between requests
        time.sleep(0.3)

# --- Write the final result ---------------------------------------------------------------------------------------------
df_crux = pd.DataFrame(rows)
df_crux.to_csv(f'crux_results_{date_today}_final.csv', sep=';', index=True)
df_crux
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment