fortran01/fetch_ts.py

## fetch_ts.py
import json
import os
import pandas as pd

testing_flag = False  # Set to True to only fetch data for the first two servers with machine stats

def list_servers_without_machine_info(df_servers):
    df_servers_without_machine_info = df_servers[
        df_servers['ram_allocated_gb'].isna()
        | df_servers['storage_allocated_gb'].isna()
        | df_servers['storage_used_gb'].isna()
        | df_servers['cpu_count'].isna()
        | df_servers['ram_used_gb'].isna()
        | df_servers['cpu_name'].isna()
    ]
    # Filter out servers without apps
    df_servers_without_machine_info = df_servers_without_machine_info[
        df_servers_without_machine_info['apps'].apply(lambda x: list(x) != [])
    ]
    print("List of all fields available in the dataframe:")
    print(df_servers_without_machine_info.columns.tolist())
    # Narrow down the columns
    df_servers_without_machine_info = df_servers_without_machine_info[['id',
        'host_name',
        'description',
        'ram_allocated_gb',
        'storage_allocated_gb',
        'storage_used_gb',
        'cpu_count',
        'ram_used_gb',
        'virtual',
        'cpu_name',
        'operating_system',
        'apps']]
    df_servers_without_machine_info.to_csv('servers_without_machine_info.csv', index=False)

def clean_servers_without_measurements(df_no_measurements):

    df_no_measurements = df_no_measurements[df_no_measurements['apps'].apply(lambda x: list(x) != [])]
    df_no_measurements['apps'] = df_no_measurements['apps'].apply(lambda x: [app['name'] for app in x])
    print("List of all fields available in the dataframe:")
    print(df_no_measurements.columns.tolist())
    df_no_measurements = df_no_measurements[['id', 'host_name', 'description', 'ram_allocated_gb', 'storage_allocated_gb', 'storage_used_gb', 'cpu_count', 'ram_used_gb', 'virtual', 'cpu_name', 'operating_system', 'apps']]
    df_no_measurements.to_csv('servers_no_measurements_but_with_apps.csv', index=False)

# Get the list of servers from the Tidal API or from the feather file
user_input = input("Do you want to get the latest servers list or get it from servers.feather? (Enter 'l for latest' or 'f for feather'): ")

if user_input.lower() == 'l':
    # Execute `tidal export servers > servers.json`
    # Then parse servers.json
    # Server ID is in the `id` field and server name is in the `host_name` field
    out = os.popen('tidal export servers').read()
    servers = json.loads(out)
    # Store the latest servers list in a feather file
    df_servers = pd.DataFrame(servers)
    df_servers.to_feather('servers.feather')
elif user_input.lower() == 'f':
    # Load the servers list from the feather file
    servers = pd.read_feather('servers.feather').to_dict('records')
else:
    print("Invalid input. Please enter either 'latest' or 'feather'.")

df_servers = pd.DataFrame(servers)
df_servers.to_csv('servers.csv', index=False)
list_servers_without_machine_info(df_servers)

# Define an empty list to store the dataframes for each server
dataframes = []

# Define an empty list to store the servers without measurements
servers_without_measurements = []

user_input = input("Do you want to get the latest machine stats or get it from output.feather? (Enter 'l for latest' or 'f for feather'): ")

if user_input.lower() == 'l':
    # Iterate over each server ID
    iteration_count = 0

    for s in servers:
        # Call the Tidal API to get the time-series data for the current server ID
        # Replace {server_id} with the actual server ID
        # The --csv-output flag will output the data in CSV format
        command = f'tidal request -X GET "/api/v1/servers/{s["id"]}/measurements?filters[field_name]=cpu_utilization_timeseries" --csv-output 2>&1'
        output = os.popen(command).read()

        # Create a list of the CSV output lines
        lines = output.strip().split('\n')

        # If any of the lines contain the string '404 [No measurements found', then skip the server
        if any('404 [No measurements found' in line for line in lines):
            print(f'No measurements found for server {s["host_name"]}')
            servers_without_measurements.append(s)
            continue
        else:
            # Filter out lines that are not in CSV format
            csv_lines = [line for line in lines if ',' in line]

            # If there are no CSV lines, then skip the server
            if not csv_lines:
                print(f'No CSV measurements found for server {s["host_name"]}')
                servers_without_measurements.append(s)
                continue

            # Print a few lines of the CSV output
            print('\n'.join(csv_lines[:]))
            # Sample output
            # created_at,external_timestamp,field_name,id,measurable_id,measurable_type,updated_at,value
            # 2023-11-14T21:13:01.838Z,2023-11-14T21:12:10.000Z,cpu_utilization_timeseries,167281,400,Server,2023-11-14T21:13:01.838Z,0.4372267332916957
            # 2023-11-14T21:22:59.992Z,2023-11-14T21:22:08.000Z,cpu_utilization_timeseries,167429,400,Server,2023-11-14T21:22:59.992Z,0.2498438475952547
            # 2023-11-15T01:23:01.992Z,2023-11-15T01:22:09.000Z,cpu_utilization_timeseries,173401,400,Server,2023-11-15T01:23:01.992Z,0.3121098626716612
            # 2023-11-14T21:32:59.220Z,2023-11-14T21:32:08.000Z,cpu_utilization_timeseries,167687,400,Server,2023-11-14T21:32:59.220Z,0.6246096189881367
            print(f'I see CSV measurements for server {s["host_name"]} with id {s["id"]}')

            # Parse the header in the first row
            header = csv_lines[0].split(',')

            # Create a list of dictionaries from the remaining lines
            rows = [dict(zip(header, line.split(','))) for line in csv_lines[1:]]

            # Create a dataframe from the list of dictionaries
            df = pd.DataFrame(rows)

            # Add a column for the server name
            df['server_name'] = s['host_name']

            # Reorder the columns
            df = df[['server_name', 'measurable_id', 'id', 'external_timestamp', 'field_name', 'value', 'created_at', 'measurable_type', 'updated_at']]

            # Append the dataframe to the list
            dataframes.append(df)

            iteration_count += 1

        if testing_flag and iteration_count == 2:  # Only break if testing_flag is True
            break

    # Concatenate the dataframes into a single dataframe
    result = pd.concat(dataframes)

    # Save the dataframe to a feather file
    result.reset_index().to_feather('output.feather')

    # Convert servers_without_measurements to a DataFrame and save to a feather file
    df_no_measurements = pd.DataFrame(servers_without_measurements)
    df_no_measurements.reset_index().to_feather('df_no_measurements.feather')

elif user_input.lower() == 'f':
    # Load the dataframe from the feather file
    result = pd.read_feather('output.feather')

    # Load df_no_measurements from the feather file
    df_no_measurements = pd.read_feather('df_no_measurements.feather')

else:
    print("Invalid input. Please enter either 'latest' or 'feather'.")

# Write the dataframe to a CSV file
result.to_csv('output.csv', index=False)

# Ensure 'value' is numeric before aggregation
result['value'] = pd.to_numeric(result['value'], errors='coerce')

# Aggregate the 'value' field by server by finding the average, standard deviation, min and max
result = result.groupby('server_name')['value'].agg(['mean', 'std', 'min', 'max'])

# Write the aggregated dataframe to a CSV file
result.reset_index().to_csv('output_aggregated.csv', index=False)

clean_servers_without_measurements(df_no_measurements)

print("CSV files generated:")
print("1. output.csv: This file contains the raw data fetched from the servers.")
print("2. output_aggregated.csv: This file contains the aggregated data (mean, std, min, max) for each server.")
print("3. servers_no_measurements_but_with_apps.csv: This file contains the details of servers without measurements but with apps.")
print("4. servers_without_machine_info.csv: This file contains the details of servers without machine info.")
	import json
	import os
	import pandas as pd

	testing_flag = False # Set to True to only fetch data for the first two servers with machine stats

	def list_servers_without_machine_info(df_servers):
	df_servers_without_machine_info = df_servers[
	df_servers['ram_allocated_gb'].isna()
	\| df_servers['storage_allocated_gb'].isna()
	\| df_servers['storage_used_gb'].isna()
	\| df_servers['cpu_count'].isna()
	\| df_servers['ram_used_gb'].isna()
	\| df_servers['cpu_name'].isna()
	]
	# Filter out servers without apps
	df_servers_without_machine_info = df_servers_without_machine_info[
	df_servers_without_machine_info['apps'].apply(lambda x: list(x) != [])
	]
	print("List of all fields available in the dataframe:")
	print(df_servers_without_machine_info.columns.tolist())
	# Narrow down the columns
	df_servers_without_machine_info = df_servers_without_machine_info[['id',
	'host_name',
	'description',
	'ram_allocated_gb',
	'storage_allocated_gb',
	'storage_used_gb',
	'cpu_count',
	'ram_used_gb',
	'virtual',
	'cpu_name',
	'operating_system',
	'apps']]
	df_servers_without_machine_info.to_csv('servers_without_machine_info.csv', index=False)

	def clean_servers_without_measurements(df_no_measurements):

	df_no_measurements = df_no_measurements[df_no_measurements['apps'].apply(lambda x: list(x) != [])]
	df_no_measurements['apps'] = df_no_measurements['apps'].apply(lambda x: [app['name'] for app in x])
	print("List of all fields available in the dataframe:")
	print(df_no_measurements.columns.tolist())
	df_no_measurements = df_no_measurements[['id', 'host_name', 'description', 'ram_allocated_gb', 'storage_allocated_gb', 'storage_used_gb', 'cpu_count', 'ram_used_gb', 'virtual', 'cpu_name', 'operating_system', 'apps']]
	df_no_measurements.to_csv('servers_no_measurements_but_with_apps.csv', index=False)

	# Get the list of servers from the Tidal API or from the feather file
	user_input = input("Do you want to get the latest servers list or get it from servers.feather? (Enter 'l for latest' or 'f for feather'): ")

	if user_input.lower() == 'l':
	# Execute `tidal export servers > servers.json`
	# Then parse servers.json
	# Server ID is in the `id` field and server name is in the `host_name` field
	out = os.popen('tidal export servers').read()
	servers = json.loads(out)
	# Store the latest servers list in a feather file
	df_servers = pd.DataFrame(servers)
	df_servers.to_feather('servers.feather')
	elif user_input.lower() == 'f':
	# Load the servers list from the feather file
	servers = pd.read_feather('servers.feather').to_dict('records')
	else:
	print("Invalid input. Please enter either 'latest' or 'feather'.")

	df_servers = pd.DataFrame(servers)
	df_servers.to_csv('servers.csv', index=False)
	list_servers_without_machine_info(df_servers)

	# Define an empty list to store the dataframes for each server
	dataframes = []

	# Define an empty list to store the servers without measurements
	servers_without_measurements = []

	user_input = input("Do you want to get the latest machine stats or get it from output.feather? (Enter 'l for latest' or 'f for feather'): ")

	if user_input.lower() == 'l':
	# Iterate over each server ID
	iteration_count = 0

	for s in servers:
	# Call the Tidal API to get the time-series data for the current server ID
	# Replace {server_id} with the actual server ID
	# The --csv-output flag will output the data in CSV format
	command = f'tidal request -X GET "/api/v1/servers/{s["id"]}/measurements?filters[field_name]=cpu_utilization_timeseries" --csv-output 2>&1'
	output = os.popen(command).read()

	# Create a list of the CSV output lines
	lines = output.strip().split('\n')

	# If any of the lines contain the string '404 [No measurements found', then skip the server
	if any('404 [No measurements found' in line for line in lines):
	print(f'No measurements found for server {s["host_name"]}')
	servers_without_measurements.append(s)
	continue
	else:
	# Filter out lines that are not in CSV format
	csv_lines = [line for line in lines if ',' in line]

	# If there are no CSV lines, then skip the server
	if not csv_lines:
	print(f'No CSV measurements found for server {s["host_name"]}')
	servers_without_measurements.append(s)
	continue

	# Print a few lines of the CSV output
	print('\n'.join(csv_lines[:]))
	# Sample output
	# created_at,external_timestamp,field_name,id,measurable_id,measurable_type,updated_at,value
	# 2023-11-14T21:13:01.838Z,2023-11-14T21:12:10.000Z,cpu_utilization_timeseries,167281,400,Server,2023-11-14T21:13:01.838Z,0.4372267332916957
	# 2023-11-14T21:22:59.992Z,2023-11-14T21:22:08.000Z,cpu_utilization_timeseries,167429,400,Server,2023-11-14T21:22:59.992Z,0.2498438475952547
	# 2023-11-15T01:23:01.992Z,2023-11-15T01:22:09.000Z,cpu_utilization_timeseries,173401,400,Server,2023-11-15T01:23:01.992Z,0.3121098626716612
	# 2023-11-14T21:32:59.220Z,2023-11-14T21:32:08.000Z,cpu_utilization_timeseries,167687,400,Server,2023-11-14T21:32:59.220Z,0.6246096189881367
	print(f'I see CSV measurements for server {s["host_name"]} with id {s["id"]}')

	# Parse the header in the first row
	header = csv_lines[0].split(',')

	# Create a list of dictionaries from the remaining lines
	rows = [dict(zip(header, line.split(','))) for line in csv_lines[1:]]

	# Create a dataframe from the list of dictionaries
	df = pd.DataFrame(rows)

	# Add a column for the server name
	df['server_name'] = s['host_name']

	# Reorder the columns
	df = df[['server_name', 'measurable_id', 'id', 'external_timestamp', 'field_name', 'value', 'created_at', 'measurable_type', 'updated_at']]

	# Append the dataframe to the list
	dataframes.append(df)

	iteration_count += 1

	if testing_flag and iteration_count == 2: # Only break if testing_flag is True
	break

	# Concatenate the dataframes into a single dataframe
	result = pd.concat(dataframes)

	# Save the dataframe to a feather file
	result.reset_index().to_feather('output.feather')

	# Convert servers_without_measurements to a DataFrame and save to a feather file
	df_no_measurements = pd.DataFrame(servers_without_measurements)
	df_no_measurements.reset_index().to_feather('df_no_measurements.feather')

	elif user_input.lower() == 'f':
	# Load the dataframe from the feather file
	result = pd.read_feather('output.feather')

	# Load df_no_measurements from the feather file
	df_no_measurements = pd.read_feather('df_no_measurements.feather')

	else:
	print("Invalid input. Please enter either 'latest' or 'feather'.")

	# Write the dataframe to a CSV file
	result.to_csv('output.csv', index=False)

	# Ensure 'value' is numeric before aggregation
	result['value'] = pd.to_numeric(result['value'], errors='coerce')

	# Aggregate the 'value' field by server by finding the average, standard deviation, min and max
	result = result.groupby('server_name')['value'].agg(['mean', 'std', 'min', 'max'])

	# Write the aggregated dataframe to a CSV file
	result.reset_index().to_csv('output_aggregated.csv', index=False)

	clean_servers_without_measurements(df_no_measurements)

	print("CSV files generated:")
	print("1. output.csv: This file contains the raw data fetched from the servers.")
	print("2. output_aggregated.csv: This file contains the aggregated data (mean, std, min, max) for each server.")
	print("3. servers_no_measurements_but_with_apps.csv: This file contains the details of servers without measurements but with apps.")
	print("4. servers_without_machine_info.csv: This file contains the details of servers without machine info.")