Skip to content

Instantly share code, notes, and snippets.

@cweibel
Created Nov 15, 2021
Embed
What would you like to do?
Python script to scrape the CF CLI
#!/usr/bin/env python3
# Welcome! This script uses the CF API to look for a specific set of ENV variables which an app has
# been deployed with. If a hit is found, the org name, space name and app name are retrieved along
# with the list of matching ENV variables.
#
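# This script accepts either 3 arguments, in order:
# - system_domain - Example: 172.18.0.150.nip.io
# - cf client id - Example: cf_smoke_tests
# - cf client secret - Example: nice_try_redacted_4u
# or 2 arguments: system_domain followed by an existing token, which is sent
# verbatim as the Authorization header (e.g. the quoted output of `cf oauth-token`).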
#
# Example execution:
# $ python3 read-cf-apps-kubecf.py 172.18.0.150.nip.io cf_smoke_tests $(kubectl get secret --namespace kubecf var-uaa-clients-cf-smoke-tests-secret -o jsonpath='{.data.password}' | base64 --decode)
#
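# Sample output (abridged: each app line printed by the script also carries a
# buildpack:<...> field between the space and app fields):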
#
# Found 12 apps on 2 pages
#
# Processing page 1/2
# Processing page 2/2
# buildpack_total:3
# org:system,space:test,app:app1,env:['MEMORY_CALCULATOR_V1=true']
# org:org2,space:test2,app:app1org,env:['MEMORY_CALCULATOR_V1=false']
# org:org2,space:test2,app:app14,env:['MEMORY_CALCULATOR_V1=true', 'ACCESS_LOG_FILE=/app/log']
import requests
from requests.structures import CaseInsensitiveDict
import sys
import warnings
# Disable SSL warnings only. Every request in this script is sent with
# verify=False, which triggers an "Unverified HTTPS request ..." warning per
# call. Matching on that message prefix silences just that warning, whereas a
# blanket simplefilter("ignore") would also hide unrelated warnings such as
# deprecation notices.
# Warning options supplied by the user (sys.warnoptions is non-empty) take
# precedence and are left untouched.
if not sys.warnoptions:
    warnings.filterwarnings("ignore", message="Unverified HTTPS request")
# Login
# Two invocation forms are supported:
#   <system_domain> <token>
#       Reuse an existing token. It is sent verbatim as the Authorization
#       header, so it must already include the "bearer " scheme.
#   <system_domain> <client_id> <client_secret>
#       Fetch a token from UAA using the client_credentials grant.
token = ""
system_domain = ""
if len(sys.argv) == 3:
    # get token from cf oauth-token
    system_domain = sys.argv[1]
    token = sys.argv[2]
elif len(sys.argv) == 4:
    system_domain = sys.argv[1]
    client = sys.argv[2]
    secret = sys.argv[3]
    oauth_url = "https://uaa." + system_domain + "/oauth/token"
    # OAuth2 token requests must be POSTed with the grant parameters in the
    # form body (RFC 6749, section 3.2); the client id/secret travel as HTTP
    # basic auth.
    token_response = requests.post(
        url=oauth_url,
        data={"grant_type": "client_credentials"},
        auth=(client, secret),
        verify=False,
    )
    # Surface a rejected login as an HTTP error instead of an opaque KeyError
    # on the missing "access_token" field.
    token_response.raise_for_status()
    token = "bearer " + token_response.json()["access_token"]
else:
    print(
        "Expected 2 or 3 arguments but got " + str(len(sys.argv) - 1) + ".\n"
        "Provide either: system_domain, client_id, client_secret\n"
        "or:             system_domain, token",
        file=sys.stderr,
    )
    # sys.exit is always available; the bare exit() helper is only injected by
    # the site module.
    sys.exit(1)
# Headers shared by every CF API request below: ask for JSON and authenticate
# with the token obtained during login.
headers = CaseInsensitiveDict(
    {
        "Accept": "application/json",
        "Authorization": token,
    }
)

# https://v3-apidocs.cloudfoundry.org/version/3.107.0/index.html#list-apps
apps_url = "".join(("https://api.", system_domain, "/v3/apps/?per_page=100"))

# Looking for these environment variables
needles = ("MEMORY_CALCULATOR_V1", "ACCESS_LOG_FILE")
# Looking for environment variables prefixed with
prefix = "JBP"

# Result accumulator: a running count of matching apps plus, later, one entry
# per app keyed by its descriptive label.
apps = {"buildpack_total": 0}
current_page = 1

# Fetch the first page of apps; its pagination block tells us how much work
# there is in total.
entries = requests.get(apps_url, headers=headers, verify=False).json()
first_page_info = entries["pagination"]
total_results = first_page_info["total_results"]
total_pages = first_page_info["total_pages"]

print("Found ", total_results, " apps on ", total_pages, " pages", sep="")
print()
# Function to retrieve org and space name for an app
def get_org_space(space_url, headers):
    """Return the ``(org_name, space_name)`` pair for the space at *space_url*.

    Two GET requests are made with certificate verification disabled: one for
    the space resource itself and one for the organization it links to under
    ``links.organization.href``. Both responses are parsed as JSON and their
    ``name`` fields returned.

    Args:
        space_url: URL of the space resource.
        headers: HTTP headers sent with both requests.

    Returns:
        A tuple ``(org_name, space_name)``.
    """

    def fetch(url):
        # Every lookup here uses the same headers and skips TLS verification.
        return requests.get(url, headers=headers, verify=False).json()

    space = fetch(space_url)
    name_of_space = space["name"]
    organization = fetch(space["links"]["organization"]["href"])
    return organization["name"], name_of_space
# Function to retrieve droplet's detected/specified buildpack name
def get_buildpack(current_droplet_url, headers):
    """Return the ``buildpacks`` entry of the droplet at *current_droplet_url*.

    The droplet resource is fetched with a GET request (certificate
    verification disabled) and parsed as JSON.

    Args:
        current_droplet_url: URL of the app's current droplet resource.
        headers: HTTP headers sent with the request.

    Returns:
        The value stored under ``"buildpacks"`` in the response, or the string
        ``"Not Found"`` when the response has no such key.
    """
    droplet = requests.get(current_droplet_url, headers=headers, verify=False).json()
    if "buildpacks" not in droplet:
        return "Not Found"
    return droplet["buildpacks"]
# Walk every page of the app listing and, for each app running on the
# java_buildpack, record the environment variables that match `needles` or
# start with `prefix`.
while True:
    print("Processing page " + str(current_page) + "/" + str(total_pages))
    for entry in entries["resources"]:
        # Filter for apps running the java_buildpack.
        # The lifecycle check costs no API call, so it runs first: apps whose
        # lifecycle data has no "buildpacks" key are skipped without fetching
        # their droplet.
        if "buildpacks" not in entry["lifecycle"]["data"]:
            continue
        # Retrieve buildpack name (stringified so it can be searched and
        # embedded in the output label).
        current_droplet_url = entry["links"]["current_droplet"]["href"]
        buildpack_json = str(get_buildpack(current_droplet_url, headers=headers))
        if "java_buildpack" not in buildpack_json:
            continue
        apps["buildpack_total"] += 1
        env_vars_url = entry["links"]["environment_variables"]["href"]
        env_vars = requests.get(env_vars_url, headers=headers, verify=False).json()
        # Retrieve org name and space name, then build the label under which
        # this app's matching ENV variables are stored.
        space_url = entry["links"]["space"]["href"]
        org_name, space_name = get_org_space(space_url, headers)
        line_label = (
            "org:" + org_name
            + ",space:" + space_name
            + ",buildpack:" + buildpack_json
            + ",app:" + entry["name"]
            + ",env"
        )
        apps[line_label] = [
            str(key) + "=" + str(value)
            for key, value in env_vars["var"].items()
            if key in needles or key.startswith(prefix)
        ]
    current_page += 1
    # The listing's pagination block carries a link to the next page, or None
    # once the last page has been processed.
    if entries["pagination"]["next"] is None:
        break
    entries = requests.get(entries["pagination"]["next"]["href"], headers=headers, verify=False).json()
# Report the results: one "key:value" line per entry in `apps`, i.e. the
# buildpack_total counter followed by each app label with its list of matches.
for label, matches in apps.items():
    print(label, matches, sep=":")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment