cweibel/scrape.py

## scrape.py
#!/usr/bin/env python3

# Welcome! This script uses the CF API to look for a specific set of ENV variables which an app has
# been deployed with.  If a hit is found, the org name, space name and app name are retrieved along
# with the list of matching ENV variables.
#
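# This script accepts either 3 arguments, in order:
#  - system_domain    - Example: 172.18.0.150.nip.io
#  - cf client id     - Example: cf_smoke_tests
#  - cf client secret - Example: nice_try_redacted_4u
# or 2 arguments: system_domain followed by an existing token (e.g. from
# `cf oauth-token`), which is sent verbatim as the Authorization header.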
#
# Example execution:
# $ python3 read-cf-apps-kubecf.py 172.18.0.150.nip.io cf_smoke_tests $(kubectl get secret --namespace kubecf var-uaa-clients-cf-smoke-tests-secret -o jsonpath='{.data.password}' | base64 --decode)
#
# Sample output:
#
#   Found 12 apps on 2 pages
#
#   Processing page 1/2
#   Processing page 2/2
#   buildpack_total:3
#   org:system,space:test,app:app1,env:['MEMORY_CALCULATOR_V1=true']
#   org:org2,space:test2,app:app1org,env:['MEMORY_CALCULATOR_V1=false']
#   org:org2,space:test2,app:app14,env:['MEMORY_CALCULATOR_V1=true', 'ACCESS_LOG_FILE=/app/log']

import requests
from requests.structures import CaseInsensitiveDict

import sys
import warnings

# Disable SSL Warnings
# NOTE: this mutes every warning category, not only the certificate warnings
# caused by the verify=False requests in this script. It is skipped when
# sys.warnoptions is non-empty.
if len(sys.warnoptions) == 0:
    warnings.simplefilter(action="ignore")

# Login
#
# Two invocation forms are accepted:
#   <system_domain> <token>                      - token is used verbatim as the Authorization header
#   <system_domain> <client_id> <client_secret>  - token is requested from UAA (client_credentials grant)

token = ""
system_domain = ""
if len(sys.argv) == 3:
    # get token from cf oauth-token
    system_domain = sys.argv[1]
    token = sys.argv[2]

elif len(sys.argv) == 4:
    system_domain = sys.argv[1]
    client = sys.argv[2]
    secret = sys.argv[3]
    oauth_url = "https://uaa." + system_domain + "/oauth/token?grant_type=client_credentials"

    # TLS verification is disabled here, as for every other request in this script.
    token_response = requests.get(url=oauth_url, auth=(client, secret), verify=False)
    # Report a failed login as an HTTP error rather than a KeyError on "access_token".
    token_response.raise_for_status()
    token = "bearer " + token_response.json()["access_token"]

else:
    print("Usage: " + sys.argv[0] + " <system_domain> <token>", file=sys.stderr)
    print("   or: " + sys.argv[0] + " <system_domain> <client_id> <client_secret>", file=sys.stderr)
    # sys.exit is always available; the bare exit() helper is only injected by the site module.
    sys.exit(1)


# Headers sent with every CF API request: ask for JSON and authenticate with the login token.
headers = CaseInsensitiveDict({"Accept": "application/json", "Authorization": token})

# https://v3-apidocs.cloudfoundry.org/version/3.107.0/index.html#list-apps
apps_url = "https://api.{}/v3/apps/?per_page=100".format(system_domain)


# Looking for these environment variables (exact name match)
needles = ("MEMORY_CALCULATOR_V1", "ACCESS_LOG_FILE")
# Looking for environment variables prefixed with
prefix = "JBP"

# Fetch the first page of apps; its "pagination" object also carries the totals.
first_page = requests.get(apps_url, headers=headers, verify=False)
# Report HTTP failures (e.g. a rejected token) here instead of a KeyError on "pagination" below.
first_page.raise_for_status()
entries = first_page.json()

total_results = entries["pagination"]["total_results"]
total_pages = entries["pagination"]["total_pages"]
current_page = 1

# Result map: "buildpack_total" counts matching apps; the paging loop adds one key per matching app.
apps = {"buildpack_total": 0}

print("Found " + str(total_results) + " apps on " + str(total_pages) + " pages")
print()

# Function to retrieve org and space name for an app
def get_org_space(space_url, headers):
    """Look up the names of a space and of the organization it links to.

    Performs two GET requests (TLS verification disabled): one to
    ``space_url`` and one to the organization link found in that response.

    Args:
        space_url: URL of the space resource.
        headers: Headers sent with both requests.

    Returns:
        Tuple ``(org_name, space_name)``.

    Raises:
        KeyError: If an expected key is missing from either JSON response.
    """
    space = requests.get(space_url, headers=headers, verify=False).json()
    space_name = space["name"]

    org_href = space["links"]["organization"]["href"]
    org = requests.get(org_href, headers=headers, verify=False).json()

    return org["name"], space_name

# Function to retrieve droplet's detected/specified buildpack name
def get_buildpack(current_droplet_url, headers):
    """Fetch a droplet and return its ``"buildpacks"`` entry.

    Args:
        current_droplet_url: URL of the droplet resource to GET.
        headers: Headers sent with the request.

    Returns:
        The value stored under ``"buildpacks"`` in the JSON response, or the
        string ``"Not Found"`` when the response has no such key.
    """
    droplet = requests.get(current_droplet_url, headers=headers, verify=False).json()
    if "buildpacks" not in droplet:
        return "Not Found"
    return droplet["buildpacks"]


# Walk every page of the app listing and record the matching ENV variables of each java_buildpack app.
# Cache of space URL -> (org_name, space_name), so a space shared by several apps is looked up once.
space_lookup = {}

while True:
    print("Processing page " + str(current_page) + "/" + str(total_pages))

    for entry in entries["resources"]:

        # Skip apps whose lifecycle data has no "buildpacks" key before spending a droplet request on them.
        if "buildpacks" not in entry["lifecycle"]["data"]:
            continue

        # Retrieve buildpack name (stringified so it can be substring-tested and embedded in the label)
        current_droplet_url = entry["links"]["current_droplet"]["href"]
        buildpack_json = str(get_buildpack(current_droplet_url, headers=headers))

        # Filter for apps running the java_buildpack
        if "java_buildpack" not in buildpack_json:
            continue

        apps["buildpack_total"] += 1
        env_vars_url = entry["links"]["environment_variables"]["href"]
        env_vars = requests.get(env_vars_url, headers=headers, verify=False).json()

        # Retrieve org name, space name and create label for the list of matching ENV variables
        space_url = entry["links"]["space"]["href"]
        if space_url not in space_lookup:
            space_lookup[space_url] = get_org_space(space_url, headers)
        org_name, space_name = space_lookup[space_url]
        line_label = "org:" + org_name + ",space:" + space_name + ",buildpack:" + buildpack_json + ",app:" + entry["name"] + ",env"

        # Keep only the variables named in needles or starting with prefix.
        apps[line_label] = [
            str(key) + "=" + str(value)
            for key, value in env_vars["var"].items()
            if key in needles or key.startswith(prefix)
        ]
    current_page += 1

    # A null "next" link marks the last page.
    if entries["pagination"]["next"] is None:
        break

    entries = requests.get(entries["pagination"]["next"]["href"], headers=headers, verify=False).json()


# Emit one "<key>:<value>" line per collected entry, the buildpack_total counter included.
for label, matches in apps.items():
    print("{}:{}".format(label, matches))
	#!/usr/bin/env python3

	# Welcome! This script uses the CF API to look for a specific set of ENV variables which an app has
	# been deployed with. If a hit is found, the org name, space name and app name are retrieved along
	# with the list of matching ENV variables.
	#
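	# This script accepts either 3 arguments, in order:
	# - system_domain - Example: 172.18.0.150.nip.io
	# - cf client id - Example: cf_smoke_tests
	# - cf client secret - Example: nice_try_redacted_4u
	# or 2 arguments: system_domain followed by an existing token (e.g. from
	# `cf oauth-token`), which is sent verbatim as the Authorization header.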
	#
	# Example execution:
	# $ python3 read-cf-apps-kubecf.py 172.18.0.150.nip.io cf_smoke_tests $(kubectl get secret --namespace kubecf var-uaa-clients-cf-smoke-tests-secret -o jsonpath='{.data.password}' \| base64 --decode)
	#
	# Sample output:
	#
	# Found 12 apps on 2 pages
	#
	# Processing page 1/2
	# Processing page 2/2
	# buildpack_total:3
	# org:system,space:test,app:app1,env:['MEMORY_CALCULATOR_V1=true']
	# org:org2,space:test2,app:app1org,env:['MEMORY_CALCULATOR_V1=false']
	# org:org2,space:test2,app:app14,env:['MEMORY_CALCULATOR_V1=true', 'ACCESS_LOG_FILE=/app/log']

	import requests
	from requests.structures import CaseInsensitiveDict

	import sys
	import warnings

	# Disable SSL Warnings
	# NOTE: this mutes every warning category, not only the certificate warnings
	# caused by the verify=False requests in this script. It is skipped when
	# sys.warnoptions is non-empty.
	if not sys.warnoptions:
	    warnings.simplefilter("ignore")

	# Login
	#
	# Two invocation forms are accepted:
	#   <system_domain> <token>                      - token is used verbatim as the Authorization header
	#   <system_domain> <client_id> <client_secret>  - token is requested from UAA (client_credentials grant)

	token = ""
	system_domain = ""
	if len(sys.argv) == 3:
	    # get token from cf oauth-token
	    system_domain = sys.argv[1]
	    token = sys.argv[2]

	elif len(sys.argv) == 4:
	    system_domain = sys.argv[1]
	    client = sys.argv[2]
	    secret = sys.argv[3]
	    oauth_url = "https://uaa." + system_domain + "/oauth/token?grant_type=client_credentials"

	    # TLS verification is disabled here, as for every other request in this script.
	    token_response = requests.get(url=oauth_url, auth=(client, secret), verify=False)
	    # Report a failed login as an HTTP error rather than a KeyError on "access_token".
	    token_response.raise_for_status()
	    token = "bearer " + token_response.json()["access_token"]

	else:
	    print("Usage: " + sys.argv[0] + " <system_domain> <token>", file=sys.stderr)
	    print("   or: " + sys.argv[0] + " <system_domain> <client_id> <client_secret>", file=sys.stderr)
	    # sys.exit is always available; the bare exit() helper is only injected by the site module.
	    sys.exit(1)


	# Headers sent with every CF API request: ask for JSON and authenticate with the login token.
	headers = CaseInsensitiveDict({"Accept": "application/json", "Authorization": token})

	# https://v3-apidocs.cloudfoundry.org/version/3.107.0/index.html#list-apps
	apps_url = "https://api.{}/v3/apps/?per_page=100".format(system_domain)


	# Looking for these environment variables (exact name match)
	needles = ("MEMORY_CALCULATOR_V1", "ACCESS_LOG_FILE")
	# Looking for environment variables prefixed with
	prefix = "JBP"

	# Fetch the first page of apps; its "pagination" object also carries the totals.
	first_page = requests.get(apps_url, headers=headers, verify=False)
	# Report HTTP failures (e.g. a rejected token) here instead of a KeyError on "pagination" below.
	first_page.raise_for_status()
	entries = first_page.json()

	total_results = entries["pagination"]["total_results"]
	total_pages = entries["pagination"]["total_pages"]
	current_page = 1

	# Result map: "buildpack_total" counts matching apps; the paging loop adds one key per matching app.
	apps = {"buildpack_total": 0}

	print("Found " + str(total_results) + " apps on " + str(total_pages) + " pages")
	print()

	# Function to retrieve org and space name for an app
	def get_org_space(space_url, headers):
	    """Look up the names of a space and of the organization it links to.

	    Performs two GET requests (TLS verification disabled): one to
	    ``space_url`` and one to the organization link found in that response.

	    Args:
	        space_url: URL of the space resource.
	        headers: Headers sent with both requests.

	    Returns:
	        Tuple ``(org_name, space_name)``.

	    Raises:
	        KeyError: If an expected key is missing from either JSON response.
	    """
	    space_vars = requests.get(space_url, headers=headers, verify=False).json()
	    space_name = space_vars["name"]

	    org_url = space_vars["links"]["organization"]["href"]
	    org_vars = requests.get(org_url, headers=headers, verify=False).json()
	    org_name = org_vars["name"]

	    return org_name, space_name

	# Function to retrieve droplet's detected/specified buildpack name
	def get_buildpack(current_droplet_url, headers):
	    """Fetch a droplet and return its ``"buildpacks"`` entry.

	    Args:
	        current_droplet_url: URL of the droplet resource to GET.
	        headers: Headers sent with the request.

	    Returns:
	        The value stored under ``"buildpacks"`` in the JSON response, or the
	        string ``"Not Found"`` when the response has no such key.
	    """
	    buildpack_vars = requests.get(current_droplet_url, headers=headers, verify=False).json()
	    buildpack_json = buildpack_vars["buildpacks"] if "buildpacks" in buildpack_vars else "Not Found"
	    return buildpack_json


	# Walk every page of the app listing and record the matching ENV variables of each java_buildpack app.
	# Cache of space URL -> (org_name, space_name), so a space shared by several apps is looked up once.
	space_lookup = {}

	while True:
	    print("Processing page " + str(current_page) + "/" + str(total_pages))

	    for entry in entries["resources"]:

	        # Skip apps whose lifecycle data has no "buildpacks" key before spending a droplet request on them.
	        if "buildpacks" not in entry["lifecycle"]["data"]:
	            continue

	        # Retrieve buildpack name (stringified so it can be substring-tested and embedded in the label)
	        current_droplet_url = entry["links"]["current_droplet"]["href"]
	        buildpack_json = str(get_buildpack(current_droplet_url, headers=headers))

	        # Filter for apps running the java_buildpack
	        if "java_buildpack" not in buildpack_json:
	            continue

	        apps["buildpack_total"] += 1
	        env_vars_url = entry["links"]["environment_variables"]["href"]
	        env_vars = requests.get(env_vars_url, headers=headers, verify=False).json()

	        # Retrieve org name, space name and create label for the list of matching ENV variables
	        space_url = entry["links"]["space"]["href"]
	        if space_url not in space_lookup:
	            space_lookup[space_url] = get_org_space(space_url, headers)
	        org_name, space_name = space_lookup[space_url]
	        line_label = "org:" + org_name + ",space:" + space_name + ",buildpack:" + buildpack_json + ",app:" + entry["name"] + ",env"

	        # Keep only the variables named in needles or starting with prefix.
	        apps[line_label] = [
	            str(key) + "=" + str(value)
	            for key, value in env_vars["var"].items()
	            if key in needles or key.startswith(prefix)
	        ]
	    current_page += 1

	    # A null "next" link marks the last page.
	    if entries["pagination"]["next"] is None:
	        break

	    entries = requests.get(entries["pagination"]["next"]["href"], headers=headers, verify=False).json()


	# Emit one "<key>:<value>" line per collected entry, the buildpack_total counter included.
	for key, value in apps.items():
	    print(str(key) + ":" + str(value))