fpcorso/customer-license-calculations.py

## customer-license-calculations.py
# Script: EDD Customer License Calculations
# Author: Frank Corso
# Date Created: 04/06/2020
# Last Modified: 07/21/2020
# Python Version: 3.6.5

# Takes exports of licenses from EDD and turns it into a CSV of customers with total number of licenses,
# total number of sites, active licenses, and total active sites. This is meant as a stepping stone for
# further analysis.
# To use, have CSVs of licenses for each download in EDD in a 'licenses' directory with each
# saved as edd-export-licenses-(download-slug).csv
# Put this script above that directory. In terminal, run python customer_license_calculations.py

from os import listdir
from os import path

import pandas as pd
import re


def calculate_customer_stats():
    """Loads license data and creates an individual customer report."""

    # Edit this if you name your CSV files something different.
    p = re.compile(r'edd-export-licenses-(.+).csv')
    customers = {}
    extensions = []

    # Edit this to change input and output directories
    in_dir = 'licenses'
    out_dir = 'data-output'

    # Loop over all files in directory.
    for file in listdir(in_dir):
        print('Loading...', file)

        # Checks filename for REGEX defined above. If matches, uses that as download name.
        # If not matching, skips this file.
        extension_search = p.findall(file)
        if len(extension_search) == 0:
            print('Not using this file...')
            continue
        extension_name = extension_search[0].replace('-', ' ').replace('_', ' ').title()
        extensions.append(extension_name)

        # Loads licenses CSV into a dict
        df = pd.read_csv(path.join(in_dir, file))
        licenses = df.to_dict('records')

        # Cycles over license keys in dict and add info to our customer list
        for single in licenses:
            try:
                customer_id = float(single['Customer ID'])
            except:
                print('Error with Customer ID: ', single['Customer ID'])
                continue
            if customer_id not in customers:
                customers[customer_id] = {
                    'Customer Email': single['Customer Email'],
                    'Total Licences': 1,
                    'Total Active Licenses': 1 if single['License Status'] == 'active' else 0,
                    'Total Sites': single['Activation Count'],
                    'Total Active Sites': single['Activation Count'] if single['License Status'] == 'active' else 0,
                    extension_name: 1
                }
            else:
                customers[customer_id]['Total Licences'] += 1
                customers[customer_id]['Total Sites'] += single['Activation Count']
                customers[customer_id][extension_name] = 1
                if single['License Status'] == 'active':
                    customers[customer_id]['Total Active Licenses'] += 1
                    customers[customer_id]['Total Active Sites'] += single['Activation Count']

    # Puts data back into dataframe and adds our average calculations.
    cust_df = pd.DataFrame.from_dict(customers, 'index')
    for extension in extensions:
        cust_df[extension] = cust_df[extension].fillna(0).astype('int')
    cust_df.index.name = 'Customer ID'
    cust_df['AVG Sites Per License'] = round(cust_df['Total Sites'] / cust_df['Total Licences'], 2)
    cust_df['AVG Active Site Per License'] = cust_df['Total Active Sites'] / cust_df['Total Active Licenses']
    cust_df['AVG Active Site Per License'] = cust_df['AVG Active Site Per License'].fillna(0)

    # Puts dataframe into our new CSV.
    print('Writing results to file...')
    cust_df.to_csv(path.join(out_dir, 'customer-license-data.csv'))


if __name__ == '__main__':
    print('Calculating customer stats...')
    calculate_customer_stats()
	# Script: EDD Customer License Calculations
	# Author: Frank Corso
	# Date Created: 04/06/2020
	# Last Modified: 07/21/2020
	# Python Version: 3.6.5

	# Takes exports of licenses from EDD and turns it into a CSV of customers with total number of licenses,
	# total number of sites, active licenses, and total active sites. This is meant as a stepping stone for
	# further analysis.
	# To use, have CSVs of licenses for each download in EDD in a 'licenses' directory with each
	# saved as edd-export-licenses-(download-slug).csv
	# Put this script above that directory. In terminal, run python customer_license_calculations.py

	from os import listdir
	from os import path

	import pandas as pd
	import re


	def calculate_customer_stats():
	"""Loads license data and creates an individual customer report."""

	# Edit this if you name your CSV files something different.
	p = re.compile(r'edd-export-licenses-(.+).csv')
	customers = {}
	extensions = []

	# Edit this to change input and output directories
	in_dir = 'licenses'
	out_dir = 'data-output'

	# Loop over all files in directory.
	for file in listdir(in_dir):
	print('Loading...', file)

	# Checks filename for REGEX defined above. If matches, uses that as download name.
	# If not matching, skips this file.
	extension_search = p.findall(file)
	if len(extension_search) == 0:
	print('Not using this file...')
	continue
	extension_name = extension_search[0].replace('-', ' ').replace('_', ' ').title()
	extensions.append(extension_name)

	# Loads licenses CSV into a dict
	df = pd.read_csv(path.join(in_dir, file))
	licenses = df.to_dict('records')

	# Cycles over license keys in dict and add info to our customer list
	for single in licenses:
	try:
	customer_id = float(single['Customer ID'])
	except:
	print('Error with Customer ID: ', single['Customer ID'])
	continue
	if customer_id not in customers:
	customers[customer_id] = {
	'Customer Email': single['Customer Email'],
	'Total Licences': 1,
	'Total Active Licenses': 1 if single['License Status'] == 'active' else 0,
	'Total Sites': single['Activation Count'],
	'Total Active Sites': single['Activation Count'] if single['License Status'] == 'active' else 0,
	extension_name: 1
	}
	else:
	customers[customer_id]['Total Licences'] += 1
	customers[customer_id]['Total Sites'] += single['Activation Count']
	customers[customer_id][extension_name] = 1
	if single['License Status'] == 'active':
	customers[customer_id]['Total Active Licenses'] += 1
	customers[customer_id]['Total Active Sites'] += single['Activation Count']

	# Puts data back into dataframe and adds our average calculations.
	cust_df = pd.DataFrame.from_dict(customers, 'index')
	for extension in extensions:
	cust_df[extension] = cust_df[extension].fillna(0).astype('int')
	cust_df.index.name = 'Customer ID'
	cust_df['AVG Sites Per License'] = round(cust_df['Total Sites'] / cust_df['Total Licences'], 2)
	cust_df['AVG Active Site Per License'] = cust_df['Total Active Sites'] / cust_df['Total Active Licenses']
	cust_df['AVG Active Site Per License'] = cust_df['AVG Active Site Per License'].fillna(0)

	# Puts dataframe into our new CSV.
	print('Writing results to file...')
	cust_df.to_csv(path.join(out_dir, 'customer-license-data.csv'))


	if __name__ == '__main__':
	print('Calculating customer stats...')
	calculate_customer_stats()