@icexelloss
Created October 23, 2018 18:45
import pyspark.sql.functions as F
# addSummaryColumns and summarizers come from Two Sigma's Flint library;
# the import below assumes Flint's Python package layout
from ts.flint import summarizers
# Returns of a particular stock.
# 1.01 means the stock goes up 1%; 0.95 means the stock goes down 5%
df = ...
# time, return
# 20180101, 1.01
# 20180102, 0.95
# 20180103, 1.05
# ...
# The first addSummaryColumns adds a column 'return_product': the cumulative
# return up to and including each day.
# The second addSummaryColumns adds a column 'return_product_max': the max
# cumulative return up to and including each day.
# toDF then renames the columns to 'cum_return' and 'max_cum_return'.
cum_returns = df.addSummaryColumns(summarizers.product('return')) \
    .addSummaryColumns(summarizers.max('return_product')) \
    .toDF('time', 'return', 'cum_return', 'max_cum_return')
drawdowns = cum_returns.withColumn(
    'drawdown',
    1 - cum_returns['cum_return'] / cum_returns['max_cum_return'])
# agg returns a one-row DataFrame holding the maximum drawdown
max_drawdown = drawdowns.agg(F.max('drawdown'))
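To make the pipeline above concrete without a Spark cluster, here is a minimal pure-Python sketch of the same logic: accumulate the running product of returns, track its running maximum, and take the largest gap between the two. The function name `max_drawdown` is just for illustration and is not part of Flint's API.

```python
def max_drawdown(returns):
    """Maximum drawdown from a sequence of daily returns.

    1.01 means the stock goes up 1%; 0.95 means it goes down 5%.
    """
    cum_return = 1.0       # running product of returns ('cum_return' above)
    max_cum_return = 0.0   # running max of cum_return ('max_cum_return' above)
    worst = 0.0            # largest drawdown seen so far
    for r in returns:
        cum_return *= r
        max_cum_return = max(max_cum_return, cum_return)
        worst = max(worst, 1 - cum_return / max_cum_return)
    return worst

# Using the sample data from the comments: the dip on day 2 dominates,
# so the max drawdown is ~0.05 (a 5% drop from the peak)
print(max_drawdown([1.01, 0.95, 1.05]))
```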