Created
October 23, 2018 18:45
-
-
Save icexelloss/d901fba5285ea7de769170fafd26b4ab to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyspark.sql.functions as F

# NOTE(review): `summarizers` is used below but never imported here —
# presumably `from ts.flint import summarizers` (Two Sigma Flint); confirm.

# Daily returns of a particular stock.
# 1.01 means the stock went up 1%; 0.95 means it went down 5%.
df = ...
# time, return
# 20180101, 1.01
# 20180102, 0.95
# 20180103, 1.05
# ...

# Pass 1: addSummaryColumns(product) appends 'return_product', the running
# product of 'return' — i.e. the cumulative return through each day.
# Pass 2: addSummaryColumns(max) appends 'return_product_max', the running
# maximum of that cumulative return.
summarized = (
    df.addSummaryColumns(summarizers.product('return'))
      .addSummaryColumns(summarizers.max('return_product'))
)
# Rename the four columns to friendlier names, in order.
cum_returns = summarized.toDF('time', 'return', 'cum_return', 'max_cum_return')

# Drawdown on each day: fractional drop of the cumulative return from its
# running peak (0 at a new peak, positive when below it).
drawdown_col = 1 - cum_returns['cum_return'] / cum_returns['max_cum_return']
drawdowns = cum_returns.withColumn('drawdown', drawdown_col)

# Maximum drawdown over the whole series (a one-row aggregated DataFrame).
max_drawdown = drawdowns.agg(F.max('drawdown'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment