This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Status exp_read_s3_scan_raw(const std::string& fs, | |
const std::string& bucket, | |
const std::vector<std::string>& filenames, | |
const int dup_factor) { | |
// Read a S3 table directly through scan node | |
std::cout << "exp_read_s3_scan_raw()" << std::endl; | |
std::vector<std::string> dup_filenames = filenames; | |
for (int i = 0; i < dup_factor; ++i) { | |
dup_filenames.insert(dup_filenames.end(), filenames.begin(), filenames.end()); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Code: | |
Status exp_9() { | |
// For local filesystem reading, file:/// may work here as well. You may | |
// need to #include <arrow/dataset/file_base.h> | |
const std::string uri_string = "ts3://"; | |
// const std::string uri_string = "gs://"; | |
// Filesystem and path | |
ARROW_ASSIGN_OR_RAISE(const std::shared_ptr<arrow::fs::FileSystem> filesystem, | |
arrow::fs::FileSystemFromUri(uri_string, nullptr)); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Building using CMake version: 3.22.2 | |
-- --------------------- | |
-- Boost_FOUND: | |
-- Boost_INCLUDE_DIRS: | |
-- Boost_LIBRARY_DIRS: | |
-- Boost_LIBRARIES: | |
-- --------------------- | |
-- Arrow version: 7.0.0 (full: '7.0.0-SNAPSHOT') | |
-- Arrow SO version: 700 (full: 700.0.0) | |
CMake Debug Log at cmake_modules/FindClangTools.cmake:50 (find_program): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyspark.sql.functions as F | |
# Returns of a particular stock. | |
# 1.01 means the stock goes up 1%; 0.95 means the stock goes down 5% | |
df = ... | |
# time, return | |
# 20180101, 1.01 | |
# 20180102, 0.95 | |
# 20180103, 1.05 | |
# ... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from scipy import stats | |
@udf('double') | |
def boxcox(v): | |
return pd.Series(stats.boxcox(v)[0]) | |
df = … | |
# time, v |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.ml.regression import LinearRegression | |
from pyspark.ml.feature import VectorAssembler | |
assembler = VectorAssembler( | |
inputCols=["previous_day_return", "previous_day_decayed_return"], | |
outputCol="features") | |
output = assembler.transform(sp500_decayed_return).select('return', 'features').toDF('label', 'features') | |
lr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ts.flint import udf | |
@udf('double', arg_type='numpy') | |
def decayed(columns): | |
v = columns[0] | |
decay = np.power(0.5, np.arange(len(v)))[::-1] | |
return (v * decay).sum() | |
sp500_decayed_return = sp500_joined_return.summarizeWindows( | |
window = windows.past_absolute_time('7day'), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ts.flint import summarizers | |
sp500_decayed_return = sp500_joined_return.summarizeWindows( | |
window = windows.past_absolute_time('7day'), | |
summarizer = summarizers.ewma('previous_day_return', alpha=0.5) | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sp500_joined_return = sp500_return.leftJoin(sp500_previous_day_return, tolerance='3days').dropna() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sp500_previous_day_return = sp500_return.shiftTime(windows.future_absolute_time('1day')).toDF('time', 'previous_day_return') | |
sp500_joined_return = sp500_return.leftJoin(sp500_return_previous_day) |
NewerOlder