Skip to content

Instantly share code, notes, and snippets.

@flockonus
Created March 23, 2024 00:12
Show Gist options
  • Save flockonus/b9d1435efd086871d55c9acf570aa9c8 to your computer and use it in GitHub Desktop.
Save flockonus/b9d1435efd086871d55c9acf570aa9c8 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data input Generation\n",
"\n",
"Unfortunately, the data generation is not deterministic (the random draws are unseeded); it would benefit from being reproducible.\n",
"\n",
"# Model training\n",
"\n",
"I'm holding the last 60 items out of the training set so they can be used for post-training benchmarking later.\n",
"\n",
"Note that the split ratio I'm using for the test/validation set is 0.05, which is arguably very small.\n",
"\n",
"`train, test = train_test_split(ticker_data, test_size=0.05)`\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"\n",
"def calculate(a: float, b: float, c: float, d: float, e: float) -> float:\n",
" \"\"\"\n",
" This function takes five floats as input and performs a series of\n",
" mathematical operations on them based on certain conditions.\n",
" \"\"\"\n",
"\n",
" if abs(a - b) < 0.001:\n",
" ret = math.sqrt(abs(c * d + e) + 3.1413)\n",
" elif a > b and c > d:\n",
" ret = math.log(abs(a * b * c) + 3.1413) + d - e\n",
" elif a < 0 and b > 0:\n",
" ret = (c + d) / (abs(a * e) + 3.1413)\n",
" else:\n",
" ret = a - b + math.log10(abs(c * d * e) + 3.1413)\n",
" return ret\n",
"\n",
"\n",
"N = 10000\n",
"LOW = -1\n",
"HIGH = 7\n",
"\n",
"# create 5 arrays of N random integers drawn from [LOW, HIGH)\n",
"import numpy as np\n",
"\n",
"a = np.random.randint(LOW, HIGH, N)\n",
"b = np.random.randint(LOW, HIGH, N)\n",
"c = np.random.randint(LOW, HIGH, N)\n",
"d = np.random.randint(LOW, HIGH, N)\n",
"e = np.random.randint(LOW, HIGH, N)\n",
"\n",
"# calculate the result for each set of numbers\n",
"results = [calculate(a[i], b[i], c[i], d[i], e[i]) for i in range(N)]\n",
"\n",
"# concatenate all into a dataframe\n",
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\"a\": a, \"b\": b, \"c\": c, \"d\": d, \"e\": e, \"results\": results})\n",
"\n",
"# save to pickle\n",
"# df.to_pickle(\"./jars/data-0040-synthetic.pkl\")\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"omit_length = 60\n",
"targets = ['results']\n",
"\n",
"use = df.iloc[:-omit_length]\n",
"omit = df.iloc[-omit_length:]\n",
"\n",
"print(\"train\", use.shape, \"omit\", omit.shape)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment