Skip to content

Instantly share code, notes, and snippets.

@ankona
Last active February 2, 2024 21:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ankona/d6978d432ce29a993fbea76bb0ae926c to your computer and use it in GitHub Desktop.
Save ankona/d6978d432ce29a993fbea76bb0ae926c to your computer and use it in GitHub Desktop.
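A simple method of downsampling a time series using difference tracking: a sample is kept only when it differs from the last kept sample by more than a threshold.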
import pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# CSV file that generate()/generate_sin() write and downsample() reads back;
# the resampled CSVs and the output PNG names are derived from this path.
file_name = pathlib.Path("generated.csv")
def generate(min_value: float, max_value: float, normal_dy: float, num_timesteps: int, axis) -> None:
    """Write a synthetic random-walk series to ``file_name`` and plot it.

    The series starts at a value drawn uniformly from
    ``[min_value, max_value)``. Each following step adds a normally
    distributed increment scaled by ``normal_dy``; roughly 1% of the steps
    are "spikes" whose increment is doubled. One value is written per line
    (``num_timesteps + 1`` lines in total, no header), then the file is read
    back, its head is printed, and the series is drawn on ``axis``.

    Args:
        min_value: Lower bound of the starting value.
        max_value: Upper bound (exclusive) of the starting value.
        normal_dy: Scale applied to each standard-normal step.
        num_timesteps: Number of steps generated after the starting value.
        axis: Matplotlib-style axis object that receives the title, the
            axis labels and the plotted line.
    """
    # Uniform draw so the starting point really lies between the bounds; a
    # standard-normal draw here could land far outside (even below) the range.
    last_value = min_value + np.random.rand() * (max_value - min_value)

    with open(str(file_name), "w", encoding="utf-8") as out_fp:
        out_fp.write(f"{last_value}\n")
        for _ in range(num_timesteps):
            difference = np.random.randn() * normal_dy
            # The spike decision needs a uniform sample: comparing a
            # standard-normal sample against 0.01 would fire ~50% of the time.
            if np.random.rand() < 0.01:
                # let's amp up the signal
                difference *= 2
            last_value += difference
            out_fp.write(f"{last_value}\n")

    # Read the file back only after it has been closed (and thus flushed).
    df = pd.read_csv(str(file_name), header=None, names=["memory"])
    print(df.head())

    axis.set_title("original faux data")
    axis.set_ylabel("memory consumption")
    axis.set_xlabel("time")
    axis.plot(df, linewidth=2, color="magenta", label="original")
def generate_sin(xmin: float, xmax: float, axis) -> None:
    """Write a sine wave sampled on ``[xmin, xmax]`` to ``file_name`` and plot it.

    The wave is evaluated at 100000 evenly spaced points and stored one value
    per line without a header. The file is then loaded back, its head is
    printed, and the series is drawn on ``axis`` with the sample count in
    the title.

    Args:
        xmin: First x position at which the sine is evaluated.
        xmax: Last x position at which the sine is evaluated.
        axis: Matplotlib-style axis object that receives the title, the
            axis labels and the plotted line.
    """
    wave = np.sin(np.linspace(xmin, xmax, 100000))

    csv_path = str(file_name)
    with open(csv_path, "w", encoding="utf-8") as sink:
        sink.writelines(f"{sample}\n" for sample in wave)

    # Round-trip through the CSV so downstream code sees exactly what is on disk.
    frame = pd.read_csv(csv_path, header=None, names=["memory"])
    print(frame.head())

    sample_count = frame.shape[0]
    axis.set_title(f"original faux data, sz={sample_count}")
    axis.set_ylabel("memory consumption")
    axis.set_xlabel("time")
    axis.plot(frame, linewidth=2, color="magenta", label="original")
def downsample(factor: float, axis) -> None:
    """Downsample the series in ``file_name`` by difference tracking and plot it.

    A sample is kept whenever it differs from the most recently kept sample
    by more than ``(max - min) / factor``; the first sample is always kept so
    the output has a starting point. Kept samples are written as
    ``row,value`` lines to a sibling file named
    ``<stem>_resampled_<factor><suffix>``, which is then read back and drawn
    on ``axis``.

    Args:
        factor: Divisor applied to the value range to obtain the threshold;
            larger factors keep more samples. Must be positive.
        axis: Matplotlib-style axis object that receives the title, the
            axis labels and the plotted line.

    Raises:
        ValueError: If ``factor`` is not positive.
    """
    if factor <= 0:
        raise ValueError(f"factor must be positive, got {factor}")

    df = pd.read_csv(str(file_name), header=None, names=["memory"])
    memory = df["memory"]
    threshold = (memory.max() - memory.min()) / factor

    # Build the output name from the path components so that dots elsewhere
    # in the path (e.g. in a directory name) are left alone.
    resampled_file = file_name.with_name(
        f"{file_name.stem}_resampled_{factor}{file_name.suffix}"
    )

    samples = memory.to_numpy()
    value = samples[0]
    with open(resampled_file, "w", encoding="utf-8") as out_fp:
        # Always record the starting sample; it is the baseline every later
        # comparison is made against, and it keeps the output non-empty.
        out_fp.write(f"0,{value}\n")
        for row, curr_value in enumerate(samples):
            # Compare the signed values: comparing magnitudes would treat a
            # swing from -x to +x as "no change".
            if abs(curr_value - value) > threshold:
                out_fp.write(f"{row},{curr_value}\n")
                value = curr_value

    # The file has no header row, so say so explicitly; otherwise the first
    # kept sample would be consumed as column names.
    df2 = pd.read_csv(
        resampled_file, header=None, names=["row", "memory"], index_col="row"
    )

    axis.set_title(f"RESAMPLED f={factor},sz={df2.shape[0]}")
    axis.set_ylabel("memory consumption")
    axis.set_xlabel("time")
    axis.plot(df2, linewidth=1, alpha=0.8, color="cyan", label="resampled")
if __name__ == "__main__":
    # 3x3 grid: the first panel shows the source series, the remaining eight
    # show the downsampled series at increasing factors.
    fig, grid = plt.subplots(3, 3)
    fig.set_size_inches(12, 12)
    source_axis, *resample_axes = grid.flatten()

    generate(1000, 1500, 10, 100_000, source_axis)
    # Alternative oscillating input:
    # generate_sin(-10, 10, source_axis)

    factors = (5, 7, 10, 20, 50, 100, 150, 200)
    for axis, factor in zip(resample_axes, factors):
        downsample(factor, axis)

    plt.savefig(file_name.with_suffix(".png"))
@ankona
Copy link
Author

ankona commented Feb 2, 2024

image

@ankona
Copy link
Author

ankona commented Feb 2, 2024

Oscillating data

generated

generated

@ankona
Copy link
Author

ankona commented Feb 2, 2024

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment