Skip to content

Instantly share code, notes, and snippets.

@LCRAFTZ
Created December 23, 2025 21:07
Show Gist options
  • Select an option

  • Save LCRAFTZ/4ea539b6c607dee776fb8e9baf86eee3 to your computer and use it in GitHub Desktop.

Select an option

Save LCRAFTZ/4ea539b6c607dee776fb8e9baf86eee3 to your computer and use it in GitHub Desktop.
A minimal linter to catch lookahead bias & data leakage in backtests — inspired by r/algotrading.
#!/usr/bin/env python3
"""
Backtest Guard: Detect common lookahead/data leakage mistakes.
Usage: python backtest-guard.py your_strategy.py # or your_signals.csv
"""
import sys
import re
from pathlib import Path
import pandas as pd
def check_lookahead_in_code(file_path):
    """Scan a Python strategy file for common lookahead-bias patterns.

    This is a regex heuristic over the raw file text, not a parser: matches
    inside comments or string literals count too, and the rolling/shift
    check is file-wide (one lagging shift anywhere silences the rolling
    warning).

    Args:
        file_path: Path of the strategy source file (``str`` or ``Path``).

    Returns:
        A list of warning strings; empty when nothing suspicious is found.

    Raises:
        OSError: If the file cannot be read.
    """
    # errors="ignore" drops undecodable bytes so an odd encoding does not
    # abort the scan; the patterns below are pure ASCII anyway.
    code = Path(file_path).read_text(encoding="utf-8", errors="ignore")

    # A rolling window followed by any aggregation call, with or without
    # arguments (.mean(), .quantile(0.9), .apply(fn)); whitespace/newlines
    # between chained calls and one level of nested parentheses inside
    # rolling(...) are tolerated.
    rolling_call = r"\.rolling\((?:[^()]|\([^()]*\))*\)\s*\.\s*\w+\s*\("
    # Any strictly positive shift lags the data: shift(1), shift(3),
    # shift(periods=1), shift(1, fill_value=0), ...
    lagging_shift = r"\.shift\(\s*(?:periods\s*=\s*)?\+?[1-9]\d*"
    # A negative shift pulls future rows into the present, in positional
    # or keyword form, with or without further arguments.
    future_shift = r"\.shift\(\s*(?:periods\s*=\s*)?-\s*\d+"

    issues = []
    if re.search(rolling_call, code) and not re.search(lagging_shift, code):
        issues.append("⚠️ Rolling window without .shift(1) — likely lookahead")
    if re.search(future_shift, code):
        issues.append("⚠️ Negative shift detected — using future data")
    return issues
def check_csv_timestamps(csv_path):
    """Check that a CSV's 'timestamp' column is in chronological order.

    Equal consecutive timestamps are accepted; only a strictly earlier
    timestamp following a later one is reported. Rows whose timestamp is
    missing are reported separately and skipped when comparing order, so
    ordering is still checked across the gap.

    Args:
        csv_path: Path of the CSV file (anything ``pandas.read_csv`` accepts).

    Returns:
        A non-empty list of report lines: a single "no column" or "failed
        to parse" message, one line per kind of problem found, or a single
        success line when the timestamps are complete and ordered.

    Raises:
        OSError, pandas.errors.EmptyDataError, pandas.errors.ParserError:
            Propagated from ``pandas.read_csv`` when the file is unreadable.
    """
    df = pd.read_csv(csv_path)
    if "timestamp" not in df.columns:
        return ["❓ No 'timestamp' column found"]

    try:
        # Keep the parsed values in a local; the frame itself is not needed
        # in converted form.
        stamps = pd.to_datetime(df["timestamp"])
    except (ValueError, TypeError, OverflowError):
        return ["⚠️ Failed to parse 'timestamp' — check format"]

    issues = []

    # Empty cells parse to NaT. They break monotonicity but have no usable
    # diff, so report them on their own instead of as "0 total" disorder.
    missing_rows = df.index[stamps.isna().to_numpy()].tolist()
    if missing_rows:
        issues.append(
            f"⚠️ Missing timestamps at rows: {missing_rows[:5]}... ({len(missing_rows)} total)"
        )

    # Compare each present timestamp with the previous present one; the
    # first diff is NaT, which compares False and is therefore never flagged.
    present = stamps.dropna()
    went_backwards = (present.diff() < pd.Timedelta(0)).to_numpy()
    invalid_idx = present.index[went_backwards].tolist()
    if invalid_idx:
        issues.append(
            f"⚠️ Non-chronological timestamps at rows: {invalid_idx[:5]}... ({len(invalid_idx)} total)"
        )

    return issues or ["✅ Timestamps are chronological"]
def main():
    """Command-line entry point: lint the file named by ``sys.argv[1]``.

    Dispatches on the file extension, case-insensitively: ``.py`` targets
    are scanned with ``check_lookahead_in_code`` and ``.csv`` targets with
    ``check_csv_timestamps``. Findings are printed to stdout.

    Raises:
        SystemExit: With status 1 when no target is given, the extension is
            unsupported, or the target is not an existing file.
    """
    if len(sys.argv) < 2:
        print("Usage: python backtest-guard.py <strategy.py or signals.csv>")
        sys.exit(1)

    target = sys.argv[1]
    print(f"🔍 Analyzing: {target}\n")

    # Lower-case only for the comparison so STRATEGY.PY / data.CSV work;
    # the path itself is passed on untouched.
    lowered = target.lower()
    is_code = lowered.endswith(".py")
    is_csv = lowered.endswith(".csv")

    if not (is_code or is_csv):
        print("❓ Unsupported file type. Use .py or .csv")
        # Nothing was analysed, so signal failure to the calling shell.
        sys.exit(1)

    # Fail with a readable message instead of a traceback from the readers.
    if not Path(target).is_file():
        print(f"❌ File not found: {target}")
        sys.exit(1)

    if is_code:
        print("📄 Code Analysis:")
        issues = check_lookahead_in_code(target)
        for issue in issues or ["✅ No obvious lookahead patterns found"]:
            print(f" {issue}")
    else:
        print("📊 CSV Analysis:")
        issues = check_csv_timestamps(target)
        for issue in issues:
            print(f" {issue}")
# Run the CLI only when this file is executed as a script; importing it as a
# module just makes the check functions available.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment