# MSFT Log Return Prediction Competition - Jupyter Notebook
# Multi-Agent Framework Implementation
"""
This notebook implements a complete 4-agent system for predicting MSFT stock returns.
The agents work together to achieve RMSE < 0.013 through iterative improvement.
Agents:
1. EDA_Agent: Exploratory data analysis and validation
2. FeatureEngineering_Agent: Financial feature creation
3. Modeling_Agent: Neural network training and optimization
4. Evaluation_Agent: Model evaluation and submission generation
"""
# ===============================================================================
# CELL 1: Environment Setup and Imports
# ===============================================================================
import os
import sys
import asyncio
import json
import logging
from pathlib import Path
from datetime import datetime
# Add the framework to path (if not installed as package)
sys.path.append('/home/jovyan/submission')
# Environment configuration
from dotenv import load_dotenv
load_dotenv(override=True)
# Verify environment variables
print("πŸ”§ Checking environment configuration...")
required_env_vars = ["APIM_KEY", "AZURE_APIM_ENDPOINT"]
missing_vars = [var for var in required_env_vars if not os.getenv(var)]
if missing_vars:
    print(f"❌ Missing environment variables: {missing_vars}")
    print("Please set these in your .env file:")
    for var in missing_vars:
        print(f"  {var}=your_value_here")
else:
    print("βœ… Environment variables configured")
    print(f"Azure Endpoint: {os.getenv('AZURE_APIM_ENDPOINT', 'Not set')}")
    print(f"API Key configured: {'Yes' if os.getenv('APIM_KEY') else 'No'}")
# ===============================================================================
# CELL 2: Install Required Packages
# ===============================================================================
import subprocess
import sys
import re

def install_package(package):
    """Install a package if it is not already importable."""
    # Strip extras and version specifiers, e.g. "pandas>=1.5.0" -> "pandas"
    module_name = re.split(r"[<>=\[]", package)[0].replace("-", "_")
    try:
        __import__(module_name)
        print(f"βœ… {package} already installed")
        return True
    except ImportError:
        print(f"πŸ“¦ Installing {package}...")
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
            print(f"βœ… {package} installed successfully")
            return True
        except subprocess.CalledProcessError as e:
            print(f"❌ Failed to install {package}: {e}")
            return False
# Required packages for the competition
required_packages = [
    "agno",
    "openai",  # Azure OpenAI is accessed through the openai package
    "python-dotenv",
    "pandas>=1.5.0",
    "numpy>=1.21.0",
    "scikit-learn>=1.1.0",
    "tensorflow>=2.10.0",
    "matplotlib>=3.5.0",
    "seaborn>=0.11.0",
    "pydantic>=1.10.0",
    "scipy>=1.9.0",
    "plotly>=5.0.0"
]
print("πŸ”§ Installing required packages...")
failed_packages = []
for package in required_packages:
    if not install_package(package):
        failed_packages.append(package)

if failed_packages:
    print(f"\n❌ Failed to install: {failed_packages}")
    print("Please install these packages manually before proceeding.")
else:
    print("\nβœ… All packages installed successfully!")
# ===============================================================================
# CELL 3: Setup Working Directory and Validate Data
# ===============================================================================
import pandas as pd
import numpy as np
from pathlib import Path
# Setup directories
ROOT_DIR = Path("/home/jovyan/submission")
DATA_DIR = ROOT_DIR / "data"
# Create directories if they don't exist
ROOT_DIR.mkdir(exist_ok=True)
DATA_DIR.mkdir(exist_ok=True)
print(f"πŸ“ Working directory: {ROOT_DIR}")
print(f"πŸ“ Data directory: {DATA_DIR}")
# Validate data files
required_files = ["train_clean.csv", "val_clean.csv", "test_clean.csv"]
data_status = {}
print("\nπŸ“Š Validating data files...")
for file in required_files:
    file_path = DATA_DIR / file
    if file_path.exists():
        try:
            df = pd.read_csv(file_path)
            data_status[file] = {
                "exists": True,
                "rows": len(df),
                "columns": list(df.columns),
                "size_mb": file_path.stat().st_size / (1024 * 1024)
            }
            print(f"βœ… {file}: {len(df)} rows, {len(df.columns)} columns, {data_status[file]['size_mb']:.2f} MB")
        except Exception as e:
            data_status[file] = {"exists": True, "error": str(e)}
            print(f"❌ {file}: Error reading file - {e}")
    else:
        data_status[file] = {"exists": False}
        print(f"❌ {file}: File not found")
# Check if all required files exist and are valid
all_files_valid = all(
    status.get("exists", False) and "error" not in status
    for status in data_status.values()
)

if all_files_valid:
    print("\nβœ… All data files validated successfully!")
    # Show data sample
    sample_df = pd.read_csv(DATA_DIR / "train_clean.csv").head()
    print("\nπŸ“‹ Sample data structure:")
    print(sample_df)
    print(f"\nColumns: {list(sample_df.columns)}")
else:
    print("\n❌ Data validation failed. Please ensure all CSV files are in the data directory.")
# ===============================================================================
# CELL 4: Import and Initialize the Framework
# ===============================================================================
# Import the main framework
try:
    from msft_prediction_framework import (
        MSFTPredictionOrchestrator,
        JupyterInterface,
        IterativeImprovementManager,
        PerformanceMonitor,
        AzureOptimizer
    )
    print("βœ… Framework imported successfully")
except ImportError as e:
    print(f"❌ Framework import failed: {e}")
    print("Please ensure the framework file is in the correct location")
# Initialize components
print("πŸš€ Initializing framework components...")
try:
    # Setup Azure optimizations
    azure_optimizer = AzureOptimizer(memory_limit_gb=8)
    azure_optimizer.optimize_environment()
    print("βœ… Azure optimizations applied")

    # Setup Jupyter interface
    JupyterInterface.setup_environment()
    print("βœ… Jupyter environment configured")

    print("βœ… Framework initialization complete!")
except Exception as e:
    print(f"❌ Framework initialization failed: {e}")
# ===============================================================================
# CELL 5: Configuration and Agent Setup
# ===============================================================================
# Competition configuration
COMPETITION_CONFIG = {
    "target_rmse": 0.013,
    "max_iterations": 3,
    "timeout_minutes": 60,
    "memory_limit_gb": 8,
    "enable_iteration": True,
    "save_intermediate_results": True
}

print("🎯 Competition Configuration:")
for key, value in COMPETITION_CONFIG.items():
    print(f"  {key}: {value}")

# Initialize the orchestrator
print("\nπŸ€– Initializing agent orchestrator...")
try:
    orchestrator = MSFTPredictionOrchestrator()
    print("βœ… Orchestrator initialized with 4 agents:")
    print("  - EDA_Agent: Data analysis and validation")
    print("  - FeatureEngineering_Agent: Financial feature creation")
    print("  - Modeling_Agent: Neural network training")
    print("  - Evaluation_Agent: Model evaluation and scoring")
except Exception as e:
    print(f"❌ Orchestrator initialization failed: {e}")
# ===============================================================================
# CELL 6: Agent Prompt Validation
# ===============================================================================
from msft_prediction_framework import AgentPrompts
print("πŸ“ Validating agent prompts...")
# Check each agent prompt
agents = ["EDA", "Feature Engineering", "Modeling", "Evaluation"]
prompt_methods = [
    AgentPrompts.get_eda_prompt,
    AgentPrompts.get_feature_engineering_prompt,
    AgentPrompts.get_modeling_prompt,
    AgentPrompts.get_evaluation_prompt
]

for agent, method in zip(agents, prompt_methods):
    try:
        prompt = method()
        word_count = len(prompt.split())
        print(f"βœ… {agent} Agent: {word_count} words")
        # Validate prompt contains no code
        if "```" in prompt or "def " in prompt or "import " in prompt:
            print(f"⚠️ {agent} Agent: Prompt may contain code (against competition rules)")
        else:
            print(f"βœ… {agent} Agent: Prompt is code-free")
    except Exception as e:
        print(f"❌ {agent} Agent: Prompt validation failed - {e}")
print("\nβœ… Agent prompt validation complete!")
# ===============================================================================
# CELL 7: Execute Competition Pipeline
# ===============================================================================
# Setup logging for the competition run
log_file = ROOT_DIR / "competition_execution.log"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("MSFT_Competition")
print("🏁 Starting MSFT Log Return Prediction Competition")
print("=" * 60)
print(f"Target RMSE: {COMPETITION_CONFIG['target_rmse']}")
print(f"Max Iterations: {COMPETITION_CONFIG['max_iterations']}")
print(f"Timeout: {COMPETITION_CONFIG['timeout_minutes']} minutes")
print("=" * 60)
# Execute the competition
start_time = datetime.now()
try:
    # Run the complete pipeline
    results = await orchestrator.execute_pipeline(
        max_iterations=COMPETITION_CONFIG['max_iterations']
    )

    end_time = datetime.now()
    execution_time = end_time - start_time

    print("\n🎯 COMPETITION RESULTS")
    print("=" * 60)
    print(f"Execution Time: {execution_time}")
    print(f"Total Iterations: {len(results['iterations'])}")
    print(f"Best RMSE: {results['best_rmse']:.6f}")
    print(f"Target RMSE: {COMPETITION_CONFIG['target_rmse']}")
    print(f"Target Achieved: {'βœ… YES' if results['target_achieved'] else '❌ NO'}")
    print(f"Best Iteration: {results['best_iteration']}")

    # Save results
    results_file = ROOT_DIR / "competition_results.json"
    with open(results_file, "w") as f:
        json.dump({
            **results,
            "execution_time": str(execution_time),
            "config": COMPETITION_CONFIG
        }, f, indent=2)
    print(f"\nπŸ“Š Results saved to: {results_file}")

except Exception as e:
    print(f"\n❌ Competition execution failed: {e}")
    logger.error(f"Competition failed: {e}")
    raise
# ===============================================================================
# CELL 8: Validate Generated Files
# ===============================================================================
print("\nπŸ“ Validating generated files...")
# Expected files from the competition
expected_files = [
    "EDA.py",
    "FEATURE.py",
    "MODEL.py",
    "EVAL.py",
    "MSFT_Score.txt",
    "submission_log.json"
]

file_validation = {}
for file in expected_files:
    file_path = ROOT_DIR / file
    if file_path.exists():
        file_size = file_path.stat().st_size
        file_validation[file] = {
            "exists": True,
            "size_bytes": file_size,
            "size_kb": file_size / 1024
        }
        print(f"βœ… {file}: {file_size:,} bytes ({file_size/1024:.1f} KB)")

        # Special validation for specific files
        if file == "MSFT_Score.txt":
            try:
                with open(file_path, "r") as f:
                    content = f.read().strip()
                if content.startswith("RMSE: Value from Test Set :"):
                    print("   βœ… Format validation passed")
                    # Extract RMSE value
                    rmse_value = float(content.split(":")[-1].strip())
                    print(f"   πŸ“Š Test RMSE: {rmse_value:.6f}")
                    file_validation[file]["rmse"] = rmse_value
                else:
                    print("   ❌ Format validation failed")
            except Exception as e:
                print(f"   ❌ Content validation failed: {e}")
        elif file == "submission_log.json":
            try:
                with open(file_path, "r") as f:
                    log_data = json.load(f)
                required_keys = [
                    "EDA_Agent", "FeatureEngineering_Agent",
                    "Modeling_Agent", "Evaluation_Agent",
                    "EDA_Script", "FeatureEngineering_Script",
                    "Modeling_Script", "Evaluation_Script"
                ]
                missing_keys = [key for key in required_keys if key not in log_data]
                if not missing_keys:
                    print("   βœ… Schema validation passed")
                else:
                    print(f"   ❌ Missing keys: {missing_keys}")
            except Exception as e:
                print(f"   ❌ JSON validation failed: {e}")
    else:
        file_validation[file] = {"exists": False}
        print(f"❌ {file}: Not found")

# Check if all critical files exist
all_files_exist = all(validation["exists"] for validation in file_validation.values())
if all_files_exist:
    print("\nβœ… All required files generated successfully!")
else:
    missing_files = [file for file, val in file_validation.items() if not val["exists"]]
    print(f"\n❌ Missing files: {missing_files}")
# ===============================================================================
# CELL 9: Performance Analysis and Iteration Review
# ===============================================================================
print("\nπŸ“ˆ Performance Analysis")
print("=" * 60)
# Analyze iteration performance
if 'results' in locals() and results:
    improvement_manager = IterativeImprovementManager()
    performance_analysis = improvement_manager.analyze_performance(results["iterations"])

    print("Iteration Performance:")
    for i, rmse in enumerate(performance_analysis["rmse_progression"]):
        status = "🎯" if rmse < COMPETITION_CONFIG["target_rmse"] else "πŸ“Š"
        print(f"  Iteration {i+1}: {status} RMSE = {rmse:.6f}")

    print("\nBest Performance:")
    print(f"  Best RMSE: {performance_analysis['best_rmse']:.6f}")
    print(f"  Target Achieved: {'Yes' if performance_analysis['target_achieved'] else 'No'}")

    if not performance_analysis['target_achieved']:
        print("\nImprovement Suggestions:")
        for suggestion in performance_analysis['improvement_suggestions']:
            print(f"  β€’ {suggestion}")

    # Agent-specific performance
    print("\nAgent Execution Summary:")
    for iteration in results["iterations"]:
        print(f"\nIteration {iteration['iteration']}:")
        for agent_name, agent_result in iteration["agents"].items():
            status_icon = "βœ…" if agent_result["status"] == "success" else "❌"
            print(f"  {status_icon} {agent_name}: {agent_result['status']} - {agent_result['message'][:100]}...")
else:
    print("❌ No results available for analysis")
# ===============================================================================
# CELL 10: File Content Inspection
# ===============================================================================
print("\nπŸ“„ Generated File Inspection")
print("=" * 60)
def inspect_python_file(filename):
    """Inspect a generated Python file"""
    file_path = ROOT_DIR / filename
    if not file_path.exists():
        print(f"❌ {filename}: File not found")
        return
    try:
        with open(file_path, "r") as f:
            content = f.read()
        lines = content.split('\n')
        non_empty_lines = [line for line in lines if line.strip()]

        print(f"πŸ“„ {filename}:")
        print(f"  Total lines: {len(lines)}")
        print(f"  Non-empty lines: {len(non_empty_lines)}")
        print(f"  File size: {len(content)} characters")

        # Check for key components
        imports = [line for line in lines if line.strip().startswith('import ') or line.strip().startswith('from ')]
        functions = [line for line in lines if line.strip().startswith('def ')]
        classes = [line for line in lines if line.strip().startswith('class ')]
        print(f"  Imports: {len(imports)}")
        print(f"  Functions: {len(functions)}")
        print(f"  Classes: {len(classes)}")

        # Show first few lines
        print("  First 5 lines:")
        for i, line in enumerate(lines[:5]):
            print(f"    {i+1}: {line[:80]}{'...' if len(line) > 80 else ''}")
        return True
    except Exception as e:
        print(f"❌ {filename}: Error reading file - {e}")
        return False

# Inspect all generated Python files
python_files = ["EDA.py", "FEATURE.py", "MODEL.py", "EVAL.py"]
for file in python_files:
    inspect_python_file(file)
    print()
# ===============================================================================
# CELL 11: Competition Submission Validation
# ===============================================================================
print("\nπŸ† Competition Submission Validation")
print("=" * 60)
def validate_submission():
    """Validate the competition submission"""
    validation_results = {
        "files_present": True,
        "format_compliance": True,
        "content_validation": True,
        "errors": []
    }

    # Check required files
    required_files = ["EDA.py", "FEATURE.py", "MODEL.py", "EVAL.py", "MSFT_Score.txt", "submission_log.json"]
    for file in required_files:
        if not (ROOT_DIR / file).exists():
            validation_results["files_present"] = False
            validation_results["errors"].append(f"Missing file: {file}")

    # Validate MSFT_Score.txt format
    score_file = ROOT_DIR / "MSFT_Score.txt"
    if score_file.exists():
        try:
            with open(score_file, "r") as f:
                content = f.read().strip()
            if not content.startswith("RMSE: Value from Test Set :"):
                validation_results["format_compliance"] = False
                validation_results["errors"].append("MSFT_Score.txt format incorrect")
            else:
                try:
                    rmse_value = float(content.split(":")[-1].strip())
                    validation_results["test_rmse"] = rmse_value
                    validation_results["target_achieved"] = rmse_value < 0.013
                except ValueError:
                    validation_results["format_compliance"] = False
                    validation_results["errors"].append("MSFT_Score.txt RMSE value not parseable")
        except Exception as e:
            validation_results["content_validation"] = False
            validation_results["errors"].append(f"Error reading MSFT_Score.txt: {e}")

    # Validate submission_log.json
    log_file = ROOT_DIR / "submission_log.json"
    if log_file.exists():
        try:
            with open(log_file, "r") as f:
                log_data = json.load(f)
            required_keys = [
                "EDA_Agent", "FeatureEngineering_Agent", "Modeling_Agent", "Evaluation_Agent",
                "EDA_Script", "FeatureEngineering_Script", "Modeling_Script", "Evaluation_Script"
            ]
            for key in required_keys:
                if key not in log_data:
                    validation_results["content_validation"] = False
                    validation_results["errors"].append(f"Missing key in submission_log.json: {key}")
                elif not isinstance(log_data[key], (str, dict)):
                    validation_results["content_validation"] = False
                    validation_results["errors"].append(f"Invalid type for {key} in submission_log.json")

            # Check that agent entries have required fields
            for agent_key in ["EDA_Agent", "FeatureEngineering_Agent", "Modeling_Agent", "Evaluation_Agent"]:
                if agent_key in log_data and isinstance(log_data[agent_key], dict):
                    if "prompt" not in log_data[agent_key] or "output_log" not in log_data[agent_key]:
                        validation_results["content_validation"] = False
                        validation_results["errors"].append(f"Missing prompt or output_log in {agent_key}")
        except json.JSONDecodeError as e:
            validation_results["content_validation"] = False
            validation_results["errors"].append(f"Invalid JSON in submission_log.json: {e}")
        except Exception as e:
            validation_results["content_validation"] = False
            validation_results["errors"].append(f"Error reading submission_log.json: {e}")

    return validation_results

# Perform validation
validation = validate_submission()

print("Submission Validation Results:")
print(f"βœ… Files Present: {'Yes' if validation['files_present'] else 'No'}")
print(f"βœ… Format Compliance: {'Yes' if validation['format_compliance'] else 'No'}")
print(f"βœ… Content Validation: {'Yes' if validation['content_validation'] else 'No'}")

if validation.get("test_rmse"):
    print(f"πŸ“Š Test RMSE: {validation['test_rmse']:.6f}")
    print(f"🎯 Target Achieved: {'Yes' if validation.get('target_achieved') else 'No'}")

if validation["errors"]:
    print("\n❌ Validation Errors:")
    for error in validation["errors"]:
        print(f"  β€’ {error}")
else:
    print("\nβœ… All validation checks passed!")

# Overall submission status
submission_valid = (
    validation["files_present"] and
    validation["format_compliance"] and
    validation["content_validation"]
)
print(f"\nπŸ† Submission Status: {'VALID' if submission_valid else 'INVALID'}")
# ===============================================================================
# CELL 12: Final Report Generation
# ===============================================================================
print("\nπŸ“‹ Generating Final Competition Report")
print("=" * 60)
def generate_final_report():
    """Generate comprehensive final report"""
    report = {
        "competition_info": {
            "competition_name": "MSFT Log Return Prediction",
            "target_metric": "RMSE < 0.013",
            "execution_date": datetime.now().isoformat(),
            "framework": "Agno Multi-Agent System"
        },
        "execution_summary": {},
        "performance_results": {},
        "file_generation": {},
        "validation_results": validation,
        "recommendations": []
    }

    # Execution summary (check globals(): 'results' lives in the notebook's
    # global scope, not in this function's locals)
    if 'results' in globals() and results:
        report["execution_summary"] = {
            "total_iterations": len(results["iterations"]),
            "best_iteration": results["best_iteration"],
            "target_achieved": results["target_achieved"],
            "best_rmse": results["best_rmse"],
            "execution_time": str(execution_time) if 'execution_time' in globals() else "N/A"
        }

        # Performance per iteration
        report["performance_results"]["iterations"] = []
        for iteration in results["iterations"]:
            iter_summary = {
                "iteration": iteration["iteration"],
                "status": iteration["status"],
                "test_rmse": iteration.get("test_rmse"),
                "agents_executed": len(iteration.get("agents", {}))
            }
            report["performance_results"]["iterations"].append(iter_summary)

    # File generation status
    for file in expected_files:
        file_path = ROOT_DIR / file
        report["file_generation"][file] = {
            "generated": file_path.exists(),
            "size_bytes": file_path.stat().st_size if file_path.exists() else 0
        }

    # Recommendations
    if not validation.get("target_achieved", False):
        report["recommendations"].extend([
            "Consider increasing model complexity or ensemble methods",
            "Experiment with advanced feature engineering techniques",
            "Implement hyperparameter optimization",
            "Try different neural network architectures"
        ])
    if validation["errors"]:
        report["recommendations"].append("Fix validation errors before submission")

    # Save report
    report_file = ROOT_DIR / "final_competition_report.json"
    with open(report_file, "w") as f:
        json.dump(report, f, indent=2)

    return report, report_file

# Generate the report
final_report, report_path = generate_final_report()
print(f"πŸ“Š Final report generated: {report_path}")

# Display key metrics
print("\n🎯 Key Results:")
if 'results' in locals() and results:
    print(f"  Best RMSE: {results['best_rmse']:.6f}")
    print(f"  Target (0.013): {'βœ… Achieved' if results['target_achieved'] else '❌ Not achieved'}")
    print(f"  Total Iterations: {len(results['iterations'])}")
else:
    print("  ❌ No results available")

print("\nπŸ“ Generated Files:")
for file in expected_files:
    status = "βœ…" if (ROOT_DIR / file).exists() else "❌"
    print(f"  {status} {file}")

print(f"\nπŸ† Submission Ready: {'Yes' if submission_valid else 'No'}")
# ===============================================================================
# CELL 13: Competition Summary and Next Steps
# ===============================================================================
print("\n" + "="*60)
print("🏁 MSFT LOG RETURN PREDICTION COMPETITION COMPLETE")
print("="*60)
# Final summary
if 'results' in locals() and results and submission_valid:
    if results["target_achieved"]:
        print("πŸŽ‰ CONGRATULATIONS! Competition objectives achieved:")
        print(f"  βœ… Target RMSE < 0.013 achieved: {results['best_rmse']:.6f}")
        print("  βœ… All required files generated")
        print("  βœ… Submission validation passed")
        print("  πŸ† Ready for competition submission!")
    else:
        print("πŸ“ˆ Competition completed with partial success:")
        print(f"  πŸ“Š Best RMSE achieved: {results['best_rmse']:.6f}")
        print("  🎯 Target RMSE: 0.013")
        print("  βœ… All required files generated")
        print(f"  {'βœ…' if submission_valid else '❌'} Submission validation: {'Passed' if submission_valid else 'Failed'}")
else:
    print("❌ Competition execution encountered issues")
    print("  Please review the error logs and try again")
print(f"\nπŸ“‚ Submission Files Location: {ROOT_DIR}")
print(f"πŸ“‹ Detailed logs: {ROOT_DIR}/competition_execution.log")
print(f"πŸ“Š Final report: {ROOT_DIR}/final_competition_report.json")
# Next steps
print(f"\nπŸš€ Next Steps:")
if 'results' in locals() and results and results["target_achieved"] and submission_valid:
    print("1. βœ… Your submission is ready!")
    print("2. πŸ“ Submit all generated files from the submission directory")
    print("3. πŸ† Upload submission_log.json as your primary submission")
else:
    print("1. πŸ“Š Review the performance analysis and error logs")
    print("2. πŸ”§ Consider running additional iterations with improvements")
    print("3. 🎯 Focus on achieving RMSE < 0.013")
    print("4. βœ… Ensure all validation checks pass before submission")
print(f"\n⏰ Competition completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("="*60)
# ===============================================================================
# CELL 14: Optional - Manual Testing and Debugging
# ===============================================================================
print("\nπŸ”§ Manual Testing and Debugging Tools")
print("="*60)
def test_individual_script(script_name):
    """Test an individual generated script"""
    script_path = ROOT_DIR / script_name
    if not script_path.exists():
        print(f"❌ {script_name} not found")
        return False

    print(f"πŸ§ͺ Testing {script_name}...")
    try:
        # Read and validate syntax
        with open(script_path, "r") as f:
            content = f.read()

        # Basic syntax check
        compile(content, script_name, 'exec')
        print("  βœ… Syntax validation passed")

        # Try to execute (commented out for safety in notebook)
        # exec(content)
        print("  ⚠️ Execution test skipped (uncomment to run)")
        return True
    except SyntaxError as e:
        print(f"  ❌ Syntax error: {e}")
        return False
    except Exception as e:
        print(f"  ❌ Execution error: {e}")
        return False

def show_file_contents(filename, max_lines=50):
    """Display contents of a file"""
    file_path = ROOT_DIR / filename
    if not file_path.exists():
        print(f"❌ {filename} not found")
        return

    print(f"πŸ“„ Contents of {filename} (first {max_lines} lines):")
    print("-" * 60)
    try:
        with open(file_path, "r") as f:
            lines = f.readlines()
        for i, line in enumerate(lines[:max_lines], 1):
            print(f"{i:3d}: {line.rstrip()}")
        if len(lines) > max_lines:
            print(f"... ({len(lines) - max_lines} more lines)")
    except Exception as e:
        print(f"❌ Error reading file: {e}")
# Testing interface
print("Available testing functions:")
print("β€’ test_individual_script('EDA.py') - Test EDA script")
print("β€’ test_individual_script('FEATURE.py') - Test Feature script")
print("β€’ test_individual_script('MODEL.py') - Test Model script")
print("β€’ test_individual_script('EVAL.py') - Test Evaluation script")
print("β€’ show_file_contents('filename.txt') - Display file contents")
# Example usage (uncomment to run):
# test_individual_script('EDA.py')
# show_file_contents('MSFT_Score.txt')
# ===============================================================================
# CELL 15: Advanced Analysis and Improvement Suggestions
# ===============================================================================
print("\nπŸ“ˆ Advanced Performance Analysis")
print("="*60)
def analyze_rmse_progression():
    """Analyze RMSE progression across iterations"""
    # 'results' is defined in the notebook's global scope, so check globals()
    # (locals() inside this function would never contain it)
    if 'results' not in globals() or not results:
        print("❌ No results available for analysis")
        return

    iterations = results["iterations"]
    rmse_values = []
    print("RMSE Progression Analysis:")
    print("-" * 40)
    for i, iteration in enumerate(iterations):
        rmse = iteration.get("test_rmse", float('inf'))
        rmse_values.append(rmse)
        improvement = ""
        if i > 0 and rmse < rmse_values[i-1]:
            improvement = f" (↓ {rmse_values[i-1] - rmse:.6f})"
        elif i > 0 and rmse > rmse_values[i-1]:
            improvement = f" (↑ {rmse - rmse_values[i-1]:.6f})"
        status = "🎯" if rmse < 0.013 else "πŸ“Š"
        print(f"Iteration {i+1}: {status} {rmse:.6f}{improvement}")

    # Calculate statistics
    if rmse_values:
        best_rmse = min(rmse_values)
        worst_rmse = max(rmse_values)
        avg_rmse = sum(rmse_values) / len(rmse_values)
        print("\nStatistics:")
        print(f"  Best RMSE: {best_rmse:.6f}")
        print(f"  Worst RMSE: {worst_rmse:.6f}")
        print(f"  Average RMSE: {avg_rmse:.6f}")
        print(f"  Improvement: {worst_rmse - best_rmse:.6f}")
        print(f"  Target Gap: {best_rmse - 0.013:.6f}")

def generate_improvement_strategy():
    """Generate specific improvement strategies"""
    if 'results' not in globals() or not results:
        print("❌ No results available for strategy generation")
        return

    best_rmse = results["best_rmse"]
    target_rmse = 0.013
    print("🎯 Improvement Strategy Recommendations:")
    print("-" * 40)
    if best_rmse > 0.02:
        print("πŸ“Š Current performance: POOR (RMSE > 0.02)")
        print("Recommended actions:")
        print("  1. πŸ”§ Completely redesign feature engineering")
        print("  2. 🧠 Try ensemble methods (Random Forest + LSTM)")
        print("  3. πŸ“Š Add more technical indicators and market features")
        print("  4. ⏱️ Increase sequence length for LSTM")
        print("  5. πŸŽ›οΈ Implement hyperparameter optimization")
    elif best_rmse > 0.015:
        print("πŸ“Š Current performance: MODERATE (RMSE 0.015-0.02)")
        print("Recommended actions:")
        print("  1. πŸŽ›οΈ Fine-tune model hyperparameters")
        print("  2. πŸ”§ Add regularization (dropout, L1/L2)")
        print("  3. πŸ“ˆ Implement feature selection/importance")
        print("  4. 🧠 Try attention mechanisms in LSTM")
        print("  5. πŸ“Š Add more sophisticated features")
    elif best_rmse > 0.013:
        print("πŸ“Š Current performance: GOOD (RMSE 0.013-0.015)")
        print("Recommended actions:")
        print("  1. 🎯 Fine-tune learning rate and batch size")
        print("  2. πŸ”§ Implement early stopping optimization")
        print("  3. πŸ“Š Feature engineering optimization")
        print("  4. 🧠 Model architecture tweaks")
        print("  5. πŸ“ˆ Cross-validation improvements")
    else:
        print("🎯 Current performance: EXCELLENT (RMSE < 0.013)")
        print("πŸ† Target achieved! Consider:")
        print("  1. βœ… Submit current solution")
        print("  2. πŸ”§ Further optimize for robustness")
        print("  3. πŸ“Š Document successful strategy")
# Run advanced analysis
analyze_rmse_progression()
print()
generate_improvement_strategy()
# ===============================================================================
# FINAL CELL: Cleanup and Summary
# ===============================================================================
print("\n" + "="*80)
print("πŸŽ‰ MSFT PREDICTION COMPETITION FRAMEWORK EXECUTION COMPLETE")
print("="*80)
# Final status summary
final_status = {
    "framework_executed": True,
    "files_generated": all((ROOT_DIR / f).exists() for f in expected_files),
    "target_achieved": 'results' in locals() and results and results.get('target_achieved', False),
    "submission_valid": submission_valid if 'submission_valid' in locals() else False
}
print("πŸ“Š FINAL STATUS SUMMARY:")
print(f" Framework Execution: {'βœ… Complete' if final_status['framework_executed'] else '❌ Failed'}")
print(f" File Generation: {'βœ… Complete' if final_status['files_generated'] else '❌ Incomplete'}")
print(f" Target Achievement: {'βœ… RMSE < 0.013' if final_status['target_achieved'] else '❌ Target not met'}")
print(f" Submission Validation: {'βœ… Valid' if final_status['submission_valid'] else '❌ Invalid'}")
# Competition score
score = sum(final_status.values())
print(f"\nπŸ† OVERALL SCORE: {score}/4")
if score == 4:
    print("πŸŽ‰ PERFECT SCORE! Ready for competition submission!")
elif score >= 3:
    print("πŸ‘ GOOD SCORE! Minor improvements needed.")
elif score >= 2:
    print("πŸ“ˆ MODERATE SCORE! Some work required.")
else:
    print("πŸ”§ NEEDS WORK! Review errors and retry.")
print(f"\nπŸ“ All files available at: {ROOT_DIR}")
print(f"πŸ“§ Support: Review logs in {ROOT_DIR}/competition_execution.log")
print(f"πŸš€ Framework: Agno Multi-Agent System v1.0")
print(f"⏰ Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("\n" + "="*80)
print("Thank you for using the MSFT Prediction Competition Framework!")
print("="*80)