# MSFT Log Return Prediction Competition - Jupyter Notebook
# Multi-Agent Framework Implementation
"""
This notebook implements a complete 4-agent system for predicting MSFT stock log returns.
The agents iterate on the pipeline with the goal of driving the test RMSE below 0.013.

Agents:
1. EDA_Agent: Exploratory data analysis and validation
2. FeatureEngineering_Agent: Financial feature creation
3. Modeling_Agent: Neural network training and optimization
4. Evaluation_Agent: Model evaluation and submission generation
"""
# ===============================================================================
# CELL 1: Environment Setup and Imports
# ===============================================================================
import os
import sys
import asyncio
import json
import logging
from pathlib import Path
from datetime import datetime

# Add the framework to path (if not installed as a package)
sys.path.append('/home/jovyan/submission')

# Environment configuration
from dotenv import load_dotenv
load_dotenv(override=True)

# Verify environment variables
print("🔧 Checking environment configuration...")
required_env_vars = ["APIM_KEY", "AZURE_APIM_ENDPONT"]
missing_vars = [var for var in required_env_vars if not os.getenv(var)]

if missing_vars:
    print(f"❌ Missing environment variables: {missing_vars}")
    print("Please set these in your .env file:")
    for var in missing_vars:
        print(f"  {var}=your_value_here")
else:
    print("✅ Environment variables configured")
    print(f"Azure Endpoint: {os.getenv('AZURE_APIM_ENDPONT', 'Not set')}")
    print(f"API Key configured: {'Yes' if os.getenv('APIM_KEY') else 'No'}")
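# For reference, a minimal .env template matching the check above. The values are
# placeholders (not real credentials); the variable names must match whatever the
# framework itself reads from the environment:
#
#   APIM_KEY=your-apim-subscription-key
#   AZURE_APIM_ENDPONT=https://your-apim-instance.azure-api.net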
# ===============================================================================
# CELL 2: Install Required Packages
# ===============================================================================
import subprocess
import sys

def install_package(package):
    """Install a package with pip if it cannot already be imported."""
    # Strip extras and version specifiers to get the distribution name,
    # then map it to the module name used for the import check.
    dist_name = package.split("[")[0]
    for sep in ("<", ">", "=", "!"):
        dist_name = dist_name.split(sep)[0]
    import_name = {
        "python-dotenv": "dotenv",
        "scikit-learn": "sklearn",
    }.get(dist_name, dist_name.replace("-", "_"))
    try:
        __import__(import_name)
        print(f"✅ {package} already installed")
        return True
    except ImportError:
        print(f"📦 Installing {package}...")
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
            print(f"✅ {package} installed successfully")
            return True
        except subprocess.CalledProcessError as e:
            print(f"❌ Failed to install {package}: {e}")
            return False
# Required packages for the competition
required_packages = [
    "agno",
    "azure-openai",
    "python-dotenv",
    "pandas>=1.5.0",
    "numpy>=1.21.0",
    "scikit-learn>=1.1.0",
    "tensorflow>=2.10.0",
    "matplotlib>=3.5.0",
    "seaborn>=0.11.0",
    "pydantic>=1.10.0",
    "scipy>=1.9.0",
    "plotly>=5.0.0"
]

print("🔧 Installing required packages...")
failed_packages = []
for package in required_packages:
    if not install_package(package):
        failed_packages.append(package)

if failed_packages:
    print(f"\n❌ Failed to install: {failed_packages}")
    print("Please install these packages manually before proceeding.")
else:
    print("\n✅ All packages installed successfully!")
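# Alternatively, everything can be installed in a single pip invocation
# (a minimal sketch; assumes network access and permission to install packages):
#
# subprocess.check_call([sys.executable, "-m", "pip", "install", *required_packages])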
# ===============================================================================
# CELL 3: Setup Working Directory and Validate Data
# ===============================================================================
import pandas as pd
import numpy as np
from pathlib import Path

# Setup directories
ROOT_DIR = Path("/home/jovyan/submission")
DATA_DIR = ROOT_DIR / "data"

# Create directories if they don't exist
ROOT_DIR.mkdir(exist_ok=True)
DATA_DIR.mkdir(exist_ok=True)

print(f"📁 Working directory: {ROOT_DIR}")
print(f"📁 Data directory: {DATA_DIR}")

# Validate data files
required_files = ["train_clean.csv", "val_clean.csv", "test_clean.csv"]
data_status = {}

print("\n🔍 Validating data files...")
for file in required_files:
    file_path = DATA_DIR / file
    if file_path.exists():
        try:
            df = pd.read_csv(file_path)
            data_status[file] = {
                "exists": True,
                "rows": len(df),
                "columns": list(df.columns),
                "size_mb": file_path.stat().st_size / (1024 * 1024)
            }
            print(f"✅ {file}: {len(df)} rows, {len(df.columns)} columns, {data_status[file]['size_mb']:.2f} MB")
        except Exception as e:
            data_status[file] = {"exists": True, "error": str(e)}
            print(f"❌ {file}: Error reading file - {e}")
    else:
        data_status[file] = {"exists": False}
        print(f"❌ {file}: File not found")

# Check that all required files exist and are valid
all_files_valid = all(
    status.get("exists", False) and "error" not in status
    for status in data_status.values()
)

if all_files_valid:
    print("\n✅ All data files validated successfully!")
    # Show a data sample
    sample_df = pd.read_csv(DATA_DIR / "train_clean.csv").head()
    print("\n📊 Sample data structure:")
    print(sample_df)
    print(f"\nColumns: {list(sample_df.columns)}")
else:
    print("\n❌ Data validation failed. Please ensure all CSV files are in the data directory.")
# ===============================================================================
# CELL 4: Import and Initialize the Framework
# ===============================================================================
# Import the main framework
try:
    from msft_prediction_framework import (
        MSFTPredictionOrchestrator,
        JupyterInterface,
        IterativeImprovementManager,
        PerformanceMonitor,
        AzureOptimizer
    )
    print("✅ Framework imported successfully")
except ImportError as e:
    print(f"❌ Framework import failed: {e}")
    print("Please ensure the framework file is in the correct location")

# Initialize components
print("🚀 Initializing framework components...")
try:
    # Setup Azure optimizations
    azure_optimizer = AzureOptimizer(memory_limit_gb=8)
    azure_optimizer.optimize_environment()
    print("✅ Azure optimizations applied")

    # Setup Jupyter interface
    JupyterInterface.setup_environment()
    print("✅ Jupyter environment configured")

    print("✅ Framework initialization complete!")
except Exception as e:
    print(f"❌ Framework initialization failed: {e}")
# ===============================================================================
# CELL 5: Configuration and Agent Setup
# ===============================================================================
# Competition configuration
COMPETITION_CONFIG = {
    "target_rmse": 0.013,
    "max_iterations": 3,
    "timeout_minutes": 60,
    "memory_limit_gb": 8,
    "enable_iteration": True,
    "save_intermediate_results": True
}

print("🎯 Competition Configuration:")
for key, value in COMPETITION_CONFIG.items():
    print(f"  {key}: {value}")

# Initialize the orchestrator
print("\n🤖 Initializing agent orchestrator...")
try:
    orchestrator = MSFTPredictionOrchestrator()
    print("✅ Orchestrator initialized with 4 agents:")
    print("  - EDA_Agent: Data analysis and validation")
    print("  - FeatureEngineering_Agent: Financial feature creation")
    print("  - Modeling_Agent: Neural network training")
    print("  - Evaluation_Agent: Model evaluation and scoring")
except Exception as e:
    print(f"❌ Orchestrator initialization failed: {e}")
# ===============================================================================
# CELL 6: Agent Prompt Validation
# ===============================================================================
from msft_prediction_framework import AgentPrompts

print("🔍 Validating agent prompts...")

# Check each agent prompt
agents = ["EDA", "Feature Engineering", "Modeling", "Evaluation"]
prompt_methods = [
    AgentPrompts.get_eda_prompt,
    AgentPrompts.get_feature_engineering_prompt,
    AgentPrompts.get_modeling_prompt,
    AgentPrompts.get_evaluation_prompt
]

for agent, method in zip(agents, prompt_methods):
    try:
        prompt = method()
        word_count = len(prompt.split())
        print(f"✅ {agent} Agent: {word_count} words")

        # Validate that the prompt contains no code
        if "```" in prompt or "def " in prompt or "import " in prompt:
            print(f"⚠️ {agent} Agent: Prompt may contain code (against competition rules)")
        else:
            print(f"✅ {agent} Agent: Prompt is code-free")
    except Exception as e:
        print(f"❌ {agent} Agent: Prompt validation failed - {e}")

print("\n✅ Agent prompt validation complete!")
# ===============================================================================
# CELL 7: Execute Competition Pipeline
# ===============================================================================
# Setup logging for the competition run
log_file = ROOT_DIR / "competition_execution.log"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("MSFT_Competition")

print("🚀 Starting MSFT Log Return Prediction Competition")
print("=" * 60)
print(f"Target RMSE: {COMPETITION_CONFIG['target_rmse']}")
print(f"Max Iterations: {COMPETITION_CONFIG['max_iterations']}")
print(f"Timeout: {COMPETITION_CONFIG['timeout_minutes']} minutes")
print("=" * 60)

# Execute the competition
start_time = datetime.now()
try:
    # Run the complete pipeline
    results = await orchestrator.execute_pipeline(
        max_iterations=COMPETITION_CONFIG['max_iterations']
    )
    end_time = datetime.now()
    execution_time = end_time - start_time

    print("\n🎯 COMPETITION RESULTS")
    print("=" * 60)
    print(f"Execution Time: {execution_time}")
    print(f"Total Iterations: {len(results['iterations'])}")
    print(f"Best RMSE: {results['best_rmse']:.6f}")
    print(f"Target RMSE: {COMPETITION_CONFIG['target_rmse']}")
    print(f"Target Achieved: {'✅ YES' if results['target_achieved'] else '❌ NO'}")
    print(f"Best Iteration: {results['best_iteration']}")

    # Save results
    results_file = ROOT_DIR / "competition_results.json"
    with open(results_file, "w") as f:
        json.dump({
            **results,
            "execution_time": str(execution_time),
            "config": COMPETITION_CONFIG
        }, f, indent=2)
    print(f"\n📁 Results saved to: {results_file}")
except Exception as e:
    print(f"\n❌ Competition execution failed: {e}")
    logger.error(f"Competition failed: {e}")
    raise
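# Note: the `await` above relies on the notebook's running event loop (IPython
# supports top-level await). If this cell is ever run as a plain script, the same
# pipeline could be launched with asyncio instead -- a sketch, assuming
# execute_pipeline is a coroutine as used above:
#
# results = asyncio.run(
#     orchestrator.execute_pipeline(max_iterations=COMPETITION_CONFIG['max_iterations'])
# )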
# ===============================================================================
# CELL 8: Validate Generated Files
# ===============================================================================
print("\n🔍 Validating generated files...")

# Expected files from the competition
expected_files = [
    "EDA.py",
    "FEATURE.py",
    "MODEL.py",
    "EVAL.py",
    "MSFT_Score.txt",
    "submission_log.json"
]

file_validation = {}
for file in expected_files:
    file_path = ROOT_DIR / file
    if file_path.exists():
        file_size = file_path.stat().st_size
        file_validation[file] = {
            "exists": True,
            "size_bytes": file_size,
            "size_kb": file_size / 1024
        }
        print(f"✅ {file}: {file_size:,} bytes ({file_size/1024:.1f} KB)")

        # Special validation for specific files
        if file == "MSFT_Score.txt":
            try:
                with open(file_path, "r") as f:
                    content = f.read().strip()
                if content.startswith("RMSE: Value from Test Set :"):
                    print(f"  ✅ Format validation passed")
                    # Extract the RMSE value
                    rmse_value = float(content.split(":")[-1].strip())
                    print(f"  📊 Test RMSE: {rmse_value:.6f}")
                    file_validation[file]["rmse"] = rmse_value
                else:
                    print(f"  ❌ Format validation failed")
            except Exception as e:
                print(f"  ❌ Content validation failed: {e}")
        elif file == "submission_log.json":
            try:
                with open(file_path, "r") as f:
                    log_data = json.load(f)
                required_keys = [
                    "EDA_Agent", "FeatureEngineering_Agent",
                    "Modeling_Agent", "Evaluation_Agent",
                    "EDA_Script", "FeatureEngineering_Script",
                    "Modeling_Script", "Evaluation_Script"
                ]
                missing_keys = [key for key in required_keys if key not in log_data]
                if not missing_keys:
                    print(f"  ✅ Schema validation passed")
                else:
                    print(f"  ❌ Missing keys: {missing_keys}")
            except Exception as e:
                print(f"  ❌ JSON validation failed: {e}")
    else:
        file_validation[file] = {"exists": False}
        print(f"❌ {file}: Not found")

# Check that all critical files exist
all_files_exist = all(validation["exists"] for validation in file_validation.values())

if all_files_exist:
    print("\n✅ All required files generated successfully!")
else:
    missing_files = [file for file, val in file_validation.items() if not val["exists"]]
    print(f"\n❌ Missing files: {missing_files}")
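# For reference, a sketch of the contents the two validators above expect.
# The RMSE value and log entries are illustrative placeholders, not real outputs:
#
# MSFT_Score.txt -- a single line of the form:
#   RMSE: Value from Test Set : 0.012345
#
# submission_log.json -- one entry per agent (prompt + output log) plus the
# generated script for each stage, e.g.:
#   {
#     "EDA_Agent": {"prompt": "...", "output_log": "..."},
#     "FeatureEngineering_Agent": {"prompt": "...", "output_log": "..."},
#     "Modeling_Agent": {"prompt": "...", "output_log": "..."},
#     "Evaluation_Agent": {"prompt": "...", "output_log": "..."},
#     "EDA_Script": "...",
#     "FeatureEngineering_Script": "...",
#     "Modeling_Script": "...",
#     "Evaluation_Script": "..."
#   }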
# ===============================================================================
# CELL 9: Performance Analysis and Iteration Review
# ===============================================================================
print("\n📊 Performance Analysis")
print("=" * 60)

# Analyze iteration performance
if 'results' in locals() and results:
    improvement_manager = IterativeImprovementManager()
    performance_analysis = improvement_manager.analyze_performance(results["iterations"])

    print("Iteration Performance:")
    for i, rmse in enumerate(performance_analysis["rmse_progression"]):
        status = "🎯" if rmse < COMPETITION_CONFIG["target_rmse"] else "📈"
        print(f"  Iteration {i+1}: {status} RMSE = {rmse:.6f}")

    print(f"\nBest Performance:")
    print(f"  Best RMSE: {performance_analysis['best_rmse']:.6f}")
    print(f"  Target Achieved: {'Yes' if performance_analysis['target_achieved'] else 'No'}")

    if not performance_analysis['target_achieved']:
        print(f"\nImprovement Suggestions:")
        for suggestion in performance_analysis['improvement_suggestions']:
            print(f"  • {suggestion}")

    # Agent-specific performance
    print(f"\nAgent Execution Summary:")
    for iteration in results["iterations"]:
        print(f"\nIteration {iteration['iteration']}:")
        for agent_name, agent_result in iteration["agents"].items():
            status_icon = "✅" if agent_result["status"] == "success" else "❌"
            print(f"  {status_icon} {agent_name}: {agent_result['status']} - {agent_result['message'][:100]}...")
else:
    print("❌ No results available for analysis")
# ===============================================================================
# CELL 10: File Content Inspection
# ===============================================================================
print("\n🔍 Generated File Inspection")
print("=" * 60)

def inspect_python_file(filename):
    """Inspect a generated Python file"""
    file_path = ROOT_DIR / filename
    if not file_path.exists():
        print(f"❌ {filename}: File not found")
        return

    try:
        with open(file_path, "r") as f:
            content = f.read()
        lines = content.split('\n')
        non_empty_lines = [line for line in lines if line.strip()]

        print(f"📄 {filename}:")
        print(f"  Total lines: {len(lines)}")
        print(f"  Non-empty lines: {len(non_empty_lines)}")
        print(f"  File size: {len(content)} characters")

        # Check for key components
        imports = [line for line in lines if line.strip().startswith('import ') or line.strip().startswith('from ')]
        functions = [line for line in lines if line.strip().startswith('def ')]
        classes = [line for line in lines if line.strip().startswith('class ')]
        print(f"  Imports: {len(imports)}")
        print(f"  Functions: {len(functions)}")
        print(f"  Classes: {len(classes)}")

        # Show the first few lines
        print(f"  First 5 lines:")
        for i, line in enumerate(lines[:5]):
            print(f"    {i+1}: {line[:80]}{'...' if len(line) > 80 else ''}")
        return True
    except Exception as e:
        print(f"❌ {filename}: Error reading file - {e}")
        return False

# Inspect all generated Python files
python_files = ["EDA.py", "FEATURE.py", "MODEL.py", "EVAL.py"]
for file in python_files:
    inspect_python_file(file)
    print()
# ===============================================================================
# CELL 11: Competition Submission Validation
# ===============================================================================
print("\n🏆 Competition Submission Validation")
print("=" * 60)

def validate_submission():
    """Validate the competition submission"""
    validation_results = {
        "files_present": True,
        "format_compliance": True,
        "content_validation": True,
        "errors": []
    }

    # Check required files
    required_files = ["EDA.py", "FEATURE.py", "MODEL.py", "EVAL.py", "MSFT_Score.txt", "submission_log.json"]
    for file in required_files:
        if not (ROOT_DIR / file).exists():
            validation_results["files_present"] = False
            validation_results["errors"].append(f"Missing file: {file}")

    # Validate MSFT_Score.txt format
    score_file = ROOT_DIR / "MSFT_Score.txt"
    if score_file.exists():
        try:
            with open(score_file, "r") as f:
                content = f.read().strip()
            if not content.startswith("RMSE: Value from Test Set :"):
                validation_results["format_compliance"] = False
                validation_results["errors"].append("MSFT_Score.txt format incorrect")
            else:
                try:
                    rmse_value = float(content.split(":")[-1].strip())
                    validation_results["test_rmse"] = rmse_value
                    validation_results["target_achieved"] = rmse_value < 0.013
                except ValueError:
                    validation_results["format_compliance"] = False
                    validation_results["errors"].append("MSFT_Score.txt RMSE value not parseable")
        except Exception as e:
            validation_results["content_validation"] = False
            validation_results["errors"].append(f"Error reading MSFT_Score.txt: {e}")

    # Validate submission_log.json
    log_file = ROOT_DIR / "submission_log.json"
    if log_file.exists():
        try:
            with open(log_file, "r") as f:
                log_data = json.load(f)

            required_keys = [
                "EDA_Agent", "FeatureEngineering_Agent", "Modeling_Agent", "Evaluation_Agent",
                "EDA_Script", "FeatureEngineering_Script", "Modeling_Script", "Evaluation_Script"
            ]
            for key in required_keys:
                if key not in log_data:
                    validation_results["content_validation"] = False
                    validation_results["errors"].append(f"Missing key in submission_log.json: {key}")
                elif not isinstance(log_data[key], (str, dict)):
                    validation_results["content_validation"] = False
                    validation_results["errors"].append(f"Invalid type for {key} in submission_log.json")

            # Check that agent entries carry the required fields
            for agent_key in ["EDA_Agent", "FeatureEngineering_Agent", "Modeling_Agent", "Evaluation_Agent"]:
                if agent_key in log_data and isinstance(log_data[agent_key], dict):
                    if "prompt" not in log_data[agent_key] or "output_log" not in log_data[agent_key]:
                        validation_results["content_validation"] = False
                        validation_results["errors"].append(f"Missing prompt or output_log in {agent_key}")
        except json.JSONDecodeError as e:
            validation_results["content_validation"] = False
            validation_results["errors"].append(f"Invalid JSON in submission_log.json: {e}")
        except Exception as e:
            validation_results["content_validation"] = False
            validation_results["errors"].append(f"Error reading submission_log.json: {e}")

    return validation_results

# Perform validation
validation = validate_submission()

print("Submission Validation Results:")
print(f"✅ Files Present: {'Yes' if validation['files_present'] else 'No'}")
print(f"✅ Format Compliance: {'Yes' if validation['format_compliance'] else 'No'}")
print(f"✅ Content Validation: {'Yes' if validation['content_validation'] else 'No'}")

if validation.get("test_rmse"):
    print(f"📊 Test RMSE: {validation['test_rmse']:.6f}")
    print(f"🎯 Target Achieved: {'Yes' if validation.get('target_achieved') else 'No'}")

if validation["errors"]:
    print(f"\n❌ Validation Errors:")
    for error in validation["errors"]:
        print(f"  • {error}")
else:
    print(f"\n✅ All validation checks passed!")

# Overall submission status
submission_valid = (
    validation["files_present"] and
    validation["format_compliance"] and
    validation["content_validation"]
)
print(f"\n🏆 Submission Status: {'VALID' if submission_valid else 'INVALID'}")
# ===============================================================================
# CELL 12: Final Report Generation
# ===============================================================================
print("\n📝 Generating Final Competition Report")
print("=" * 60)

def generate_final_report():
    """Generate a comprehensive final report"""
    report = {
        "competition_info": {
            "competition_name": "MSFT Log Return Prediction",
            "target_metric": "RMSE < 0.013",
            "execution_date": datetime.now().isoformat(),
            "framework": "Agno Multi-Agent System"
        },
        "execution_summary": {},
        "performance_results": {},
        "file_generation": {},
        "validation_results": validation,
        "recommendations": []
    }

    # Execution summary: results and execution_time live in the notebook's global
    # namespace, so check globals() (this function's locals() would never contain them)
    if 'results' in globals() and results:
        report["execution_summary"] = {
            "total_iterations": len(results["iterations"]),
            "best_iteration": results["best_iteration"],
            "target_achieved": results["target_achieved"],
            "best_rmse": results["best_rmse"],
            "execution_time": str(execution_time) if 'execution_time' in globals() else "N/A"
        }

        # Performance per iteration
        report["performance_results"]["iterations"] = []
        for iteration in results["iterations"]:
            iter_summary = {
                "iteration": iteration["iteration"],
                "status": iteration["status"],
                "test_rmse": iteration.get("test_rmse"),
                "agents_executed": len(iteration.get("agents", {}))
            }
            report["performance_results"]["iterations"].append(iter_summary)

    # File generation status
    for file in expected_files:
        file_path = ROOT_DIR / file
        report["file_generation"][file] = {
            "generated": file_path.exists(),
            "size_bytes": file_path.stat().st_size if file_path.exists() else 0
        }

    # Recommendations
    if not validation.get("target_achieved", False):
        report["recommendations"].extend([
            "Consider increasing model complexity or ensemble methods",
            "Experiment with advanced feature engineering techniques",
            "Implement hyperparameter optimization",
            "Try different neural network architectures"
        ])
    if validation["errors"]:
        report["recommendations"].append("Fix validation errors before submission")

    # Save the report
    report_file = ROOT_DIR / "final_competition_report.json"
    with open(report_file, "w") as f:
        json.dump(report, f, indent=2)

    return report, report_file

# Generate the report
final_report, report_path = generate_final_report()
print(f"📝 Final report generated: {report_path}")

# Display key metrics
print(f"\n🎯 Key Results:")
if 'results' in locals() and results:
    print(f"  Best RMSE: {results['best_rmse']:.6f}")
    print(f"  Target (0.013): {'✅ Achieved' if results['target_achieved'] else '❌ Not achieved'}")
    print(f"  Total Iterations: {len(results['iterations'])}")
else:
    print(f"  ❌ No results available")

print(f"\n📁 Generated Files:")
for file in expected_files:
    status = "✅" if (ROOT_DIR / file).exists() else "❌"
    print(f"  {status} {file}")

print(f"\n🏆 Submission Ready: {'Yes' if submission_valid else 'No'}")
# ===============================================================================
# CELL 13: Competition Summary and Next Steps
# ===============================================================================
print("\n" + "=" * 60)
print("🏆 MSFT LOG RETURN PREDICTION COMPETITION COMPLETE")
print("=" * 60)

# Final summary
if 'results' in locals() and results and submission_valid:
    if results["target_achieved"]:
        print("🎉 CONGRATULATIONS! Competition objectives achieved:")
        print(f"  ✅ Target RMSE < 0.013 achieved: {results['best_rmse']:.6f}")
        print(f"  ✅ All required files generated")
        print(f"  ✅ Submission validation passed")
        print(f"  🏆 Ready for competition submission!")
    else:
        print("📈 Competition completed with partial success:")
        print(f"  📊 Best RMSE achieved: {results['best_rmse']:.6f}")
        print(f"  🎯 Target RMSE: 0.013")
        print(f"  ✅ All required files generated")
        print(f"  {'✅' if submission_valid else '❌'} Submission validation: {'Passed' if submission_valid else 'Failed'}")
else:
    print("❌ Competition execution encountered issues")
    print("  Please review the error logs and try again")

print(f"\n📁 Submission Files Location: {ROOT_DIR}")
print(f"📄 Detailed logs: {ROOT_DIR}/competition_execution.log")
print(f"📝 Final report: {ROOT_DIR}/final_competition_report.json")

# Next steps
print(f"\n🚀 Next Steps:")
if 'results' in locals() and results and results["target_achieved"] and submission_valid:
    print("1. ✅ Your submission is ready!")
    print("2. 📤 Submit all generated files from the submission directory")
    print("3. 📋 Upload submission_log.json as your primary submission")
else:
    print("1. 📊 Review the performance analysis and error logs")
    print("2. 🔧 Consider running additional iterations with improvements")
    print("3. 🎯 Focus on achieving RMSE < 0.013")
    print("4. ✅ Ensure all validation checks pass before submission")

print(f"\n⏰ Competition completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("=" * 60)
# ===============================================================================
# CELL 14: Optional - Manual Testing and Debugging
# ===============================================================================
print("\n🔧 Manual Testing and Debugging Tools")
print("=" * 60)

def test_individual_script(script_name):
    """Test an individual generated script"""
    script_path = ROOT_DIR / script_name
    if not script_path.exists():
        print(f"❌ {script_name} not found")
        return False

    print(f"🧪 Testing {script_name}...")
    try:
        # Read and validate syntax
        with open(script_path, "r") as f:
            content = f.read()

        # Basic syntax check
        compile(content, script_name, 'exec')
        print(f"  ✅ Syntax validation passed")

        # Try to execute (commented out for safety in the notebook)
        # exec(content)
        print(f"  ⚠️ Execution test skipped (uncomment to run)")
        return True
    except SyntaxError as e:
        print(f"  ❌ Syntax error: {e}")
        return False
    except Exception as e:
        print(f"  ❌ Execution error: {e}")
        return False

def show_file_contents(filename, max_lines=50):
    """Display the contents of a file"""
    file_path = ROOT_DIR / filename
    if not file_path.exists():
        print(f"❌ {filename} not found")
        return

    print(f"📄 Contents of {filename} (first {max_lines} lines):")
    print("-" * 60)
    try:
        with open(file_path, "r") as f:
            lines = f.readlines()
        for i, line in enumerate(lines[:max_lines], 1):
            print(f"{i:3d}: {line.rstrip()}")
        if len(lines) > max_lines:
            print(f"... ({len(lines) - max_lines} more lines)")
    except Exception as e:
        print(f"❌ Error reading file: {e}")

# Testing interface
print("Available testing functions:")
print("• test_individual_script('EDA.py') - Test EDA script")
print("• test_individual_script('FEATURE.py') - Test Feature script")
print("• test_individual_script('MODEL.py') - Test Model script")
print("• test_individual_script('EVAL.py') - Test Evaluation script")
print("• show_file_contents('filename.txt') - Display file contents")

# Example usage (uncomment to run):
# test_individual_script('EDA.py')
# show_file_contents('MSFT_Score.txt')
# ===============================================================================
# CELL 15: Advanced Analysis and Improvement Suggestions
# ===============================================================================
print("\n📈 Advanced Performance Analysis")
print("=" * 60)

def analyze_rmse_progression():
    """Analyze RMSE progression across iterations"""
    # results is defined at notebook scope, so check globals(); a function's
    # locals() would never contain it and the analysis would always be skipped
    if 'results' not in globals() or not results:
        print("❌ No results available for analysis")
        return

    iterations = results["iterations"]
    rmse_values = []

    print("RMSE Progression Analysis:")
    print("-" * 40)
    for i, iteration in enumerate(iterations):
        rmse = iteration.get("test_rmse", float('inf'))
        rmse_values.append(rmse)

        improvement = ""
        if i > 0 and rmse < rmse_values[i-1]:
            improvement = f" (↓ {rmse_values[i-1] - rmse:.6f})"
        elif i > 0 and rmse > rmse_values[i-1]:
            improvement = f" (↑ {rmse - rmse_values[i-1]:.6f})"

        status = "🎯" if rmse < 0.013 else "📈"
        print(f"Iteration {i+1}: {status} {rmse:.6f}{improvement}")

    # Calculate statistics
    if rmse_values:
        best_rmse = min(rmse_values)
        worst_rmse = max(rmse_values)
        avg_rmse = sum(rmse_values) / len(rmse_values)
        print(f"\nStatistics:")
        print(f"  Best RMSE: {best_rmse:.6f}")
        print(f"  Worst RMSE: {worst_rmse:.6f}")
        print(f"  Average RMSE: {avg_rmse:.6f}")
        print(f"  Improvement: {worst_rmse - best_rmse:.6f}")
        print(f"  Target Gap: {best_rmse - 0.013:.6f}")

def generate_improvement_strategy():
    """Generate specific improvement strategies"""
    # Same scoping note as above: results lives in the notebook's globals
    if 'results' not in globals() or not results:
        print("❌ No results available for strategy generation")
        return

    best_rmse = results["best_rmse"]
    target_rmse = 0.013

    print("🎯 Improvement Strategy Recommendations:")
    print("-" * 40)
    if best_rmse > 0.02:
        print("📊 Current performance: POOR (RMSE > 0.02)")
        print("Recommended actions:")
        print("  1. 🔧 Completely redesign feature engineering")
        print("  2. 🧠 Try ensemble methods (Random Forest + LSTM)")
        print("  3. 📈 Add more technical indicators and market features")
        print("  4. ⏱️ Increase sequence length for LSTM")
        print("  5. 🎛️ Implement hyperparameter optimization")
    elif best_rmse > 0.015:
        print("📊 Current performance: MODERATE (RMSE 0.015-0.02)")
        print("Recommended actions:")
        print("  1. 🎛️ Fine-tune model hyperparameters")
        print("  2. 🧠 Add regularization (dropout, L1/L2)")
        print("  3. 📈 Implement feature selection/importance")
        print("  4. 🧠 Try attention mechanisms in LSTM")
        print("  5. 📊 Add more sophisticated features")
    elif best_rmse > 0.013:
        print("📊 Current performance: GOOD (RMSE 0.013-0.015)")
        print("Recommended actions:")
        print("  1. 🎯 Fine-tune learning rate and batch size")
        print("  2. 🧠 Implement early stopping optimization")
        print("  3. 📈 Feature engineering optimization")
        print("  4. 🧠 Model architecture tweaks")
        print("  5. 📊 Cross-validation improvements")
    else:
        print("🎯 Current performance: EXCELLENT (RMSE < 0.013)")
        print("🏆 Target achieved! Consider:")
        print("  1. ✅ Submit current solution")
        print("  2. 🔧 Further optimize for robustness")
        print("  3. 📝 Document successful strategy")

# Run advanced analysis
analyze_rmse_progression()
print()
generate_improvement_strategy()
# ===============================================================================
# FINAL CELL: Cleanup and Summary
# ===============================================================================
print("\n" + "=" * 80)
print("🏁 MSFT PREDICTION COMPETITION FRAMEWORK EXECUTION COMPLETE")
print("=" * 80)

# Final status summary
final_status = {
    "framework_executed": True,
    "files_generated": all((ROOT_DIR / f).exists() for f in expected_files),
    "target_achieved": 'results' in locals() and results and results.get('target_achieved', False),
    "submission_valid": submission_valid if 'submission_valid' in locals() else False
}

print("📊 FINAL STATUS SUMMARY:")
print(f"  Framework Execution: {'✅ Complete' if final_status['framework_executed'] else '❌ Failed'}")
print(f"  File Generation: {'✅ Complete' if final_status['files_generated'] else '❌ Incomplete'}")
print(f"  Target Achievement: {'✅ RMSE < 0.013' if final_status['target_achieved'] else '❌ Target not met'}")
print(f"  Submission Validation: {'✅ Valid' if final_status['submission_valid'] else '❌ Invalid'}")

# Competition score: count how many of the four status checks passed
score = sum(bool(v) for v in final_status.values())
print(f"\n🏆 OVERALL SCORE: {score}/4")

if score == 4:
    print("🎉 PERFECT SCORE! Ready for competition submission!")
elif score >= 3:
    print("👍 GOOD SCORE! Minor improvements needed.")
elif score >= 2:
    print("📈 MODERATE SCORE! Some work required.")
else:
    print("🔧 NEEDS WORK! Review errors and retry.")

print(f"\n📁 All files available at: {ROOT_DIR}")
print(f"🔧 Support: Review logs in {ROOT_DIR}/competition_execution.log")
print(f"🤖 Framework: Agno Multi-Agent System v1.0")
print(f"⏰ Completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

print("\n" + "=" * 80)
print("Thank you for using the MSFT Prediction Competition Framework!")
print("=" * 80)