Created
January 20, 2024 01:52
-
-
Save evangriffiths/9e3be550d80ff4b675e6db5770e1d1f2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark script: builds a Benchmarker over a batch of Manifold markets,
# runs the listed agents, and writes the generated markdown report to a
# timestamped file in the current directory.
import time

import evo_researcher.benchmark.benchmark as bm
from evo_researcher.benchmark.utils import get_manifold_markets

benchmarker = bm.Benchmarker(
    markets=get_manifold_markets(number=24),
    agents=[
        bm.EvoAgent(model="gpt-4-1106-preview"),
        bm.OlasAgent(model="gpt-3.5-turbo"),
    ],
    # NOTE(review): presumably lets repeated runs reuse earlier agent
    # results -- confirm against Benchmarker's implementation.
    cache_path="./.cache.json",
)

benchmarker.run_agents()
md = benchmarker.generate_markdown_report()

# The whole-second Unix timestamp in the file name keeps reports from separate
# runs from overwriting each other.
output = f"./benchmark_report.{int(time.time())}.md"

# Write as UTF-8 explicitly: the report text may contain non-ASCII characters,
# and the platform default encoding is not guaranteed to be able to encode
# them.
with open(output, "w", encoding="utf-8") as f:
    print(f"Writing benchmark report to: {output}")
    f.write(md)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Comparison Report
Summary Statistics
p_yes
Markets