```python
#!/usr/bin/env python3
"""
DeepSeek API Batch Processor

This script reads markdown files from ./prompts_for_raw_chapters/,
sends them to the DeepSeek API, and saves responses to ./responses_from_deepseek/

Requirements:
- pip install openai python-dotenv
- Create a .env file with: DEEPSEEK_API_KEY=your_api_key_here
"""
import os
import glob
import time
from pathlib import Path

from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables
load_dotenv()


class DeepSeekBatchProcessor:
    def __init__(self):
        self.api_key = os.getenv('DEEPSEEK_API_KEY')
        if not self.api_key:
            raise ValueError("DEEPSEEK_API_KEY environment variable is required")

        # Initialize OpenAI client with DeepSeek settings
        self.client = OpenAI(
            api_key=self.api_key,
            base_url="https://api.deepseek.com"
        )

        # Set up directories
        self.prompts_dir = Path("./prompts_for_raw_chapters/")
        self.responses_dir = Path("./responses_from_deepseek/")

        # Create responses directory if it doesn't exist
        self.responses_dir.mkdir(exist_ok=True)

    def read_prompt_file(self, filepath):
        """Read the content of a prompt file"""
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                return f.read().strip()
        except Exception as e:
            print(f"Error reading file {filepath}: {e}")
            return None

    def send_to_deepseek(self, prompt, model="deepseek-chat"):
        """Send prompt to DeepSeek API and return response"""
        try:
            response = self.client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "user", "content": prompt}
                ],
                stream=False
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"API Error: {e}")
            return None

    def save_response(self, response_text, output_filename):
        """Save response to file"""
        try:
            output_path = self.responses_dir / output_filename
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(response_text)
            print(f"✓ Saved response to {output_path}")
            return True
        except Exception as e:
            print(f"Error saving response to {output_filename}: {e}")
            return False

    def get_prompt_files(self):
        """Get all prompt files matching the pattern"""
        pattern = str(self.prompts_dir / "prompt_chapter_*.md")
        files = glob.glob(pattern)
        # Sort files to process in order
        return sorted(files)

    def process_all_prompts(self, delay_seconds=1):
        """Process all prompt files"""
        prompt_files = self.get_prompt_files()
        if not prompt_files:
            print(f"No files found matching pattern: {self.prompts_dir}/prompt_chapter_*.md")
            return

        print(f"Found {len(prompt_files)} prompt files to process")
        print(f"Responses will be saved to: {self.responses_dir}")
        print("-" * 50)

        successful = 0
        failed = 0

        for i, filepath in enumerate(prompt_files, 1):
            filename = Path(filepath).name
            print(f"Processing {i}/{len(prompt_files)}: {filename}")

            # Read the prompt
            prompt = self.read_prompt_file(filepath)
            if prompt is None:
                print(f"✗ Skipped {filename} (read error)")
                failed += 1
                continue

            # Send to DeepSeek
            response = self.send_to_deepseek(prompt)
            if response is None:
                print(f"✗ Failed to get response for {filename}")
                failed += 1
                continue

            # Generate output filename
            # Convert prompt_chapter_01.md -> response_chapter_01.md
            output_filename = filename.replace("prompt_", "response_")

            # Save response
            if self.save_response(response, output_filename):
                successful += 1
            else:
                failed += 1

            # Add delay to avoid rate limiting
            if i < len(prompt_files):  # Don't delay after the last file
                print(f"Waiting {delay_seconds} seconds...")
                time.sleep(delay_seconds)

        print("-" * 50)
        print("Processing complete!")
        print(f"✓ Successful: {successful}")
        print(f"✗ Failed: {failed}")


def main():
    """Main function"""
    try:
        processor = DeepSeekBatchProcessor()
        processor.process_all_prompts(delay_seconds=1)
    except KeyboardInterrupt:
        print("\nProcess interrupted by user")
    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    main()
```