-
-
Save theletterf/56bc8e3465cdaf1f0a9e8f6a75db108b to your computer and use it in GitHub Desktop.
Reddit posts analyzer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import praw | |
| import json | |
| import csv | |
| from datetime import datetime | |
| import re | |
| from collections import defaultdict | |
class RedditTechWritingAnalyzer:
    """Collect posts from a subreddit (default r/technicalwriting) and prepare
    them for downstream semantic analysis.

    Typical workflow:
        setup_reddit_connection() -> collect_posts() -> save_data()
        -> export_for_claude_analysis() and/or identify_anxious_patterns()

    NOTE(review): the "β"/"π" characters in the console messages look like
    mojibake of the original emoji (e.g. a checkmark) — confirm against the
    source encoding before "fixing" them; they are preserved as-is here.
    """

    def __init__(self):
        """
        Initialize the Reddit analyzer.
        You'll need to set up Reddit API credentials at https://www.reddit.com/prefs/apps/
        """
        self.reddit = None    # praw.Reddit client, set by setup_reddit_connection()
        self.posts_data = []  # one dict per collected post (see collect_posts())

    def setup_reddit_connection(self, client_id, client_secret, user_agent):
        """
        Set up connection to Reddit API.

        Returns:
            bool: True if the praw client was created, False on any error
            (the error is printed, never propagated).
        """
        try:
            self.reddit = praw.Reddit(
                client_id=client_id,
                client_secret=client_secret,
                user_agent=user_agent
            )
            print("β Reddit connection established")
            return True
        except Exception as e:
            print(f"β Failed to connect to Reddit: {e}")
            return False

    def collect_posts(self, subreddit_name="technicalwriting", limit=300, time_filter="month"):
        """
        Collect posts from r/technicalwriting (or any other subreddit).

        Samples roughly ``limit`` posts split evenly across the hot/new/top
        listings, then de-duplicates by post id, since the same post can
        appear in more than one listing.

        Args:
            subreddit_name: subreddit to sample (without the "r/" prefix).
            limit: approximate total number of posts to request.
            time_filter: praw time filter for the "top" listing
                (e.g. "day", "week", "month", "year", "all").

        Returns:
            bool: True on success, False if not connected or the API failed.
        """
        if not self.reddit:
            print("β Reddit connection not established")
            return False
        try:
            subreddit = self.reddit.subreddit(subreddit_name)
            # Get posts from different categories. max(1, ...) keeps tiny
            # limits (< 3) from requesting zero posts per listing.
            per_source = max(1, limit // 3)
            post_sources = [
                ("hot", subreddit.hot(limit=per_source)),
                ("new", subreddit.new(limit=per_source)),
                ("top", subreddit.top(time_filter=time_filter, limit=per_source))
            ]
            for source_name, posts in post_sources:
                print(f"Collecting {source_name} posts...")
                for post in posts:
                    self.posts_data.append({
                        'id': post.id,
                        'title': post.title,
                        'selftext': post.selftext,
                        'score': post.score,
                        'num_comments': post.num_comments,
                        'created_utc': post.created_utc,
                        'url': post.url,
                        'source': source_name
                    })
            # Remove duplicates based on post ID. A dict keyed by id keeps the
            # first occurrence and preserves insertion order (Python 3.7+).
            unique_by_id = {}
            for post in self.posts_data:
                unique_by_id.setdefault(post['id'], post)
            self.posts_data = list(unique_by_id.values())
            print(f"β Collected {len(self.posts_data)} unique posts")
            return True
        except Exception as e:
            print(f"β Error collecting posts: {e}")
            return False

    def save_data(self, filename_prefix="technicalwriting_posts"):
        """
        Save collected data to JSON and CSV files.

        Filenames are suffixed with a local timestamp so repeated runs never
        overwrite earlier exports.

        Returns:
            tuple[str, str]: (json_filename, csv_filename) that were written.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # Save as JSON
        json_filename = f"{filename_prefix}_{timestamp}.json"
        with open(json_filename, 'w', encoding='utf-8') as f:
            json.dump(self.posts_data, f, indent=2, ensure_ascii=False)
        # Save as CSV for easy viewing
        csv_filename = f"{filename_prefix}_{timestamp}.csv"
        with open(csv_filename, 'w', newline='', encoding='utf-8') as f:
            if self.posts_data:
                # Field order follows the first post's dict; all posts share
                # the same keys (built in collect_posts).
                writer = csv.DictWriter(f, fieldnames=self.posts_data[0].keys())
                writer.writeheader()
                writer.writerows(self.posts_data)
        print(f"β Data saved to {json_filename} and {csv_filename}")
        return json_filename, csv_filename

    def prepare_for_analysis(self, include_content=True):
        """
        Prepare clean data for Claude to analyze without any preprocessing filters.

        Args:
            include_content: when True, non-blank post bodies are included
                under the 'content' key.

        Returns:
            list[dict]: slimmed-down copies of every collected post.
        """
        analysis_data = []
        for post in self.posts_data:
            # Create clean data structure for analysis
            clean_post = {
                'id': post['id'],
                'title': post['title'],
                'score': post['score'],
                'num_comments': post['num_comments'],
                'created_utc': post['created_utc']
            }
            # Include post content if requested and non-blank. .get()/or ''
            # tolerates posts reloaded from external files where 'selftext'
            # may be missing or None.
            if include_content and (post.get('selftext') or '').strip():
                clean_post['content'] = post['selftext']
            analysis_data.append(clean_post)
        return analysis_data

    def export_for_claude_analysis(self, max_posts=200):
        """
        Export data in a format optimized for Claude analysis.

        Args:
            max_posts: cap on the number of posts kept; when exceeded, posts
                are ranked by engagement (score + comments) and truncated.

        Returns:
            dict: metadata plus the 'posts' list ready to be serialized.
        """
        analysis_data = self.prepare_for_analysis()
        # Limit to most relevant posts to stay within context limits
        if len(analysis_data) > max_posts:
            # Sort by engagement (score + comments), highest first
            analysis_data.sort(key=lambda x: x['score'] + x['num_comments'], reverse=True)
            analysis_data = analysis_data[:max_posts]
        # Create formatted output for Claude
        return {
            'total_posts_collected': len(self.posts_data),
            'posts_for_analysis': len(analysis_data),
            'collection_date': datetime.now().isoformat(),
            'posts': analysis_data
        }

    def identify_anxious_patterns(self):
        """
        Identify patterns that suggest anxiety or uncertainty in posts.

        Matches each post's title + body against per-category keyword regexes,
        and the title alone against "anxious question" punctuation patterns.
        A post is recorded at most once per category (first matching pattern).

        Returns:
            dict[str, list[dict]]: category name -> matched posts, each with
            title/id/score/comments and the regex that matched. Categories
            with no matches are absent from the result.
        """
        # Anxiety indicators
        anxiety_patterns = {
            'validation_seeking': [
                r'\bshould i\b', r'\bam i wrong\b', r'\bis it okay\b', r'\bis this normal\b',
                r'\bdoes anyone else\b', r'\bis it just me\b', r'\bvalidate\b'
            ],
            'imposter_syndrome': [
                r'\bimposter\b', r'\bnot qualified\b', r'\bdont belong\b', r'\bfake it\b',
                r'\bnot good enough\b', r'\bunqualified\b', r'\bfraud\b'
            ],
            'career_anxiety': [
                r'\bcareer change\b', r'\bswitch\b', r'\btoo late\b', r'\bworth it\b',
                r'\bmistake\b', r'\bregret\b', r'\bscared\b', r'\bworried\b'
            ],
            'skill_doubt': [
                r'\bnot technical enough\b', r'\bdont know\b', r'\blearning curve\b',
                r'\bstruggling with\b', r'\bdifficult\b', r'\boverwhelmed\b'
            ],
            'job_insecurity': [
                r'\bjob market\b', r'\bgetting hired\b', r'\bcompetitive\b', r'\brejection\b',
                r'\bunemployed\b', r'\blaid off\b', r'\binsecure\b'
            ]
        }
        # Question patterns that suggest anxiety
        anxious_question_patterns = [
            r'\?{2,}',  # Multiple question marks
            r'\bhelp\?', r'\badvice\?', r'\bthoughts\?', r'\bopinions\?'
        ]
        # Compile each pattern once, not once per post. The text is already
        # lower-cased; IGNORECASE is kept for parity with the original code.
        compiled_categories = {
            category: [(p, re.compile(p, re.IGNORECASE)) for p in patterns]
            for category, patterns in anxiety_patterns.items()
        }
        compiled_questions = [(p, re.compile(p)) for p in anxious_question_patterns]

        results = defaultdict(list)
        for post in self.posts_data:
            # .get()/or '' tolerates a missing or None 'selftext' field.
            full_text = f"{post['title']} {post.get('selftext') or ''}".lower()

            def _match_record(pattern):
                # Snapshot of the post plus the pattern that flagged it.
                return {
                    'title': post['title'],
                    'id': post['id'],
                    'score': post['score'],
                    'comments': post['num_comments'],
                    'matched_pattern': pattern
                }

            # Check for anxiety patterns
            for category, patterns in compiled_categories.items():
                for raw_pattern, regex in patterns:
                    if regex.search(full_text):
                        results[category].append(_match_record(raw_pattern))
                        break  # Don't double-count same post
            # Check for anxious question patterns (title only)
            title_lower = post['title'].lower()
            for raw_pattern, regex in compiled_questions:
                if regex.search(title_lower):
                    results['anxious_questions'].append(_match_record(raw_pattern))
                    break
        return dict(results)
# Usage example
if __name__ == "__main__":
    print("π Reddit Technical Writing Analyzer")
    print("=" * 50)
    analyzer = RedditTechWritingAnalyzer()
    # You'll need to fill these in with your Reddit API credentials
    CLIENT_ID = input("Enter your Reddit CLIENT_ID: ").strip()
    CLIENT_SECRET = input("Enter your Reddit CLIENT_SECRET: ").strip()
    USER_AGENT = input("Enter your username for USER_AGENT (e.g., 'YourUsername'): ").strip()
    USER_AGENT = f"TechWritingAnalyzer/1.0 by {USER_AGENT}"
    # Bug fix: the original only validated CLIENT_ID, so an empty secret
    # slipped through and only failed later inside praw.
    if not CLIENT_ID or not CLIENT_SECRET:
        print("β Reddit credentials not found")
        exit()
    if analyzer.setup_reddit_connection(CLIENT_ID, CLIENT_SECRET, USER_AGENT):
        if analyzer.collect_posts():
            json_file, csv_file = analyzer.save_data()
            print("\nπ€ Preparing data for Claude analysis...")
            claude_data = analyzer.export_for_claude_analysis()
            # Save the analysis-ready data
            analysis_filename = f"claude_analysis_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            with open(analysis_filename, 'w', encoding='utf-8') as f:
                json.dump(claude_data, f, indent=2, ensure_ascii=False)
            print(f"β Analysis data ready: {analysis_filename}")
            print(f"π {claude_data['posts_for_analysis']} posts prepared for semantic analysis")
            print("\nπ€ Next: Share this file with Claude for unbiased semantic analysis!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment