#!/usr/bin/env python3
"""Audit the post flairs of a subreddit against the list of known flairs."""

import time
from collections import Counter, defaultdict
from datetime import datetime, timezone

import praw

import config

# --- SETTINGS ---
START_YEAR = 2024
START_MONTH = 12  # December

# Flair texts that are considered valid; anything else is reported as unknown.
AVAILABLE_FLAIRS = [
    "Moderator Announcement πŸ“’",
    "Horny😈",
    "Appreciation ☺️",
    "Meme!πŸ˜‹",
    "LoreπŸ“šβœοΈ",
    "Hunter😈",
    "Truth or Dare - NNYC Edition - πŸ₯΅",
    "Truth or Dare - NNYC Edition - πŸ’ž",
    "Triumph Diary",
    "Defeat Diary",
    "cursed memeπŸ™ƒπŸ‘Ώ",
    "Gamble πŸŽ°πŸŽ²πŸƒ",
    "DDD Challenge",
    "Holly Jolly Time πŸŽ„β›„πŸ¦ŒπŸŽπŸͺ🌟",
    "Happy New Year 🎊πŸ₯‚πŸŽ‰πŸ€",
    "Easter Egg-Stravaganza πŸ°πŸ‡",
    "[Game] - Would you rather...⁉️",
    ":pokeball::pokeball: Gotta catch 'em all :pokeball::pokeball:",
    "Will you be my Valentine πŸ’–β€οΈπŸ’Œ",
]


class StatsHelper:
    """Prints flair statistics for the subreddit named in ``config``."""

    def __init__(self):
        print("πŸŽ„ Initializing StatsHelper...")

    def get_next_month_start(self, year: int, month: int) -> tuple:
        """Return the ``(year, month)`` pair of the month after the given one.

        December (12) rolls over to January (1) of the following year.
        """
        if month == 12:
            return year + 1, 1
        return year, month + 1

    def process_batch(self, batch_name: str, post_list) -> None:
        """Classify the flairs of a batch of posts and print a short report.

        Args:
            batch_name: Label printed in the report header.
            post_list: Iterable of objects exposing ``link_flair_text``.

        Each post is counted as having no flair, a known flair (listed in
        ``AVAILABLE_FLAIRS``) or an unknown flair. Unknown flairs are listed
        with their counts, most frequent first.
        """
        print(f"\nπŸ”΅ PROCESSING: {batch_name} (Limit: ~1000)")
        print("-" * 60)

        # Build the lookup set once instead of scanning the list per post.
        known_flairs = frozenset(AVAILABLE_FLAIRS)

        unknown_flairs = Counter()
        matched_count = 0
        no_flair_count = 0
        total_scanned = 0

        for post in post_list:
            total_scanned += 1
            flair = post.link_flair_text

            if not flair:
                # Missing (None) or empty flair text.
                no_flair_count += 1
            elif flair in known_flairs:
                matched_count += 1
            else:
                unknown_flairs[flair] += 1

        # --- REPORT FOR THIS BATCH ---
        print(f"Total Retrieved: {total_scanned}")
        print(f"βœ… Known Flairs: {matched_count}")
        print(f"πŸ‘» No Flair: {no_flair_count}")

        if unknown_flairs:
            print(f"⚠️ UNKNOWN FLAIRS: {sum(unknown_flairs.values())}")
            print(" (Check these for typos/spaces)")
            for flair_text, count in unknown_flairs.most_common():
                # Use repr() to see hidden spaces, e.g. 'Meme '
                print(f" {flair_text!r:<40} : {count}")
        else:
            print("πŸŽ‰ No unknown flairs found.")
        print("-" * 60)

    def analyze(self, random_samples: int = 10000, pause_every: int = 10,
                pause_seconds: float = 2) -> None:
        """Sample random posts and report unflaired or unexpectedly flaired ones.

        Logs in to Reddit with the credentials from ``config`` and repeatedly
        requests a random submission from ``config.subreddit_name``.

        Args:
            random_samples: Maximum number of random posts to request.
            pause_every: Sleep after every this-many requests; a falsy value
                disables the pause.
            pause_seconds: Length of each pause in seconds.

        Any exception raised while sampling is caught and printed, so this
        method does not propagate errors from the sampling loop.
        """
        reddit = praw.Reddit(
            client_id=config.eventBotClientId,
            client_secret=config.eventBotSecret,
            user_agent=config.user_agent,
            username=config.megathread_username,
            password=config.megathread_password,
        )

        subreddit = reddit.subreddit(config.subreddit_name)

        # --- DISABLED: per-flair search with monthly breakdown ---
        # # We use a dictionary of Counters:
        # # Structure: { "2023-10": Counter({'FlairA': 10, 'FlairB': 2}), "2023-11": ... }
        # monthly_stats = defaultdict(Counter)
        # # We will also create a counter for the TOTAL stats across all months
        # grand_total_stats = Counter()
        #
        # print(f"Starting precise calendar analysis for r/{config.subreddit_name}...")
        #
        # total_posts_processed = 0
        #
        # for available_flair in AVAILABLE_FLAIRS:
        #     # Prepare the query
        #     # syntax='cloudsearch' requires the timestamp field
        #     # include_over_18 ensures we see posts even if sub is flagged mature
        #     query = f'flair:"{available_flair}"'
        #     print(f"Checking {available_flair}...")
        #
        #     try:
        #         posts = list(subreddit.search(
        #             query,
        #             limit=None,
        #             syntax="cloudsearch",
        #             params={'include_over_18': 'on'}
        #         ))
        #
        #         overall_count = len(posts)
        #         print(f"Found {overall_count} posts for {available_flair}...")
        #
        #         count_for_flair = 0
        #         for post in posts:
        #             # 2. Convert timestamp to "YYYY-MM" string key
        #             dt = datetime.fromtimestamp(post.created_utc, timezone.utc)
        #             month_key = dt.strftime('%Y-%m')
        #
        #             # 3. Add to stats
        #             # We use 'available_flair' to ensure the naming is consistent
        #             # (even if the post has slightly messed up encoding)
        #             monthly_stats[month_key][available_flair] += 1
        #             # 4. Add to Grand Total stats
        #             grand_total_stats[available_flair] += 1
        #             total_posts_processed += 1
        #             count_for_flair += 1
        #
        #         if count_for_flair == 0:
        #             print("⚠️ (0 in range)")
        #         else:
        #             print(f"βœ… ({count_for_flair} found)")
        #
        #     except Exception as e:
        #         print(f"\n❌ Error: {e}")
        #
        # # --- OUTPUT RESULTS ---
        # print("\n" + "=" * 60)
        # print(f"FINAL REPORT: {total_posts_processed} POSTS ANALYZED")
        # print("=" * 60)
        #
        # # Sort keys to ensure chronological printing (2024-12, then 2025-01)
        # sorted_months = sorted(monthly_stats.keys())
        #
        # if not sorted_months:
        #     print("No posts found in the specified timeframe.")
        #
        # for month_key in sorted_months:
        #     print(f"\nπŸ“… PERIOD: {month_key}")
        #     print(f"{'FLAIR':<40} | {'COUNT':<5} | {'%'}")
        #     print("-" * 60)
        #
        #     total_in_month = sum(monthly_stats[month_key].values())
        #
        #     # Sort flairs by popularity within that month
        #     for flair, count in monthly_stats[month_key].most_common():
        #         percentage = (count / total_in_month) * 100
        #
        #         # Truncate flair name if it's too long for the table
        #         display_flair = (flair[:37] + '..') if len(flair) > 37 else flair
        #
        #         print(f"{display_flair:<40} | {count:<5} | {percentage:.1f}%")
        #
        # # --- OUTPUT 2: GRAND TOTAL SUMMARY ---
        # print("\n" + "=" * 60)
        # print(f"PART 2: GRAND TOTAL SUMMARY (ALL MONTHS COMBINED)")
        # print(f"Total Posts: {total_posts_processed}")
        # print("=" * 60)
        #
        # print(f"\nπŸ“… PERIOD: ALL TIME ({sorted_months[0]} to {sorted_months[-1]})")
        # print(f"{'FLAIR':<40} | {'COUNT':<5} | {'%'}")
        # print("-" * 60)
        #
        # for flair, count in grand_total_stats.most_common():
        #     # Percentage relative to the TOTAL posts analyzed across the whole period
        #     percentage = (count / total_posts_processed) * 100
        #
        #     display_flair = (flair[:37] + '..') if len(flair) > 37 else flair
        #     print(f"{display_flair:<40} | {count:<5} | {percentage:.1f}%")

        # One more search for potentially missed posts
        try:
            # --- DISABLED: listing-based scans ---
            # print("\nFetching .new() ...")
            # posts_new = list(subreddit.new(limit=None))
            # self.process_batch("SUBREDDIT.NEW", posts_new)
            #
            # # 2. FETCH HOT (Algorithm sorted, max ~1000)
            # print("\nFetching .hot() ...")
            # posts_hot = list(subreddit.hot(limit=None))
            # self.process_batch("SUBREDDIT.HOT", posts_hot)
            #
            # # 2. FETCH TOP (Algorithm sorted, max ~1000)
            # print("\nFetching .top() ...")
            # posts_hot = list(subreddit.top(limit=None))
            # self.process_batch("SUBREDDIT.TOP", posts_hot)
            #
            # # 2. FETCH CONTROVERSIAL (Algorithm sorted, max ~1000)
            # print("\nFetching .controversial() ...")
            # posts_hot = list(subreddit.controversial(limit=None))
            # self.process_batch("SUBREDDIT.CONTROVERSIAL", posts_hot)
            #
            # # 2. FETCH RISING (Algorithm sorted, max ~1000)
            # print("\nFetching .rising() ...")
            # posts_hot = list(subreddit.rising(limit=None))
            # self.process_batch("SUBREDDIT.RISING", posts_hot)

            print("\nFetching .random() ...")
            # A bounded for-loop guarantees termination; the previous while
            # loop never advanced its counter and therefore ran forever.
            for request_number in range(1, random_samples + 1):
                rand_post = subreddit.random()

                # NOTE(review): PRAW documents that random() returns None when
                # the subreddit does not support the feature - confirm for the
                # installed PRAW version. Retrying would not help, so stop.
                if rand_post is None:
                    print("⚠️ No random post returned; stopping random sampling.")
                    break

                flair = rand_post.link_flair_text

                if not flair:
                    # Check 1: No Flair
                    print(f"Found unflaired post => {rand_post.id}")
                elif flair not in AVAILABLE_FLAIRS:
                    # Check 2: Known List
                    print(f"Found unexpected flair => {flair}")

                # Throttle on every request path, including unflaired posts.
                if pause_every and request_number % pause_every == 0:
                    time.sleep(pause_seconds)

        except Exception as e:
            # Top-level boundary of the script: report and return.
            print(f"\n❌ Error: {e}")