Files
christmasevent/StatsHelper.py
Alexander Dörflinger 783a95f4b4 Initial commit
2025-12-08 10:58:53 +01:00

249 lines
8.9 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
import praw
import time
from datetime import datetime, timezone
from collections import Counter, defaultdict
import config
# --- SETTINGS ---
# NOTE(review): START_YEAR / START_MONTH are not referenced by any code visible
# in this file; presumably they mark the first month of the analysis window —
# confirm before relying on them.
START_YEAR = 2024
START_MONTH = 12 # December
# Flair texts that count as "known". Post flairs are compared against these
# entries with an exact `in AVAILABLE_FLAIRS` membership test, so every emoji,
# space and invisible character (e.g. emoji variation selectors) must match
# the flair text exactly. Edit these strings with care.
AVAILABLE_FLAIRS=[
"Moderator Announcement 📢",
"Horny😈",
"Appreciation ☺️",
"Meme!😋",
"Lore📚✍",
"Hunter😈",
"Truth or Dare - NNYC Edition - 🥵",
"Truth or Dare - NNYC Edition - 💞",
"Triumph Diary",
"Defeat Diary",
"cursed meme🙃👿",
"Gamble 🎰🎲🃏",
"DDD Challenge",
"Holly Jolly Time 🎄⛄🦌🎁🍪🌟",
"Happy New Year 🎊🥂🎉🍀",
"Easter Egg-Stravaganza 🐰🐇",
"[Game] - Would you rather...⁉️",
":pokeball::pokeball: Gotta catch 'em all :pokeball::pokeball:",
"Will you be my Valentine 💖❤️💌"
]
class StatsHelper:
    """Console diagnostics for the post flairs of the configured subreddit.

    Posts are classified by their ``link_flair_text`` as unflaired, known
    (listed in ``AVAILABLE_FLAIRS``) or unknown, and the findings are printed
    to stdout.
    """

    def __init__(self):
        """Announce start-up on stdout; no instance state is created."""
        print("🎄 Initializing StatsHelper...")

    def get_next_month_start(self, year, month):
        """Return ``(year, month)`` of the month following the given one.

        December (``12``) rolls over to January of the next year; any other
        value is simply incremented (no range validation is performed).
        """
        if month == 12:
            return year + 1, 1
        return year, month + 1

    def process_batch(self, batch_name, post_list):
        """Classify the flairs of a batch of posts and print a summary.

        Args:
            batch_name: Label printed in the report header.
            post_list: Iterable of objects exposing ``link_flair_text``.
        """
        print(f"\n🔵 PROCESSING: {batch_name} (Limit: ~1000)")
        print("-" * 60)

        unknown_flairs = Counter()
        matched_count = 0
        no_flair_count = 0
        total_scanned = 0

        for post in post_list:
            total_scanned += 1
            flair = post.link_flair_text
            if not flair:
                # Check 1: no flair at all (None or empty string).
                no_flair_count += 1
            elif flair in AVAILABLE_FLAIRS:
                # Check 2: exact match against the known list.
                matched_count += 1
            else:
                unknown_flairs[flair] += 1

        # --- REPORT FOR THIS BATCH ---
        print(f"Total Retrieved: {total_scanned}")
        print(f"✅ Known Flairs: {matched_count}")
        print(f"👻 No Flair: {no_flair_count}")
        if unknown_flairs:
            print(f"⚠️ UNKNOWN FLAIRS: {sum(unknown_flairs.values())}")
            print(" (Check these for typos/spaces)")
            for flair_text, count in unknown_flairs.most_common():
                # !r (repr) makes hidden spaces visible, e.g. 'Meme '.
                print(f" {flair_text!r:<40} : {count}")
        else:
            print("🎉 No unknown flairs found.")
        print("-" * 60)

    def _scan_random_posts(self, subreddit, attempts=10000, known_flairs=None,
                           pause_every=10, pause_seconds=2):
        """Fetch random posts and print those with a missing or unknown flair.

        Args:
            subreddit: Object whose ``random()`` returns a post exposing
                ``id`` and ``link_flair_text``, or ``None``.
            attempts: Maximum number of ``random()`` calls.
            known_flairs: Flair texts considered valid; defaults to
                ``AVAILABLE_FLAIRS``.
            pause_every: Sleep after every this-many fetches (falsy disables).
            pause_seconds: Length of each pause in seconds.

        Returns:
            The number of posts actually fetched.
        """
        known = frozenset(
            AVAILABLE_FLAIRS if known_flairs is None else known_flairs
        )
        fetched = 0
        # A bounded for-loop guarantees termination; the previous while-loop
        # never advanced its counter and therefore never finished on its own.
        for attempt in range(1, attempts + 1):
            rand_post = subreddit.random()
            if rand_post is None:
                # No random post available: further calls would be pointless,
                # and reading attributes of None would raise AttributeError.
                print("subreddit.random() returned no post - stopping random scan.")
                break
            fetched += 1

            flair = rand_post.link_flair_text
            if not flair:
                # Check 1: No Flair
                print(f"Found unflaired post => {rand_post.id}")
            elif flair not in known:
                # Check 2: Known List
                print(f"Found unexpected flair => {flair}")

            # Throttle on every path (unflaired posts used to skip the pause).
            if pause_every and attempt % pause_every == 0:
                time.sleep(pause_seconds)
        return fetched

    def analyze(self):
        """Connect to Reddit and spot-check random posts for flair problems.

        A PRAW client is created from the credentials in ``config``, then
        random posts of ``config.subreddit_name`` are sampled and every post
        without a flair or with a flair missing from ``AVAILABLE_FLAIRS`` is
        reported on stdout. Any exception raised during the scan is printed
        rather than propagated.
        """
        reddit = praw.Reddit(
            client_id=config.eventBotClientId,
            client_secret=config.eventBotSecret,
            user_agent=config.user_agent,
            username=config.megathread_username,
            password=config.megathread_password,
        )
        subreddit = reddit.subreddit(config.subreddit_name)

        # ------------------------------------------------------------------
        # DISABLED: per-flair search with monthly / grand-total statistics.
        # Kept for reference only.
        # ------------------------------------------------------------------
        # # We use a dictionary of Counters:
        # # Structure: { "2023-10": Counter({'FlairA': 10, 'FlairB': 2}), "2023-11": ... }
        # monthly_stats = defaultdict(Counter)
        # # We will also create a counter for the TOTAL stats across all months
        # grand_total_stats = Counter()
        #
        # print(f"Starting precise calendar analysis for r/{config.subreddit_name}...")
        #
        # total_posts_processed = 0
        #
        # for available_flair in AVAILABLE_FLAIRS:
        #     # Prepare the query
        #     # syntax='cloudsearch' requires the timestamp field
        #     # include_over_18 ensures we see posts even if sub is flagged mature
        #     query = f'flair:"{available_flair}"'
        #     print(f"Checking {available_flair}...")
        #
        #     try:
        #         posts = list(subreddit.search(
        #             query,
        #             limit=None,
        #             syntax="cloudsearch",
        #             params={'include_over_18': 'on'}
        #         ))
        #
        #         overall_count = len(posts)
        #         print(f"Found {overall_count} posts for {available_flair}...")
        #
        #         count_for_flair = 0
        #         for post in posts:
        #             # 2. Convert timestamp to "YYYY-MM" string key
        #             dt = datetime.fromtimestamp(post.created_utc, timezone.utc)
        #             month_key = dt.strftime('%Y-%m')
        #
        #             # 3. Add to stats
        #             # We use 'available_flair' to ensure the naming is consistent
        #             # (even if the post has slightly messed up encoding)
        #             monthly_stats[month_key][available_flair] += 1
        #             # 4. Add to Grand Total stats
        #             grand_total_stats[available_flair] += 1
        #             total_posts_processed += 1
        #             count_for_flair += 1
        #
        #         if count_for_flair == 0:
        #             print("⚠️ (0 in range)")
        #         else:
        #             print(f"✅ ({count_for_flair} found)")
        #
        #     except Exception as e:
        #         print(f"\n❌ Error: {e}")
        #
        # # --- OUTPUT RESULTS ---
        # print("\n" + "=" * 60)
        # print(f"FINAL REPORT: {total_posts_processed} POSTS ANALYZED")
        # print("=" * 60)
        #
        # # Sort keys to ensure chronological printing (2024-12, then 2025-01)
        # sorted_months = sorted(monthly_stats.keys())
        #
        # if not sorted_months:
        #     print("No posts found in the specified timeframe.")
        #
        # for month_key in sorted_months:
        #     print(f"\n📅 PERIOD: {month_key}")
        #     print(f"{'FLAIR':<40} | {'COUNT':<5} | {'%'}")
        #     print("-" * 60)
        #
        #     total_in_month = sum(monthly_stats[month_key].values())
        #
        #     # Sort flairs by popularity within that month
        #     for flair, count in monthly_stats[month_key].most_common():
        #         percentage = (count / total_in_month) * 100
        #
        #         # Truncate flair name if it's too long for the table
        #         display_flair = (flair[:37] + '..') if len(flair) > 37 else flair
        #
        #         print(f"{display_flair:<40} | {count:<5} | {percentage:.1f}%")
        #
        # # --- OUTPUT 2: GRAND TOTAL SUMMARY ---
        # print("\n" + "=" * 60)
        # print(f"PART 2: GRAND TOTAL SUMMARY (ALL MONTHS COMBINED)")
        # print(f"Total Posts: {total_posts_processed}")
        # print("=" * 60)
        #
        # print(f"\n📅 PERIOD: ALL TIME ({sorted_months[0]} to {sorted_months[-1]})")
        # print(f"{'FLAIR':<40} | {'COUNT':<5} | {'%'}")
        # print("-" * 60)
        #
        # for flair, count in grand_total_stats.most_common():
        #     # Percentage relative to the TOTAL posts analyzed across the whole period
        #     percentage = (count / total_posts_processed) * 100
        #
        #     display_flair = (flair[:37] + '..') if len(flair) > 37 else flair
        #     print(f"{display_flair:<40} | {count:<5} | {percentage:.1f}%")

        # One more search for potentially missed posts
        try:
            # --------------------------------------------------------------
            # DISABLED: listing-based scans, kept for reference only.
            # --------------------------------------------------------------
            # print("\nFetching .new() ...")
            # posts_new = list(subreddit.new(limit=None))
            # self.process_batch("SUBREDDIT.NEW", posts_new)
            #
            # # 2. FETCH HOT (Algorithm sorted, max ~1000)
            # print("\nFetching .hot() ...")
            # posts_hot = list(subreddit.hot(limit=None))
            # self.process_batch("SUBREDDIT.HOT", posts_hot)
            #
            # # 2. FETCH TOP (Algorithm sorted, max ~1000)
            # print("\nFetching .top() ...")
            # posts_hot = list(subreddit.top(limit=None))
            # self.process_batch("SUBREDDIT.TOP", posts_hot)
            #
            # # 2. FETCH CONTROVERSIAL (Algorithm sorted, max ~1000)
            # print("\nFetching .controversial() ...")
            # posts_hot = list(subreddit.controversial(limit=None))
            # self.process_batch("SUBREDDIT.CONTROVERSIAL", posts_hot)
            #
            # # 2. FETCH RISING (Algorithm sorted, max ~1000)
            # print("\nFetching .rising() ...")
            # posts_hot = list(subreddit.rising(limit=None))
            # self.process_batch("SUBREDDIT.RISING", posts_hot)
            print("\nFetching .random() ...")
            self._scan_random_posts(subreddit)
        except Exception as e:
            # Best-effort diagnostic script: report the failure and return.
            print(f"\n❌ Error: {e}")