Initial commit

This commit is contained in:
Alexander Dörflinger
2025-12-08 10:58:53 +01:00
commit 783a95f4b4
19 changed files with 933 additions and 0 deletions

248
StatsHelper.py Normal file
View File

@@ -0,0 +1,248 @@
#!/usr/bin/env python3
import praw
import time
from datetime import datetime, timezone
from collections import Counter, defaultdict
import config
# --- SETTINGS ---
# Start of the reporting window (year / month number).
START_YEAR = 2024
START_MONTH = 12  # December

# Flair texts that are considered valid. A post's ``link_flair_text`` is
# compared against these entries verbatim, so emoji, spacing and
# capitalisation must match the subreddit's flair templates exactly.
AVAILABLE_FLAIRS = [
    "Moderator Announcement 📢",
    "Horny😈",
    "Appreciation ☺️",
    "Meme!😋",
    "Lore📚✍",
    "Hunter😈",
    "Truth or Dare - NNYC Edition - 🥵",
    "Truth or Dare - NNYC Edition - 💞",
    "Triumph Diary",
    "Defeat Diary",
    "cursed meme🙃👿",
    "Gamble 🎰🎲🃏",
    "DDD Challenge",
    "Holly Jolly Time 🎄⛄🦌🎁🍪🌟",
    "Happy New Year 🎊🥂🎉🍀",
    "Easter Egg-Stravaganza 🐰🐇",
    "[Game] - Would you rather...⁉️",
    ":pokeball::pokeball: Gotta catch 'em all :pokeball::pokeball:",
    "Will you be my Valentine 💖❤️💌",
]
class StatsHelper:
    """Flair diagnostics for a subreddit.

    Compares the ``link_flair_text`` of posts against a set of expected
    flair texts and prints what it finds. All results are written to
    stdout; nothing is returned or persisted.
    """

    # Throttle for the random scan: pause after every Nth request.
    RANDOM_PAUSE_EVERY = 10
    RANDOM_PAUSE_SECONDS = 2

    def __init__(self, known_flairs=None):
        """Create a helper.

        Args:
            known_flairs: Optional iterable of flair texts to treat as
                expected. Defaults to the module-level ``AVAILABLE_FLAIRS``.
        """
        print("🎄 Initializing StatsHelper...")
        if known_flairs is None:
            known_flairs = AVAILABLE_FLAIRS
        # Built once so every membership test is a hash lookup instead of
        # a linear scan over the list.
        self.known_flairs = frozenset(known_flairs)

    def get_next_month_start(self, year, month):
        """Helper to calculate the first day of the NEXT month.

        Args:
            year: Calendar year of the current month.
            month: Month number of the current month (12 rolls over).

        Returns:
            A ``(year, month)`` tuple identifying the following month.
        """
        if month == 12:
            return year + 1, 1
        return year, month + 1

    def process_batch(self, batch_name, post_list):
        """Classify the flairs of a batch of posts and print a summary.

        Args:
            batch_name: Label printed in the report header.
            post_list: Iterable of objects exposing ``link_flair_text``.

        Each post is counted as unflaired (falsy flair text), known (flair
        text is one of ``self.known_flairs``) or unknown (anything else).
        """
        print(f"\n🔵 PROCESSING: {batch_name} (Limit: ~1000)")
        print("-" * 60)

        unknown_flairs = Counter()
        matched_count = 0
        no_flair_count = 0
        total_scanned = 0

        for post in post_list:
            total_scanned += 1
            flair = post.link_flair_text
            if not flair:
                # Check 1: No Flair
                no_flair_count += 1
            elif flair in self.known_flairs:
                # Check 2: Known List
                matched_count += 1
            else:
                unknown_flairs[flair] += 1

        # --- REPORT FOR THIS BATCH ---
        print(f"Total Retrieved: {total_scanned}")
        print(f"✅ Known Flairs: {matched_count}")
        print(f"👻 No Flair: {no_flair_count}")
        if unknown_flairs:
            print(f"⚠️ UNKNOWN FLAIRS: {sum(unknown_flairs.values())}")
            print(" (Check these for typos/spaces)")
            for flair, count in unknown_flairs.most_common():
                # repr() makes hidden spaces visible, e.g. 'Meme '
                print(f" {flair!r:<40} : {count}")
        else:
            print("🎉 No unknown flairs found.")
        print("-" * 60)

    def analyze(self, subreddit=None, sample_size=10000):
        """Sample random posts and report missing or unexpected flairs.

        Args:
            subreddit: Optional object exposing ``random()``. When omitted,
                a PRAW session is created from the values in ``config`` and
                ``config.subreddit_name`` is used.
            sample_size: Maximum number of random posts to request.

        The scan stops early when ``random()`` returns ``None``. Any
        exception raised during the scan is caught and printed rather than
        propagated.
        """
        if subreddit is None:
            reddit = praw.Reddit(
                client_id=config.eventBotClientId,
                client_secret=config.eventBotSecret,
                user_agent=config.user_agent,
                username=config.megathread_username,
                password=config.megathread_password,
            )
            subreddit = reddit.subreddit(config.subreddit_name)

        # NOTE(review): the two disabled passes below are kept for reference.
        # Their indentation was reconstructed; verify it before re-enabling.
        #
        # # We use a dictionary of Counters:
        # # Structure: { "2023-10": Counter({'FlairA': 10, 'FlairB': 2}), "2023-11": ... }
        # monthly_stats = defaultdict(Counter)
        # # We will also create a counter for the TOTAL stats across all months
        # grand_total_stats = Counter()
        #
        # print(f"Starting precise calendar analysis for r/{config.subreddit_name}...")
        #
        # total_posts_processed = 0
        #
        # for available_flair in AVAILABLE_FLAIRS:
        #     # Prepare the query
        #     # syntax='cloudsearch' requires the timestamp field
        #     # include_over_18 ensures we see posts even if sub is flagged mature
        #     query = f'flair:"{available_flair}"'
        #     print(f"Checking {available_flair}...")
        #
        #     try:
        #         posts = list(subreddit.search(
        #             query,
        #             limit=None,
        #             syntax="cloudsearch",
        #             params={'include_over_18': 'on'}
        #         ))
        #
        #         overall_count = len(posts)
        #         print(f"Found {overall_count} posts for {available_flair}...")
        #
        #         count_for_flair = 0
        #         for post in posts:
        #             # 2. Convert timestamp to "YYYY-MM" string key
        #             dt = datetime.fromtimestamp(post.created_utc, timezone.utc)
        #             month_key = dt.strftime('%Y-%m')
        #
        #             # 3. Add to stats
        #             # We use 'available_flair' to ensure the naming is consistent
        #             # (even if the post has slightly messed up encoding)
        #             monthly_stats[month_key][available_flair] += 1
        #             # 4. Add to Grand Total stats
        #             grand_total_stats[available_flair] += 1
        #             total_posts_processed += 1
        #             count_for_flair += 1
        #
        #         if count_for_flair == 0:
        #             print("⚠️ (0 in range)")
        #         else:
        #             print(f"✅ ({count_for_flair} found)")
        #
        #     except Exception as e:
        #         print(f"\n❌ Error: {e}")
        #
        # # --- OUTPUT RESULTS ---
        # print("\n" + "=" * 60)
        # print(f"FINAL REPORT: {total_posts_processed} POSTS ANALYZED")
        # print("=" * 60)
        #
        # # Sort keys to ensure chronological printing (2024-12, then 2025-01)
        # sorted_months = sorted(monthly_stats.keys())
        #
        # if not sorted_months:
        #     print("No posts found in the specified timeframe.")
        #
        # for month_key in sorted_months:
        #     print(f"\n📅 PERIOD: {month_key}")
        #     print(f"{'FLAIR':<40} | {'COUNT':<5} | {'%'}")
        #     print("-" * 60)
        #
        #     total_in_month = sum(monthly_stats[month_key].values())
        #
        #     # Sort flairs by popularity within that month
        #     for flair, count in monthly_stats[month_key].most_common():
        #         percentage = (count / total_in_month) * 100
        #
        #         # Truncate flair name if it's too long for the table
        #         display_flair = (flair[:37] + '..') if len(flair) > 37 else flair
        #
        #         print(f"{display_flair:<40} | {count:<5} | {percentage:.1f}%")
        #
        # # --- OUTPUT 2: GRAND TOTAL SUMMARY ---
        # print("\n" + "=" * 60)
        # print(f"PART 2: GRAND TOTAL SUMMARY (ALL MONTHS COMBINED)")
        # print(f"Total Posts: {total_posts_processed}")
        # print("=" * 60)
        #
        # print(f"\n📅 PERIOD: ALL TIME ({sorted_months[0]} to {sorted_months[-1]})")
        # print(f"{'FLAIR':<40} | {'COUNT':<5} | {'%'}")
        # print("-" * 60)
        #
        # for flair, count in grand_total_stats.most_common():
        #     # Percentage relative to the TOTAL posts analyzed across the whole period
        #     percentage = (count / total_posts_processed) * 100
        #
        #     display_flair = (flair[:37] + '..') if len(flair) > 37 else flair
        #     print(f"{display_flair:<40} | {count:<5} | {percentage:.1f}%")

        # One more search for potentially missed posts
        try:
            # print("\nFetching .new() ...")
            # posts_new = list(subreddit.new(limit=None))
            # self.process_batch("SUBREDDIT.NEW", posts_new)
            #
            # # 2. FETCH HOT (Algorithm sorted, max ~1000)
            # print("\nFetching .hot() ...")
            # posts_hot = list(subreddit.hot(limit=None))
            # self.process_batch("SUBREDDIT.HOT", posts_hot)
            #
            # # 2. FETCH TOP (Algorithm sorted, max ~1000)
            # print("\nFetching .top() ...")
            # posts_hot = list(subreddit.top(limit=None))
            # self.process_batch("SUBREDDIT.TOP", posts_hot)
            #
            # # 2. FETCH CONTROVERSIAL (Algorithm sorted, max ~1000)
            # print("\nFetching .controversial() ...")
            # posts_hot = list(subreddit.controversial(limit=None))
            # self.process_batch("SUBREDDIT.CONTROVERSIAL", posts_hot)
            #
            # # 2. FETCH RISING (Algorithm sorted, max ~1000)
            # print("\nFetching .rising() ...")
            # posts_hot = list(subreddit.rising(limit=None))
            # self.process_batch("SUBREDDIT.RISING", posts_hot)
            print("\nFetching .random() ...")
            # A bounded for-loop guarantees termination; the previous
            # while-loop never advanced its counter and ran forever.
            for attempt in range(1, sample_size + 1):
                rand_post = subreddit.random()
                if rand_post is None:
                    # Nothing to inspect, and retrying would only repeat
                    # the same empty answer.
                    print("⚠️ subreddit.random() returned no post; stopping the random scan.")
                    break

                flair = rand_post.link_flair_text
                if not flair:
                    # Check 1: No Flair
                    print(f"Found unflaired post => {rand_post.id}")
                elif flair not in self.known_flairs:
                    # Check 2: Known List
                    print(f"Found unexpected flair => {flair}")

                # Throttle on every Nth request, including unflaired hits.
                if attempt % self.RANDOM_PAUSE_EVERY == 0:
                    time.sleep(self.RANDOM_PAUSE_SECONDS)
        except Exception as e:
            # Best-effort diagnostic script: report the failure and return.
            print(f"\n❌ Error: {e}")