249 lines
8.9 KiB
Python
249 lines
8.9 KiB
Python
#!/usr/bin/env python3
|
||
import praw
|
||
import time
|
||
from datetime import datetime, timezone
|
||
from collections import Counter, defaultdict
|
||
|
||
import config
|
||
|
||
# --- SETTINGS ---
# Calendar starting point (year and month number). Neither value is read by
# the code visible in this part of the file.
START_YEAR = 2024
START_MONTH = 12  # December

# Exact flair texts that count as "known". A post's flair is compared against
# these strings verbatim, so emoji and spacing must match character for
# character.
AVAILABLE_FLAIRS = [
    "Moderator Announcement 📢",
    "Horny😈",
    "Appreciation ☺️",
    "Meme!😋",
    "Lore📚✍️",
    "Hunter😈",
    "Truth or Dare - NNYC Edition - 🥵",
    "Truth or Dare - NNYC Edition - 💞",
    "Triumph Diary",
    "Defeat Diary",
    "cursed meme🙃👿",
    "Gamble 🎰🎲🃏",
    "DDD Challenge",
    "Holly Jolly Time 🎄⛄🦌🎁🍪🌟",
    "Happy New Year 🎊🥂🎉🍀",
    "Easter Egg-Stravaganza 🐰🐇",
    "[Game] - Would you rather...⁉️",
    ":pokeball::pokeball: Gotta catch 'em all :pokeball::pokeball:",
    "Will you be my Valentine 💖❤️💌",
]
|
||
|
||
class StatsHelper:
    """Audit the post flairs of the configured subreddit.

    Flair texts are compared against ``AVAILABLE_FLAIRS``. Three scan
    strategies are available; ``analyze`` chooses which ones run:

    * a per-flair search with a month-by-month report,
    * a sweep over the standard listings (new/hot/top/controversial/rising),
    * a bounded sampling of random posts.
    """

    def __init__(self):
        print("🎄 Initializing StatsHelper...")

    def get_next_month_start(self, year, month):
        """Return the ``(year, month)`` pair of the month after the given one.

        December rolls over to January of the following year.
        """
        if month == 12:
            return year + 1, 1
        return year, month + 1

    def process_batch(self, batch_name, post_list, known_flairs=None):
        """Classify the flairs of a batch of posts and print a summary.

        Args:
            batch_name: Label printed in the batch header.
            post_list: Iterable of objects exposing ``link_flair_text``.
            known_flairs: Flair texts treated as known. Defaults to the
                module-level ``AVAILABLE_FLAIRS``.
        """
        if known_flairs is None:
            known_flairs = AVAILABLE_FLAIRS
        known = frozenset(known_flairs)  # O(1) membership inside the loop

        print(f"\n🔵 PROCESSING: {batch_name} (Limit: ~1000)")
        print("-" * 60)

        unknown_flairs = Counter()
        matched_count = 0
        no_flair_count = 0
        total_scanned = 0

        for post in post_list:
            total_scanned += 1
            flair = post.link_flair_text

            if not flair:
                # None and "" both count as "no flair".
                no_flair_count += 1
            elif flair in known:
                matched_count += 1
            else:
                unknown_flairs[flair] += 1

        # --- REPORT FOR THIS BATCH ---
        print(f"Total Retrieved: {total_scanned}")
        print(f"✅ Known Flairs: {matched_count}")
        print(f"👻 No Flair: {no_flair_count}")

        if unknown_flairs:
            print(f"⚠️ UNKNOWN FLAIRS: {sum(unknown_flairs.values())}")
            print(" (Check these for typos/spaces)")
            for text, count in unknown_flairs.most_common():
                # repr() exposes hidden whitespace, e.g. 'Meme '
                print(f" {text!r:<40} : {count}")
        else:
            print("🎉 No unknown flairs found.")
        print("-" * 60)

    @staticmethod
    def _print_flair_table(stats):
        """Print a FLAIR | COUNT | % table for a non-empty Counter."""
        total = sum(stats.values())
        print(f"{'FLAIR':<40} | {'COUNT':<5} | {'%'}")
        print("-" * 60)
        for flair, count in stats.most_common():
            percentage = (count / total) * 100
            # Truncate flair names that are too long for the table.
            display_flair = (flair[:37] + '..') if len(flair) > 37 else flair
            print(f"{display_flair:<40} | {count:<5} | {percentage:.1f}%")

    def _report_flair_search(self, subreddit, known_flairs=None):
        """Search the subreddit once per known flair and print the counts.

        Posts are grouped by the UTC ``YYYY-MM`` of ``created_utc``. A report
        is printed for each month, followed by a grand total. A failing
        search is reported and skipped so the remaining flairs still run.
        """
        if known_flairs is None:
            known_flairs = AVAILABLE_FLAIRS

        # {"2024-12": Counter({"FlairA": 10, ...}), "2025-01": ...}
        monthly_stats = defaultdict(Counter)
        grand_total_stats = Counter()
        total_posts_processed = 0

        print(f"Starting precise calendar analysis for r/{config.subreddit_name}...")

        for available_flair in known_flairs:
            query = f'flair:"{available_flair}"'
            print(f"Checking {available_flair}...")
            try:
                # include_over_18 keeps posts visible on subs flagged mature.
                posts = list(subreddit.search(
                    query,
                    limit=None,
                    syntax="cloudsearch",
                    params={'include_over_18': 'on'},
                ))
                print(f"Found {len(posts)} posts for {available_flair}...")

                count_for_flair = 0
                for post in posts:
                    created = datetime.fromtimestamp(post.created_utc, timezone.utc)
                    month_key = created.strftime('%Y-%m')
                    # Key on the searched flair so naming stays consistent
                    # even if the post's own flair text differs slightly.
                    monthly_stats[month_key][available_flair] += 1
                    grand_total_stats[available_flair] += 1
                    total_posts_processed += 1
                    count_for_flair += 1

                if count_for_flair == 0:
                    print("⚠️ (0 in range)")
                else:
                    print(f"✅ ({count_for_flair} found)")
            except Exception as e:
                print(f"\n❌ Error: {e}")

        # --- OUTPUT 1: PER-MONTH RESULTS ---
        print("\n" + "=" * 60)
        print(f"FINAL REPORT: {total_posts_processed} POSTS ANALYZED")
        print("=" * 60)

        # Sorted "YYYY-MM" keys are chronological.
        sorted_months = sorted(monthly_stats)
        if not sorted_months:
            print("No posts found in the specified timeframe.")
            # Nothing to summarise; indexing sorted_months below would fail.
            return

        for month_key in sorted_months:
            print(f"\n📅 PERIOD: {month_key}")
            self._print_flair_table(monthly_stats[month_key])

        # --- OUTPUT 2: GRAND TOTAL SUMMARY ---
        print("\n" + "=" * 60)
        print("PART 2: GRAND TOTAL SUMMARY (ALL MONTHS COMBINED)")
        print(f"Total Posts: {total_posts_processed}")
        print("=" * 60)

        print(f"\n📅 PERIOD: ALL TIME ({sorted_months[0]} to {sorted_months[-1]})")
        self._print_flair_table(grand_total_stats)

    def _scan_listings(self, subreddit, known_flairs=None):
        """Fetch each standard listing in full and run it through process_batch."""
        for listing in ("new", "hot", "top", "controversial", "rising"):
            print(f"\nFetching .{listing}() ...")
            posts = list(getattr(subreddit, listing)(limit=None))
            self.process_batch(f"SUBREDDIT.{listing.upper()}", posts, known_flairs)

    def _scan_random_posts(self, subreddit, max_samples=10000, pause_every=10,
                           pause_seconds=2, known_flairs=None):
        """Sample random posts and report unflaired or unexpected flairs.

        Args:
            subreddit: Object exposing ``random()``, which returns a post
                or ``None``.
            max_samples: Upper bound on the number of ``random()`` calls.
            pause_every: Sleep after every this-many samples (0 disables).
            pause_seconds: Length of each pause, in seconds.
            known_flairs: Flair texts treated as known. Defaults to the
                module-level ``AVAILABLE_FLAIRS``.
        """
        if known_flairs is None:
            known_flairs = AVAILABLE_FLAIRS
        known = frozenset(known_flairs)

        print("\nFetching .random() ...")
        # A bounded for-loop guarantees termination. The previous while-loop
        # never advanced its counter, so it could not end on its own.
        for sample_no in range(1, max_samples + 1):
            rand_post = subreddit.random()
            if rand_post is None:
                # PRAW documents None for subreddits without random support.
                print("⚠️ subreddit.random() returned no post; stopping random scan.")
                break

            flair = rand_post.link_flair_text
            if not flair:
                print(f"Found unflaired post => {rand_post.id}")
            elif flair not in known:
                print(f"Found unexpected flair => {flair}")

            if pause_every > 0 and sample_no % pause_every == 0:
                time.sleep(pause_seconds)

    def analyze(self, *, run_flair_search=False, run_listing_scan=False,
                run_random_scan=True):
        """Connect to Reddit and run the selected flair scans.

        With the defaults, only the random-post scan runs.

        Args:
            run_flair_search: Run the per-flair search with monthly report.
            run_listing_scan: Sweep new/hot/top/controversial/rising.
            run_random_scan: Sample random posts (bounded).
        """
        reddit = praw.Reddit(
            client_id=config.eventBotClientId,
            client_secret=config.eventBotSecret,
            user_agent=config.user_agent,
            username=config.megathread_username,
            password=config.megathread_password,
        )
        subreddit = reddit.subreddit(config.subreddit_name)

        if run_flair_search:
            self._report_flair_search(subreddit)

        # Extra passes for posts the flair search may have missed. This is
        # the script's top-level boundary, so any failure is reported
        # rather than raised.
        try:
            if run_listing_scan:
                self._scan_listings(subreddit)
            if run_random_scan:
                self._scan_random_posts(subreddit)
        except Exception as e:
            print(f"\n❌ Error: {e}")
|