Initial commit
This commit is contained in:
248
StatsHelper.py
Normal file
248
StatsHelper.py
Normal file
@@ -0,0 +1,248 @@
|
||||
#!/usr/bin/env python3
|
||||
import praw
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
import config
|
||||
|
||||
# --- SETTINGS ---
# Calendar month the analysis is meant to start from.
START_YEAR = 2024
START_MONTH = 12  # December

# Flair texts considered valid for the subreddit. Posts are compared against
# these strings exactly (including emoji and spacing), so do not "tidy" them.
AVAILABLE_FLAIRS = [
    "Moderator Announcement 📢",
    "Horny😈",
    "Appreciation ☺️",
    "Meme!😋",
    "Lore📚✍️",
    "Hunter😈",
    "Truth or Dare - NNYC Edition - 🥵",
    "Truth or Dare - NNYC Edition - 💞",
    "Triumph Diary",
    "Defeat Diary",
    "cursed meme🙃👿",
    "Gamble 🎰🎲🃏",
    "DDD Challenge",
    "Holly Jolly Time 🎄⛄🦌🎁🍪🌟",
    "Happy New Year 🎊🥂🎉🍀",
    "Easter Egg-Stravaganza 🐰🐇",
    "[Game] - Would you rather...⁉️",
    ":pokeball::pokeball: Gotta catch 'em all :pokeball::pokeball:",
    "Will you be my Valentine 💖❤️💌",
]
|
||||
|
||||
class StatsHelper:
    """Audit the flairs of posts in the configured subreddit.

    Posts are compared against the module-level ``AVAILABLE_FLAIRS`` list;
    anything unflaired or carrying a flair outside that list is reported on
    stdout.
    """

    def __init__(self):
        """Announce start-up; no instance state is created."""
        print("🎄 Initializing StatsHelper...")

    def get_next_month_start(self, year, month):
        """Return the ``(year, month)`` pair for the month after the given one.

        December rolls over to January of the following year.
        """
        if month == 12:
            return year + 1, 1
        return year, month + 1

    def process_batch(self, batch_name, post_list):
        """Tally the flairs of a batch of posts and print a summary.

        Args:
            batch_name: Label printed in the batch header.
            post_list: Iterable of objects exposing ``link_flair_text``.

        Each post is counted as unflaired (falsy flair text), known (flair
        text is in ``AVAILABLE_FLAIRS``) or unknown. Unknown flairs are
        listed with ``repr()`` so stray whitespace is visible.
        """
        print(f"\n🔵 PROCESSING: {batch_name} (Limit: ~1000)")
        print("-" * 60)

        # Build the lookup once instead of scanning the list for every post.
        known_flairs = frozenset(AVAILABLE_FLAIRS)
        unknown_flairs = Counter()
        matched_count = 0
        no_flair_count = 0
        total_scanned = 0

        for post in post_list:
            total_scanned += 1
            flair = post.link_flair_text

            if not flair:
                no_flair_count += 1
            elif flair in known_flairs:
                matched_count += 1
            else:
                unknown_flairs[flair] += 1

        # --- REPORT FOR THIS BATCH ---
        print(f"Total Retrieved: {total_scanned}")
        print(f"✅ Known Flairs: {matched_count}")
        print(f"👻 No Flair: {no_flair_count}")

        if unknown_flairs:
            print(f"⚠️ UNKNOWN FLAIRS: {sum(unknown_flairs.values())}")
            print(" (Check these for typos/spaces)")
            for flair, count in unknown_flairs.most_common():
                # repr() exposes hidden spaces, e.g. 'Meme '
                print(f" {flair!r:<40} : {count}")
        else:
            print("🎉 No unknown flairs found.")
        print("-" * 60)

    def _scan_random_posts(self, subreddit, max_attempts=10000,
                           known_flairs=None, pause_every=10,
                           pause_seconds=2):
        """Sample random posts and report unflaired / unexpected-flair ones.

        Args:
            subreddit: Object whose ``random()`` returns a post exposing
                ``id`` and ``link_flair_text``, or ``None``.
            max_attempts: Upper bound on the number of ``random()`` calls.
            known_flairs: Iterable of accepted flair texts; defaults to the
                module-level ``AVAILABLE_FLAIRS``.
            pause_every: Sleep after every this-many requests (0 disables).
            pause_seconds: Length of each pause in seconds.

        Returns:
            ``(unflaired_ids, unexpected_flairs)``: the ids of unflaired
            posts in the order seen (random sampling may repeat a post) and
            a ``Counter`` of flair texts not in ``known_flairs``.
        """
        if known_flairs is None:
            known_flairs = AVAILABLE_FLAIRS
        known_flairs = frozenset(known_flairs)

        unflaired_ids = []
        unexpected_flairs = Counter()

        # A bounded for-loop guarantees termination; the previous while-loop
        # never advanced its counter and therefore never ended.
        for attempt in range(1, max_attempts + 1):
            rand_post = subreddit.random()
            if rand_post is None:
                # PRAW documents that random() returns None when the
                # subreddit does not serve random submissions; retrying
                # would only burn requests.
                print("Subreddit returned no random post; stopping the random scan.")
                break

            flair = rand_post.link_flair_text
            if not flair:
                print(f"Found unflaired post => {rand_post.id}")
                unflaired_ids.append(rand_post.id)
            elif flair not in known_flairs:
                print(f"Found unexpected flair => {flair}")
                unexpected_flairs[flair] += 1

            # Throttle on every Nth request regardless of what was found.
            if pause_every and attempt % pause_every == 0:
                time.sleep(pause_seconds)

        return unflaired_ids, unexpected_flairs

    def analyze(self):
        """Connect to Reddit and run the random-post flair scan.

        Credentials, user agent and subreddit name are read from the
        ``config`` module. Exceptions raised during the scan are caught and
        printed rather than propagated. Returns ``None``.
        """
        reddit = praw.Reddit(
            client_id=config.eventBotClientId,
            client_secret=config.eventBotSecret,
            user_agent=config.user_agent,
            username=config.megathread_username,
            password=config.megathread_password,
        )

        subreddit = reddit.subreddit(config.subreddit_name)

        # --- DISABLED: per-flair search report (kept for reference) ---
        # # We use a dictionary of Counters:
        # # Structure: { "2023-10": Counter({'FlairA': 10, 'FlairB': 2}), "2023-11": ... }
        # monthly_stats = defaultdict(Counter)
        # # We will also create a counter for the TOTAL stats across all months
        # grand_total_stats = Counter()
        #
        # print(f"Starting precise calendar analysis for r/{config.subreddit_name}...")
        #
        # total_posts_processed = 0
        #
        # for available_flair in AVAILABLE_FLAIRS:
        #     # Prepare the query
        #     # syntax='cloudsearch' requires the timestamp field
        #     # include_over_18 ensures we see posts even if sub is flagged mature
        #     query = f'flair:"{available_flair}"'
        #     print(f"Checking {available_flair}...")
        #
        #     try:
        #         posts = list(subreddit.search(
        #             query,
        #             limit=None,
        #             syntax="cloudsearch",
        #             params={'include_over_18': 'on'}
        #         ))
        #
        #         overall_count = len(posts)
        #         print(f"Found {overall_count} posts for {available_flair}...")
        #
        #         count_for_flair = 0
        #         for post in posts:
        #             # 2. Convert timestamp to "YYYY-MM" string key
        #             dt = datetime.fromtimestamp(post.created_utc, timezone.utc)
        #             month_key = dt.strftime('%Y-%m')
        #
        #             # 3. Add to stats
        #             # We use 'available_flair' to ensure the naming is consistent
        #             # (even if the post has slightly messed up encoding)
        #             monthly_stats[month_key][available_flair] += 1
        #             # 4. Add to Grand Total stats
        #             grand_total_stats[available_flair] += 1
        #             total_posts_processed += 1
        #             count_for_flair += 1
        #
        #         if count_for_flair == 0:
        #             print("⚠️ (0 in range)")
        #         else:
        #             print(f"✅ ({count_for_flair} found)")
        #
        #     except Exception as e:
        #         print(f"\n❌ Error: {e}")
        #
        # # --- OUTPUT RESULTS ---
        # print("\n" + "=" * 60)
        # print(f"FINAL REPORT: {total_posts_processed} POSTS ANALYZED")
        # print("=" * 60)
        #
        # # Sort keys to ensure chronological printing (2024-12, then 2025-01)
        # sorted_months = sorted(monthly_stats.keys())
        #
        # if not sorted_months:
        #     print("No posts found in the specified timeframe.")
        #
        # for month_key in sorted_months:
        #     print(f"\n📅 PERIOD: {month_key}")
        #     print(f"{'FLAIR':<40} | {'COUNT':<5} | {'%'}")
        #     print("-" * 60)
        #
        #     total_in_month = sum(monthly_stats[month_key].values())
        #
        #     # Sort flairs by popularity within that month
        #     for flair, count in monthly_stats[month_key].most_common():
        #         percentage = (count / total_in_month) * 100
        #
        #         # Truncate flair name if it's too long for the table
        #         display_flair = (flair[:37] + '..') if len(flair) > 37 else flair
        #
        #         print(f"{display_flair:<40} | {count:<5} | {percentage:.1f}%")
        #
        # # --- OUTPUT 2: GRAND TOTAL SUMMARY ---
        # print("\n" + "=" * 60)
        # print(f"PART 2: GRAND TOTAL SUMMARY (ALL MONTHS COMBINED)")
        # print(f"Total Posts: {total_posts_processed}")
        # print("=" * 60)
        #
        # print(f"\n📅 PERIOD: ALL TIME ({sorted_months[0]} to {sorted_months[-1]})")
        # print(f"{'FLAIR':<40} | {'COUNT':<5} | {'%'}")
        # print("-" * 60)
        #
        # for flair, count in grand_total_stats.most_common():
        #     # Percentage relative to the TOTAL posts analyzed across the whole period
        #     percentage = (count / total_posts_processed) * 100
        #
        #     display_flair = (flair[:37] + '..') if len(flair) > 37 else flair
        #     print(f"{display_flair:<40} | {count:<5} | {percentage:.1f}%")

        # One more search for potentially missed posts
        try:
            # --- DISABLED: listing-based passes (kept for reference) ---
            # print("\nFetching .new() ...")
            # posts_new = list(subreddit.new(limit=None))
            # self.process_batch("SUBREDDIT.NEW", posts_new)
            #
            # # 2. FETCH HOT (Algorithm sorted, max ~1000)
            # print("\nFetching .hot() ...")
            # posts_hot = list(subreddit.hot(limit=None))
            # self.process_batch("SUBREDDIT.HOT", posts_hot)
            #
            # # 2. FETCH TOP (Algorithm sorted, max ~1000)
            # print("\nFetching .top() ...")
            # posts_hot = list(subreddit.top(limit=None))
            # self.process_batch("SUBREDDIT.TOP", posts_hot)
            #
            # # 2. FETCH CONTROVERSIAL (Algorithm sorted, max ~1000)
            # print("\nFetching .controversial() ...")
            # posts_hot = list(subreddit.controversial(limit=None))
            # self.process_batch("SUBREDDIT.CONTROVERSIAL", posts_hot)
            #
            # # 2. FETCH RISING (Algorithm sorted, max ~1000)
            # print("\nFetching .rising() ...")
            # posts_hot = list(subreddit.rising(limit=None))
            # self.process_batch("SUBREDDIT.RISING", posts_hot)

            print("\nFetching .random() ...")
            self._scan_random_posts(subreddit)

        except Exception as e:
            # Top-level boundary: report the failure instead of crashing.
            print(f"\n❌ Error: {e}")
|
||||
Reference in New Issue
Block a user