atlas/tools/redditcrawl.py
2022-04-07 17:00:30 -03:00

113 lines
3.7 KiB
Python
Executable file

import praw
import json
import time
import re
import os
outfile = open('temp_atlas.json', 'w', encoding='utf-8')
failfile = open('manual_atlas.json', 'w', encoding='utf-8')
credentials = open('credentials', 'r')
client_id = credentials.readline().strip(' \t\n\r')
client_secret = credentials.readline().strip(' \t\n\r')
user = credentials.readline().strip(' \t\n\r')
pw = credentials.readline().strip(' \t\n\r')
reddit = praw.Reddit(client_id=client_id, client_secret=client_secret, user_agent='atlas_bot',username=user,password=pw)
has_write_access = not reddit.read_only
if not has_write_access:
print("Warning: No write access. Post flairs will not be updated")
sleep(5)
jsonfile = open("../web/atlas.json", "r", encoding='utf-8')
existing = json.load(jsonfile)
existing_ids = []
for item in existing:
existing_ids.append(item['id'])
def set_flair(submission, flair):
if has_write_access and submission.link_flair_text != flair:
flair_choices = submission.flair.choices()
flair = next(x for x in flair_choices if x["flair_text_editable"] and flair == x["flair_text"])
submission.flair.select(flair["flair_template_id"])
total_all_flairs = 0
duplicate_count = 0
failcount = 0
successcount = 0
totalcount = 0
outfile.write("[\n")
for submission in reddit.subreddit('placeAtlas2').new(limit=2000):
"""
Auth setup
1. Head to https://www.reddit.com/prefs/apps
2. Click "create another app"
3. Give it a name and description
4. Select "script"
5. Redirect to http://localhost:8080
6. Copy ID (under Personal Use Script)
7. Append to file called "credentials"
8. Copy Secret
9. Append on newline to "credentials" file
10. If you want flair write access append 2 newlines with username and password (Must be a mod, don't do this if you don't know what you're doing)
11. Run Script
Running Script
1. Input the next ID to use
2. Manually resolve errors in manual_atlas.json
3. Copy temp_atlas.json entries into web/_js/atlas.js
4. Pull Request
"""
total_all_flairs += 1
if (submission.id in existing_ids):
set_flair(submission, "Processed Entry")
print("Found first duplicate!")
duplicate_count += 1
if (duplicate_count > 0):
break
else:
continue
if(submission.link_flair_text == "New Entry"):
text = submission.selftext
#Old backslash filter:
#text = text.replace("\\", "")
#New one: One \\ escapes a backslash in python's parser
# Two escape it again in the regex parser, so \\\\ is \
# Then anything but " or n is replaced with the first capture group (anything but " or n)
# Test in repl: re.sub("\\\\([^\"n])", "\\1", "\\t < removed slash, t stays and > stays \\n \\\"")
text = re.sub("\\\\([^\"n])", "\\1", text)
try:
text = text.replace("\"id\": 0,", "\"id\": 0,\n\t\t\"submitted_by\": \""+submission.author.name+"\",")
except AttributeError:
text = text.replace("\"id\": 0,", "\"id\": 0,\n\t\t\"submitted_by\": \""+"unknown"+"\",")
lines = text.split("\n")
for i, line in enumerate(lines):
if("\"id\": 0" in line):
lines[i] = line.replace("\"id\": 0", "\"id\": "+"\""+str(submission.id)+"\"")
text = "\n".join(lines)
try:
outfile.write(json.dumps(json.loads(text))+" ,\n")
successcount += 1
set_flair(submission, "Processed Entry")
except json.JSONDecodeError:
failfile.write(text+",\n")
failcount += 1
set_flair(submission, "Rejected Entry")
print("written "+submission.id+" submitted "+str(round(time.time()-submission.created_utc))+" seconds ago")
totalcount += 1
# Remove ,\n
outfile.seek(outfile.tell()-4, os.SEEK_SET)
outfile.truncate()
outfile.write("\n]")
print(f"\n\nTotal all flairs:{total_all_flairs}\nSuccess: {successcount}/{totalcount}\nFail: {failcount}/{totalcount}\nPlease check manual_atlas.txt for failed entries to manually resolve.")