atlas/tools/redditcrawl.py

46 lines
1.3 KiB
Python
Raw Normal View History

2017-04-04 20:12:45 +02:00
"""Crawl new r/placeAtlas2 submissions and convert them into atlas JSON.

Reads Reddit API credentials from a local ``credentials`` file (client id on
the first line, client secret on the second), fetches the newest submissions
flaired "New Entry", assigns each placeholder ``"id": 0`` a sequential id
starting from a user-supplied latest id, and appends the parsed entries to
``temp_atlas.json``.  Entries whose body fails to parse as JSON are written
to ``manual_atlas.json`` for manual resolution.
"""
import json

import praw

# Credentials file: line 1 = client id, line 2 = client secret.
with open('credentials', 'r') as credentials:
    client_id = credentials.readline().strip(' \t\n\r')
    client_secret = credentials.readline().strip(' \t\n\r')

reddit = praw.Reddit(client_id=client_id, client_secret=client_secret, user_agent='atlas_bot')

failcount = 0
successcount = 0
latestID = int(input("Latest ID: "))

with open('temp_atlas.json', 'w') as outfile, open('manual_atlas.json', 'w') as failfile:
    for submission in reddit.subreddit('placeAtlas2').new(limit=220):
        if submission.link_flair_text == "New Entry":
            text = submission.selftext
            text = text.replace("\\", "")
            # Record the submitter's username next to the placeholder id.
            text = text.replace("\"id\": 0,", "\"id\": 0,\n\t\t\"submitted_by\": \""+submission.author.name+"\",")
            lines = text.split("\n")
            for i, line in enumerate(lines):
                if "\"id\": 0" in line:
                    lines[i] = line.replace("\"id\": 0", "\"id\": "+str(latestID))
                    latestID = latestID + 1
            # BUG FIX: rebuild the text AFTER substituting the ids.  The
            # original joined before the substitution loop, so the assigned
            # ids were silently discarded and every entry kept "id": 0.
            text = "\n".join(lines)
            try:
                # Round-trip through json to validate and normalise the entry.
                outfile.write(json.dumps(json.loads(text))+",\n")
            except json.JSONDecodeError:
                print("Errored "+submission.title)
                failfile.write(text+",\n")
                failcount += 1
            else:
                # BUG FIX: only report/count success when parsing succeeded;
                # the original incremented successcount and printed "written"
                # even after the except branch had fired.
                print("written "+submission.title)
                successcount += 1
        else:
            print("skipped "+submission.title)

# BUG FIX: the failure file is manual_atlas.json, not manual_atlas.txt.
print(f"\n\nSuccess: {successcount}\nFail: {failcount}\nPlease check manual_atlas.json for failed entries to manually resolve.")