mirror of
https://github.com/placeAtlas/atlas.git
synced 2024-11-15 14:33:36 +01:00
update crawler to reject already submitted
This commit is contained in:
parent
6f9fd822c9
commit
3e2f998116
1 changed file with 10 additions and 4 deletions
|
@@ -14,9 +14,16 @@
|
|||
failcount = 0
|
||||
successcount = 0
|
||||
|
||||
latestID = int(input("Latest ID: "))
|
||||
jsonfile = open("../web/atlas.json", "r")
|
||||
existing = json.load(jsonfile)
|
||||
|
||||
for submission in reddit.subreddit('placeAtlas2').new(limit=1100):
|
||||
existing_ids = []
|
||||
|
||||
for item in existing:
|
||||
existing_ids.append(item['id'])
|
||||
|
||||
|
||||
for submission in reddit.subreddit('placeAtlas2').new(limit=1500):
|
||||
"""
|
||||
Auth setup
|
||||
1. Head to https://www.reddit.com/prefs/apps
|
||||
|
@@ -38,7 +45,7 @@
|
|||
|
||||
"""
|
||||
#print(dir(submission))
|
||||
if(submission.link_flair_text == "New Entry"):
|
||||
if(submission.link_flair_text == "New Entry" and submission.id not in existing_ids):
|
||||
print(submission.id)
|
||||
text = submission.selftext
|
||||
text = text.replace("\\", "")
|
||||
|
@@ -53,7 +60,6 @@
|
|||
for i, line in enumerate(lines):
|
||||
if("\"id\": 0" in line):
|
||||
lines[i] = line.replace("\"id\": 0", "\"id\": "+"\""+str(submission.id)+"\"")
|
||||
latestID = latestID + 1
|
||||
text = "\n".join(lines)
|
||||
try:
|
||||
outfile.write(json.dumps(json.loads(text))+",\n")
|
||||
|
|
Loading…
Reference in a new issue