mirror of
https://github.com/placeAtlas/atlas.git
synced 2024-12-29 20:54:48 +01:00
Merge pull request #829 from nico-abram/valid_temp_json
Make redditcrawl create a valid json for atlas_temp
This commit is contained in:
commit
3b50b0950f
1 changed file with 18 additions and 5 deletions
|
@ -3,6 +3,7 @@
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
|
import os
|
||||||
|
|
||||||
outfile = open('temp_atlas.json', 'w', encoding='utf-8')
|
outfile = open('temp_atlas.json', 'w', encoding='utf-8')
|
||||||
failfile = open('manual_atlas.json', 'w', encoding='utf-8')
|
failfile = open('manual_atlas.json', 'w', encoding='utf-8')
|
||||||
|
@ -25,7 +26,9 @@
|
||||||
for item in existing:
|
for item in existing:
|
||||||
existing_ids.append(item['id'])
|
existing_ids.append(item['id'])
|
||||||
|
|
||||||
|
total_all_flairs = 0
|
||||||
|
duplicate_count = 0
|
||||||
|
outfile.write("[\n")
|
||||||
for submission in reddit.subreddit('placeAtlas2').new(limit=2000):
|
for submission in reddit.subreddit('placeAtlas2').new(limit=2000):
|
||||||
"""
|
"""
|
||||||
Auth setup
|
Auth setup
|
||||||
|
@ -47,10 +50,14 @@
|
||||||
4. Pull Request
|
4. Pull Request
|
||||||
|
|
||||||
"""
|
"""
|
||||||
#print(dir(submission))
|
total_all_flairs += 1
|
||||||
if (submission.id in existing_ids):
|
if (submission.id in existing_ids):
|
||||||
print("Found first duplicate!")
|
print("Found first duplicate!")
|
||||||
break
|
duplicate_count += 1
|
||||||
|
if (duplicate_count > 50):
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
continue
|
||||||
if(submission.link_flair_text == "New Entry"):
|
if(submission.link_flair_text == "New Entry"):
|
||||||
text = submission.selftext
|
text = submission.selftext
|
||||||
#Old backslash filter:
|
#Old backslash filter:
|
||||||
|
@ -73,7 +80,7 @@
|
||||||
lines[i] = line.replace("\"id\": 0", "\"id\": "+"\""+str(submission.id)+"\"")
|
lines[i] = line.replace("\"id\": 0", "\"id\": "+"\""+str(submission.id)+"\"")
|
||||||
text = "\n".join(lines)
|
text = "\n".join(lines)
|
||||||
try:
|
try:
|
||||||
outfile.write(json.dumps(json.loads(text))+",\n")
|
outfile.write(json.dumps(json.loads(text))+" ,\n")
|
||||||
successcount += 1
|
successcount += 1
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
failfile.write(text+",\n")
|
failfile.write(text+",\n")
|
||||||
|
@ -81,4 +88,10 @@
|
||||||
print("written "+submission.id+" submitted "+str(round(time.time()-submission.created_utc))+" seconds ago")
|
print("written "+submission.id+" submitted "+str(round(time.time()-submission.created_utc))+" seconds ago")
|
||||||
totalcount += 1
|
totalcount += 1
|
||||||
|
|
||||||
print(f"\n\nSuccess: {successcount}/{totalcount}\nFail: {failcount}/{totalcount}\nPlease check manual_atlas.txt for failed entries to manually resolve.")
|
# Remove ,\n
|
||||||
|
outfile.seek(outfile.tell()-4, os.SEEK_SET)
|
||||||
|
outfile.truncate()
|
||||||
|
|
||||||
|
outfile.write("\n]")
|
||||||
|
|
||||||
|
print(f"\n\nTotal all flairs:{total_all_flairs}\nSuccess: {successcount}/{totalcount}\nFail: {failcount}/{totalcount}\nPlease check manual_atlas.txt for failed entries to manually resolve.")
|
||||||
|
|
Loading…
Reference in a new issue