mirror of
https://github.com/placeAtlas/atlas.git
synced 2024-09-27 12:39:18 +02:00
Fix crawler and merger scripts; fix typo in formatter
This commit is contained in:
parent
0e5df81ea4
commit
37bfe79d3e
3 changed files with 20 additions and 10 deletions
|
@ -342,7 +342,7 @@ def remove_empty_and_similar(entry: dict):
|
|||
|
||||
if "links" in entry:
|
||||
|
||||
for key in entry["li/nks"]:
|
||||
for key in entry["links"]:
|
||||
small = list(map(lambda x: x.lower(), entry["links"][key]))
|
||||
entry["links"][key] = [x for x in entry["links"][key] if x and x.lower() in small]
|
||||
|
||||
|
|
|
@ -30,23 +30,32 @@
|
|||
if entry['id'] in out_dupe_ids:
|
||||
continue
|
||||
|
||||
if ('edit' in entry and entry['edit']) or entry['id'] in out_ids:
|
||||
if 'edit' in entry and entry['edit']:
|
||||
index = next((i for i, item in enumerate(atlas_json) if item["id"] == entry['id']), None)
|
||||
|
||||
assert index != None, "Edit failed! ID not found on Atlas."
|
||||
|
||||
print(f"Edited {atlas_json[index]['id']} with {entry['edit']}")
|
||||
|
||||
if 'edit' in entry:
|
||||
out_edited_added_ids.append(entry['edit'])
|
||||
del entry['edit']
|
||||
if 'submitted_by' in atlas_json[index]:
|
||||
atlas_json[index].contributors = [ atlas_json[index]['submitted_by'] ]
|
||||
atlas_json[index]['contributors'] = [ atlas_json[index]['submitted_by'] ]
|
||||
elif not 'contributors' in atlas_json[index]:
|
||||
atlas_json[index]['contributors'] = []
|
||||
entry['contributors'] = atlas_json[index]['contributors'] + list(set(entry['contributors']) - set(atlas_json[index]['contributors']))
|
||||
atlas_json[index] = entry
|
||||
else:
|
||||
print(f"Added {entry['id']}.")
|
||||
atlas_json.append(entry)
|
||||
|
||||
print('Writing...')
|
||||
with open('../web/atlas.json', 'w', encoding='utf-8') as atlas_file:
|
||||
atlas_file.write(per_line_entries(atlas_json))
|
||||
|
||||
with open('../data/edit-ids.txt', 'a', encoding='utf-8') as edit_ids_file:
|
||||
edit_ids_file.write('\n'.join(out_edited_added_ids) + '\n')
|
||||
with open('../data/read-ids.txt', 'a', encoding='utf-8') as read_ids_file:
|
||||
with open('read-ids-temp.txt', 'r', encoding='utf-8') as read_ids_temp_file:
|
||||
read_ids_file.writelines(read_ids_temp_file.readlines())
|
||||
|
||||
print('All done.')
|
|
@ -105,7 +105,7 @@ def set_flair(submission, flair):
|
|||
rawtext = text
|
||||
|
||||
text = text.replace('\u200c', '')
|
||||
text = re.compile(r".*(\{.+\}).*", re.DOTALL).search(text).group(1)
|
||||
text = re.compile(r"(\{.+\})", re.DOTALL).search(text).group(0)
|
||||
# Test if it needs to escape the escape character. Usually happens on fancy mode.
|
||||
try: json.loads(text)
|
||||
except json.JSONDecodeError: text = re.sub(r"\\(.)", r"\1", text)
|
||||
|
@ -118,7 +118,7 @@ def set_flair(submission, flair):
|
|||
|
||||
assert submission_json["id"] != 0, "Edit invalid because ID is tampered, it must not be 0!"
|
||||
|
||||
submission_json_dummy = {"id": submission_json["id"], "edit": True}
|
||||
submission_json_dummy = {"id": submission_json["id"], "edit": submission.id}
|
||||
submission_json["contributors"] = []
|
||||
|
||||
try:
|
||||
|
@ -147,7 +147,8 @@ def set_flair(submission, flair):
|
|||
|
||||
assert validation_status < 3, \
|
||||
"Submission invalid after validation. This may be caused by not enough points on the path."
|
||||
|
||||
|
||||
OUT_FILE_LINES[len(OUT_FILE_LINES) - 2].replace('\n', ',\n')
|
||||
OUT_FILE_LINES.insert(len(OUT_FILE_LINES) - 1, json.dumps(submission_json, ensure_ascii=False) + '\n')
|
||||
READ_IDS_FILE.write(submission.id + '\n')
|
||||
successcount += 1
|
||||
|
@ -155,7 +156,7 @@ def set_flair(submission, flair):
|
|||
|
||||
except Exception as e:
|
||||
FAIL_FILE.write(
|
||||
"\n\n" + "="*40 + "\n\n" +
|
||||
"\n\n" + "="*40 + "\n\nSubmission ID:" +
|
||||
submission.id + "\n\n" +
|
||||
traceback.format_exc() + "\n\n" +
|
||||
"==== RAW ====" + "\n\n" +
|
||||
|
@ -166,7 +167,7 @@ def set_flair(submission, flair):
|
|||
failcount += 1
|
||||
set_flair(submission, "Rejected Entry")
|
||||
|
||||
print("Wrote "+submission.id+", submitted "+str(round(time.time()-submission.created_utc))+" seconds ago")
|
||||
print("Wrote " + submission.id + ", submitted " + str(round(time.time()-submission.created_utc)) + " seconds ago")
|
||||
totalcount += 1
|
||||
|
||||
OUT_FILE.writelines(OUT_FILE_LINES)
|
||||
|
|
Loading…
Reference in a new issue