Fix crawler and merger scripts, fix typo in formatter

This commit is contained in:
Hans5958 2022-04-19 08:55:37 +07:00
parent 0e5df81ea4
commit 37bfe79d3e
3 changed files with 20 additions and 10 deletions

View file

@ -342,7 +342,7 @@ def remove_empty_and_similar(entry: dict):
if "links" in entry:
for key in entry["li/nks"]:
for key in entry["links"]:
small = list(map(lambda x: x.lower(), entry["links"][key]))
entry["links"][key] = [x for x in entry["links"][key] if x and x.lower() in small]

View file

@ -30,23 +30,32 @@
if entry['id'] in out_dupe_ids:
continue
if ('edit' in entry and entry['edit']) or entry['id'] in out_ids:
if 'edit' in entry and entry['edit']:
index = next((i for i, item in enumerate(atlas_json) if item["id"] == entry['id']), None)
assert index != None, "Edit failed! ID not found on Atlas."
print(f"Edited {atlas_json[index]['id']} with {entry['edit']}")
if 'edit' in entry:
out_edited_added_ids.append(entry['edit'])
del entry['edit']
if 'submitted_by' in atlas_json[index]:
atlas_json[index].contributors = [ atlas_json[index]['submitted_by'] ]
atlas_json[index]['contributors'] = [ atlas_json[index]['submitted_by'] ]
elif not 'contributors' in atlas_json[index]:
atlas_json[index]['contributors'] = []
entry['contributors'] = atlas_json[index]['contributors'] + list(set(entry['contributors']) - set(atlas_json[index]['contributors']))
atlas_json[index] = entry
else:
print(f"Added {entry['id']}.")
atlas_json.append(entry)
print('Writing...')
with open('../web/atlas.json', 'w', encoding='utf-8') as atlas_file:
atlas_file.write(per_line_entries(atlas_json))
with open('../data/edit-ids.txt', 'a', encoding='utf-8') as edit_ids_file:
edit_ids_file.write('\n'.join(out_edited_added_ids) + '\n')
with open('../data/read-ids.txt', 'a', encoding='utf-8') as read_ids_file:
with open('read-ids-temp.txt', 'r', encoding='utf-8') as read_ids_temp_file:
read_ids_file.writelines(read_ids_temp_file.readlines())
print('All done.')

View file

@ -105,7 +105,7 @@ def set_flair(submission, flair):
rawtext = text
text = text.replace('\u200c', '')
text = re.compile(r".*(\{.+\}).*", re.DOTALL).search(text).group(1)
text = re.compile(r"(\{.+\})", re.DOTALL).search(text).group(0)
# Test if it needs to escape the escape character. Usually happens on fancy mode.
try: json.loads(text)
except json.JSONDecodeError: text = re.sub(r"\\(.)", r"\1", text)
@ -118,7 +118,7 @@ def set_flair(submission, flair):
assert submission_json["id"] != 0, "Edit invalid because ID is tampered, it must not be 0!"
submission_json_dummy = {"id": submission_json["id"], "edit": True}
submission_json_dummy = {"id": submission_json["id"], "edit": submission.id}
submission_json["contributors"] = []
try:
@ -147,7 +147,8 @@ def set_flair(submission, flair):
assert validation_status < 3, \
"Submission invalid after validation. This may be caused by not enough points on the path."
OUT_FILE_LINES[len(OUT_FILE_LINES) - 2].replace('\n', ',\n')
OUT_FILE_LINES.insert(len(OUT_FILE_LINES) - 1, json.dumps(submission_json, ensure_ascii=False) + '\n')
READ_IDS_FILE.write(submission.id + '\n')
successcount += 1
@ -155,7 +156,7 @@ def set_flair(submission, flair):
except Exception as e:
FAIL_FILE.write(
"\n\n" + "="*40 + "\n\n" +
"\n\n" + "="*40 + "\n\nSubmission ID:" +
submission.id + "\n\n" +
traceback.format_exc() + "\n\n" +
"==== RAW ====" + "\n\n" +
@ -166,7 +167,7 @@ def set_flair(submission, flair):
failcount += 1
set_flair(submission, "Rejected Entry")
print("Wrote "+submission.id+", submitted "+str(round(time.time()-submission.created_utc))+" seconds ago")
print("Wrote " + submission.id + ", submitted " + str(round(time.time()-submission.created_utc)) + " seconds ago")
totalcount += 1
OUT_FILE.writelines(OUT_FILE_LINES)