mirror of
https://github.com/placeAtlas/atlas.git
synced 2024-06-02 03:19:45 +02:00
Little rewrite on migrator, migrate submissions if needed, add writing log
This commit is contained in:
parent
5f7a54010f
commit
710d4a7c6a
|
@ -462,7 +462,7 @@ def go(path):
|
|||
if not (i % 200):
|
||||
print(f"{i} checked.")
|
||||
|
||||
print(f"{len(entries)} checked.")
|
||||
print(f"{len(entries)} checked. Writing...")
|
||||
|
||||
with open(path, "w", encoding='utf-8', newline='\n') as f2:
|
||||
f2.write(per_line_entries(entries))
|
||||
|
|
|
@ -1,24 +1,14 @@
|
|||
import os
|
||||
import json
|
||||
import re
|
||||
#!/usr/bin/python
|
||||
|
||||
'''
|
||||
"""
|
||||
Migrator script from old atlas format to remastered atlas format.
|
||||
- center and path: single -> time-specific
|
||||
- website and subreddit: single strings -> links object
|
||||
- submitted_by -> contributors
|
||||
'''
|
||||
#
|
||||
"""
|
||||
|
||||
# Migrates the old atlas format (single center/path) to the remastered atlas format (time-boxed centers/paths)
|
||||
|
||||
def per_line_entries(entries: list) -> str:
    """Serialize entries as a JSON array with one entry per line.

    Args:
        entries: JSON-serializable objects to write out.

    Returns:
        A string that opens with "[" on its own line, has one compact JSON
        object per line separated by ",\n", and closes with "]" on its own
        line. An empty list produces "[\n]".
    """
    lines = [json.dumps(entry, ensure_ascii=False) for entry in entries]
    if not lines:
        # Trimming the trailing ",\n" from a bare "[\n" used to swallow the
        # opening bracket and return "\n]", which is not valid JSON.
        return '[\n]'
    return '[\n' + ',\n'.join(lines) + '\n]'
|
||||
|
||||
file_path = os.path.join('..', 'web', 'atlas.json')
|
||||
import re
|
||||
import json
|
||||
|
||||
END_IMAGE = 166
|
||||
INIT_CANVAS_RANGE = (1, END_IMAGE)
|
||||
|
@ -28,78 +18,97 @@ def per_line_entries(entries: list):
|
|||
COMMATIZATION = re.compile(r'(?: *(?:,+ +|,+ |,+)| +)(?:and|&|;)(?: *(?:,+ +|,+ |,+)| +)|, *$| +')
|
||||
FS_REGEX = re.compile(r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ]*)*')
|
||||
|
||||
with open(file_path, 'r+', encoding='UTF-8') as file:
|
||||
entries = json.loads(file.read())
|
||||
def migrate_atlas_format(entry: dict):
|
||||
new_entry = {
|
||||
"id": "",
|
||||
"name": "",
|
||||
"description": "",
|
||||
"links": {},
|
||||
"center": {},
|
||||
"path": {},
|
||||
"contributors": []
|
||||
}
|
||||
|
||||
index = 0
|
||||
center = entry['center']
|
||||
path = entry['path']
|
||||
|
||||
for entry in entries:
|
||||
new_entry = {
|
||||
"id": "",
|
||||
"name": "",
|
||||
"description": "",
|
||||
"links": {},
|
||||
"center": {},
|
||||
"path": {},
|
||||
"contributors": []
|
||||
}
|
||||
if isinstance(center, list):
|
||||
|
||||
# Use the center to figure out which canvas expansion the entry is in.
|
||||
if center[1] > 1000:
|
||||
time_range = EXPANSION_2_RANGE
|
||||
elif center[0] > 1000:
|
||||
time_range = EXPANSION_1_RANGE
|
||||
else:
|
||||
time_range = INIT_CANVAS_RANGE
|
||||
|
||||
center = entry['center']
|
||||
path = entry['path']
|
||||
time_key = '%d-%d, T:0' % time_range
|
||||
|
||||
if isinstance(center, list):
|
||||
|
||||
# Use the center to figure out which canvas expansion the entry is in.
|
||||
if center[1] > 1000:
|
||||
time_range = EXPANSION_2_RANGE
|
||||
elif center[0] > 1000:
|
||||
time_range = EXPANSION_1_RANGE
|
||||
else:
|
||||
time_range = INIT_CANVAS_RANGE
|
||||
new_entry = {
|
||||
**new_entry,
|
||||
"center": {
|
||||
time_key: center
|
||||
},
|
||||
"path": {
|
||||
time_key: path
|
||||
}
|
||||
}
|
||||
|
||||
time_key = '%d-%d, T:0' % time_range
|
||||
del entry['center']
|
||||
del entry['path']
|
||||
|
||||
new_entry = {
|
||||
**new_entry,
|
||||
"center": {
|
||||
time_key: center
|
||||
},
|
||||
"path": {
|
||||
time_key: path
|
||||
}
|
||||
}
|
||||
if "website" in entry:
|
||||
if isinstance(entry["website"], str) and entry["website"]:
|
||||
new_entry['links']['website'] = [entry['website']]
|
||||
del entry['website']
|
||||
|
||||
del entry['center']
|
||||
del entry['path']
|
||||
if "subreddit" in entry:
|
||||
if isinstance(entry["subreddit"], str) and entry["subreddit"]:
|
||||
new_entry['links']['subreddit'] = list(map(lambda x: FS_REGEX.sub(r"\1", x), COMMATIZATION.split(entry['subreddit'])))
|
||||
del entry['subreddit']
|
||||
|
||||
if "website" in entry:
|
||||
if isinstance(entry["website"], str) and entry["website"]:
|
||||
new_entry['links']['website'] = [entry['website']]
|
||||
del entry['website']
|
||||
if "submitted_by" in entry:
|
||||
new_entry['contributors'].append(entry['submitted_by'])
|
||||
del entry['submitted_by']
|
||||
|
||||
toreturn = {
|
||||
**new_entry,
|
||||
**entry
|
||||
}
|
||||
|
||||
if "subreddit" in entry:
|
||||
if isinstance(entry["subreddit"], str) and entry["subreddit"]:
|
||||
new_entry['links']['subreddit'] = list(map(lambda x: FS_REGEX.sub(r"\1", x), COMMATIZATION.split(entry['subreddit'])))
|
||||
del entry['subreddit']
|
||||
return toreturn
|
||||
|
||||
if "submitted_by" in entry:
|
||||
new_entry['contributors'].append(entry['submitted_by'])
|
||||
del entry['submitted_by']
|
||||
|
||||
entries[index] = {
|
||||
**new_entry,
|
||||
**entry
|
||||
}
|
||||
def per_line_entries(entries: list) -> str:
    """
    Returns a string of all the entries, with every entry in one line.

    Falsy entries (e.g. None or an empty dict) are skipped. The result is a
    JSON array: "[" on its own line, one compact JSON object per line
    separated by ",\n", and "]" on its own line. When nothing is left to
    write, the result is "[\n]".
    """
    lines = [json.dumps(entry, ensure_ascii=False) for entry in entries if entry]
    if not lines:
        # Trimming the trailing ",\n" from a bare "[\n" used to swallow the
        # opening bracket and return "\n]", which is not valid JSON.
        return "[\n]"
    return "[\n" + ",\n".join(lines) + "\n]"
|
||||
|
||||
index += 1
|
||||
if __name__ == '__main__':
|
||||
|
||||
if not (index % 1000):
|
||||
print(f"{index} checked.")
|
||||
def go(path):
|
||||
|
||||
print(f"{len(entries)} checked.")
|
||||
print("Writing...")
|
||||
print(f"Formatting {path}...")
|
||||
|
||||
with open(file_path, 'w', encoding='utf-8', newline='\n') as f2:
|
||||
f2.write(per_line_entries(entries))
|
||||
with open(path, "r+", encoding='UTF-8') as f1:
|
||||
entries = json.loads(f1.read())
|
||||
|
||||
print("All done!")
|
||||
for i in range(len(entries)):
|
||||
entry_formatted = migrate_atlas_format(entries[i])
|
||||
entries[i] = entry_formatted
|
||||
if not (i % 1000):
|
||||
print(f"{i} checked.")
|
||||
|
||||
print(f"{len(entries)} checked. Writing...")
|
||||
|
||||
with open(path, "w", encoding='utf-8', newline='\n') as f2:
|
||||
f2.write(per_line_entries(entries))
|
||||
|
||||
print("Writing completed. All done.")
|
||||
|
||||
go("../web/atlas.json")
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
import re
|
||||
import traceback
|
||||
from formatter import format_all
|
||||
from migrate_atlas_format import migrate_atlas_format
|
||||
|
||||
OUT_FILE = open('temp_atlas.json', 'w', encoding='utf-8')
|
||||
READ_IDS_FILE = open('read-ids-temp.txt', 'w')
|
||||
|
@ -147,6 +148,8 @@ def set_flair(submission, flair):
|
|||
|
||||
assert validation_status < 3, \
|
||||
"Submission invalid after validation. This may be caused by not enough points on the path."
|
||||
|
||||
submission_json = migrate_atlas_format(submission_json)
|
||||
|
||||
OUT_FILE_LINES[len(OUT_FILE_LINES) - 2].replace('\n', ',\n')
|
||||
OUT_FILE_LINES.insert(len(OUT_FILE_LINES) - 1, json.dumps(submission_json, ensure_ascii=False) + '\n')
|
||||
|
|
Loading…
Reference in a new issue