mirror of
https://github.com/placeAtlas/atlas.git
synced 2024-09-27 20:48:56 +02:00
Also convert to the new links format, some other edits
This commit is contained in:
parent
ce34865053
commit
b21fbff228
1 changed file with 70 additions and 15 deletions
|
@ -1,5 +1,13 @@
|
|||
import os
|
||||
import json
|
||||
import re
|
||||
|
||||
'''
|
||||
Migration script from the old atlas format to the remastered atlas format.
|
||||
- center and path: single value -> object keyed by a time-range string
|
||||
- website and subreddit: single strings -> lists inside a links object
|
||||
'''
|
||||
#
|
||||
|
||||
# Migrates the old atlas format (single center/path) to the remastered atlas format (time-boxed centers/paths)
|
||||
|
||||
|
@ -11,34 +19,81 @@ def per_line_entries(entries: list):
|
|||
|
||||
file_path = os.path.join('..', 'web', 'atlas.json')
|
||||
|
||||
end_image = 167
|
||||
init_canvas_range = (1, end_image)
|
||||
expansion_1_range = (56, end_image)
|
||||
expansion_2_range = (109, end_image)
|
||||
END_IMAGE = 166
|
||||
INIT_CANVAS_RANGE = (1, END_IMAGE)
|
||||
EXPANSION_1_RANGE = (56, END_IMAGE)
|
||||
EXPANSION_2_RANGE = (109, END_IMAGE)
|
||||
|
||||
COMMATIZATION = re.compile(r'(?: *(?:,+ +|,+ |,+)| +)(?:and|&|;)(?: *(?:,+ +|,+ |,+)| +)|, *$| +')
|
||||
FS_REGEX = re.compile(r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*')
|
||||
|
||||
with open(file_path, 'r+', encoding='UTF-8') as file:
|
||||
entries = json.loads(file.read())
|
||||
|
||||
|
||||
index = 0
|
||||
|
||||
for entry in entries:
|
||||
new_entry = {
|
||||
"id": "",
|
||||
"name": "",
|
||||
"description": "",
|
||||
"links": {},
|
||||
"center": {},
|
||||
"path": {},
|
||||
}
|
||||
|
||||
center = entry['center']
|
||||
path = entry['path']
|
||||
|
||||
if isinstance(center, list):
|
||||
path = entry['path']
|
||||
|
||||
# Use the center to figure out which canvas expansion the entry is in.
|
||||
if center[1] > 1000:
|
||||
time_range = expansion_2_range
|
||||
time_range = EXPANSION_2_RANGE
|
||||
elif center[0] > 1000:
|
||||
time_range = expansion_1_range
|
||||
time_range = EXPANSION_1_RANGE
|
||||
else:
|
||||
time_range = init_canvas_range
|
||||
|
||||
time_range = INIT_CANVAS_RANGE
|
||||
|
||||
time_key = '%d-%d, T:0-2' % time_range
|
||||
entry['center'] = {
|
||||
time_key: center
|
||||
}
|
||||
entry['path'] = {
|
||||
time_key: path
|
||||
|
||||
new_entry = {
|
||||
**new_entry,
|
||||
"center": {
|
||||
time_key: center
|
||||
},
|
||||
"path": {
|
||||
time_key: path
|
||||
}
|
||||
}
|
||||
|
||||
del entry['center']
|
||||
del entry['path']
|
||||
|
||||
if "website" in entry:
|
||||
if isinstance(entry["website"], str) and entry["website"]:
|
||||
new_entry['links']['website'] = [entry['website']]
|
||||
del entry['website']
|
||||
|
||||
if "subreddit" in entry:
|
||||
if isinstance(entry["subreddit"], str) and entry["subreddit"]:
|
||||
new_entry['links']['subreddit'] = list(map(lambda x: FS_REGEX.sub(r"\1", x), COMMATIZATION.split(entry['subreddit'])))
|
||||
del entry['subreddit']
|
||||
|
||||
entries[index] = {
|
||||
**new_entry,
|
||||
**entry
|
||||
}
|
||||
|
||||
index += 1
|
||||
|
||||
if not (index % 1000):
|
||||
print(f"{index} checked.")
|
||||
|
||||
print(f"{len(entries)} checked.")
|
||||
print("Writing...")
|
||||
|
||||
with open(file_path, 'w', encoding='utf-8', newline='\n') as f2:
|
||||
f2.write(per_line_entries(entries))
|
||||
|
||||
print("All done!")
|
Loading…
Reference in a new issue