mirror of
https://github.com/placeAtlas/atlas.git
synced 2024-11-15 22:43:13 +01:00
105 lines
No EOL
2.6 KiB
Python
105 lines
No EOL
2.6 KiB
Python
import os
|
|
import json
|
|
import re
|
|
|
|
'''
Migrator script from the old atlas format to the remastered atlas format.
- center and path: single value -> time-specific
- website and subreddit: single strings -> links object
- submitted_by -> contributors
'''
|
|
#
|
|
|
|
# Migrates the old atlas format (single center/path) to the remastered atlas format (time-boxed centers/paths)
|
|
|
|
def per_line_entries(entries: list):
    """Serialize entries as a JSON array with one entry per line.

    Each entry is dumped on a single line with ``ensure_ascii=False`` so
    non-ASCII characters are written as-is; consecutive entries are
    separated by a comma followed by a line break.

    Args:
        entries: JSON-serializable objects to write.

    Returns:
        The JSON text: ``[`` on the first line, one entry per line, and
        ``]`` on the last line. An empty list produces an empty array that
        is still valid JSON.
    """
    # Joining (instead of appending a separator after every entry and slicing
    # the last one off) keeps the opening bracket intact when there are no
    # entries, and avoids repeated string concatenation.
    body = ',\n'.join(json.dumps(entry, ensure_ascii=False) for entry in entries)
    return '[\n' + body + '\n]'
|
|
|
|
# Atlas data file that is read and then overwritten by this script. The path
# is relative, so it is resolved against the current working directory.
file_path = os.path.join('..', 'web', 'atlas.json')

# Shared end value of every time range below.
# NOTE(review): presumably the index of the last canvas snapshot - confirm.
END_IMAGE = 166

# (start, end) pairs that get formatted into the '<start>-<end>, T:0' time key.
INIT_CANVAS_RANGE = (1, END_IMAGE)
EXPANSION_1_RANGE = (56, END_IMAGE)
EXPANSION_2_RANGE = (109, END_IMAGE)

# Separator pattern used to split a subreddit string into pieces: "and", "&"
# or ";" with surrounding commas/spaces, a trailing comma, or a run of spaces.
COMMATIZATION = re.compile(r'(?: *(?:,+ +|,+ |,+)| +)(?:and|&|;)(?: *(?:,+ +|,+ |,+)| +)|, *$| +')

# Matches a subreddit reference ("r/name", "/r/name", or a reddit.com URL with
# an optional www/old/new/np subdomain) plus any trailing path segments.
# Group 1 captures the bare subreddit name (3 to 21 characters).
FS_REGEX = re.compile(r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ]*)*')
|
|
|
|
def _migrate_entry(entry: dict) -> dict:
    """Convert one atlas entry from the old format to the remastered format.

    ``entry`` is modified in place: old-format keys that get converted are
    removed from it so they do not reappear in the merged result.

    Args:
        entry: An atlas entry. Must contain ``center`` and ``path``.

    Returns:
        A new dict that starts with the remastered keys (``id``, ``name``,
        ``description``, ``links``, ``center``, ``path``, ``contributors``)
        and is then overlaid with every key still left in ``entry``.

    Raises:
        KeyError: If ``entry`` has no ``center`` or ``path`` key.
    """
    # Defaults first, so the remastered keys lead the key order of the result.
    new_entry = {
        "id": "",
        "name": "",
        "description": "",
        "links": {},
        "center": {},
        "path": {},
        "contributors": []
    }

    center = entry['center']
    path = entry['path']

    # A list-valued center marks the old single-center format. Any other value
    # is left in ``entry`` and overrides the empty defaults in the final merge.
    # NOTE(review): nesting was reconstructed from a copy of this script that
    # had lost its indentation - confirm the two ``del`` statements belong
    # inside this branch.
    if isinstance(center, list):
        # Use the center to figure out which canvas expansion the entry is in.
        if center[1] > 1000:
            time_range = EXPANSION_2_RANGE
        elif center[0] > 1000:
            time_range = EXPANSION_1_RANGE
        else:
            time_range = INIT_CANVAS_RANGE

        time_key = '%d-%d, T:0' % time_range

        new_entry['center'] = {time_key: center}
        new_entry['path'] = {time_key: path}

        del entry['center']
        del entry['path']

    # The old key is dropped whenever it is present; a link is only recorded
    # for a non-empty string.
    # NOTE(review): assumes the removal is unconditional once the key exists -
    # confirm against the original nesting.
    if "website" in entry:
        website = entry.pop('website')
        if isinstance(website, str) and website:
            new_entry['links']['website'] = [website]

    if "subreddit" in entry:
        subreddit = entry.pop('subreddit')
        if isinstance(subreddit, str) and subreddit:
            # Splitting produces empty pieces for leading spaces or a trailing
            # comma; skip them so no empty subreddit names are stored.
            names = [
                FS_REGEX.sub(r"\1", piece)
                for piece in COMMATIZATION.split(subreddit)
                if piece
            ]
            if names:
                new_entry['links']['subreddit'] = names

    if "submitted_by" in entry:
        new_entry['contributors'].append(entry.pop('submitted_by'))

    # Keys remaining in ``entry`` win over the defaults above.
    return {
        **new_entry,
        **entry
    }


# Read-only access is sufficient here; the handle is closed before the file
# is reopened for writing further down.
with open(file_path, 'r', encoding='UTF-8') as file:
    entries = json.load(file)

for index, entry in enumerate(entries):
    entries[index] = _migrate_entry(entry)

    # Progress report after every 1000 processed entries.
    checked = index + 1
    if not (checked % 1000):
        print(f"{checked} checked.")

print(f"{len(entries)} checked.")
print("Writing...")

# newline='\n' keeps the output line endings as "\n" on every platform.
with open(file_path, 'w', encoding='utf-8', newline='\n') as f2:
    f2.write(per_line_entries(entries))

print("All done!")