From b21fbff22846ae15726feacdb87957339f7beb57 Mon Sep 17 00:00:00 2001 From: Hans5958 Date: Mon, 18 Apr 2022 12:21:24 +0700 Subject: [PATCH] Also convert to the new links format, some other edits --- tools/migrate_atlas_format.py | 85 ++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 15 deletions(-) diff --git a/tools/migrate_atlas_format.py b/tools/migrate_atlas_format.py index f974d331..2a0618f9 100644 --- a/tools/migrate_atlas_format.py +++ b/tools/migrate_atlas_format.py @@ -1,5 +1,13 @@ import os import json +import re + +''' +Migrator script from old atlas format to remastered atlas format. +- center and path: single -> time-specific +- website and subreddit: single strings -> links object +''' +# # Migrates the old atlas format (single center/path) to the remastered atlas format (time-boxed centers/paths) @@ -11,34 +19,81 @@ def per_line_entries(entries: list): file_path = os.path.join('..', 'web', 'atlas.json') -end_image = 167 -init_canvas_range = (1, end_image) -expansion_1_range = (56, end_image) -expansion_2_range = (109, end_image) +END_IMAGE = 166 +INIT_CANVAS_RANGE = (1, END_IMAGE) +EXPANSION_1_RANGE = (56, END_IMAGE) +EXPANSION_2_RANGE = (109, END_IMAGE) + +COMMATIZATION = re.compile(r'(?: *(?:,+ +|,+ |,+)| +)(?:and|&|;)(?: *(?:,+ +|,+ |,+)| +)|, *$| +') +FS_REGEX = re.compile(r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*') with open(file_path, 'r+', encoding='UTF-8') as file: entries = json.loads(file.read()) - + +index = 0 + for entry in entries: + new_entry = { + "id": "", + "name": "", + "description": "", + "links": {}, + "center": {}, + "path": {}, + } + center = entry['center'] + path = entry['path'] + if isinstance(center, list): - path = entry['path'] # Use the center to figure out which canvas expansion the entry is in. if center[1] > 1000: - time_range = expansion_2_range + time_range = EXPANSION_2_RANGE elif center[0] > 1000: - time_range = expansion_1_range + time_range = EXPANSION_1_RANGE else: - time_range = init_canvas_range - + time_range = INIT_CANVAS_RANGE + time_key = '%d-%d, T:0-2' % time_range - entry['center'] = { - time_key: center - } - entry['path'] = { - time_key: path + + new_entry = { + **new_entry, + "center": { + time_key: center + }, + "path": { + time_key: path + } } + del entry['center'] + del entry['path'] + + if "website" in entry: + if isinstance(entry["website"], str) and entry["website"]: + new_entry['links']['website'] = [entry['website']] + del entry['website'] + + if "subreddit" in entry: + if isinstance(entry["subreddit"], str) and entry["subreddit"]: + new_entry['links']['subreddit'] = list(map(lambda x: FS_REGEX.sub(r"\1", x), COMMATIZATION.split(entry['subreddit']))) + del entry['subreddit'] + + entries[index] = { + **new_entry, + **entry + } + + index += 1 + + if not (index % 1000): + print(f"{index} checked.") + +print(f"{len(entries)} checked.") +print("Writing...") + with open(file_path, 'w', encoding='utf-8', newline='\n') as f2: f2.write(per_line_entries(entries)) + +print("All done!") \ No newline at end of file