Also convert to the new links format, some other edits

2024-09-27 20:48:56 +02:00 · 2022-04-18 12:21:24 +07:00 · 2022-04-18 12:21:24 +07:00 · b21fbff228
commit b21fbff228
parent ce34865053
1 changed file with 70 additions and 15 deletions
--- a/tools/migrate_atlas_format.py
+++ b/tools/migrate_atlas_format.py
@@ -1,5 +1,13 @@
 import os
 import json
+import re
+
+'''
+Migrator script from old atlas format to remastered atlas format.
+- center and path: single -> time-specific
+- website and subreddit: single strings -> links object
+'''
+# 

 # Migrates the old atlas format (single center/path) to the remastered atlas format (time-boxed centers/paths)

@@ -11,34 +19,81 @@ def per_line_entries(entries: list):

 file_path = os.path.join('..', 'web', 'atlas.json')

-end_image = 167
-init_canvas_range = (1, end_image)
-expansion_1_range = (56, end_image)
-expansion_2_range = (109, end_image)
+END_IMAGE = 166
+INIT_CANVAS_RANGE = (1, END_IMAGE)
+EXPANSION_1_RANGE = (56, END_IMAGE)
+EXPANSION_2_RANGE = (109, END_IMAGE)
+
+COMMATIZATION =  re.compile(r'(?: *(?:,+ +|,+ |,+)| +)(?:and|&|;)(?: *(?:,+ +|,+ |,+)| +)|, *$| +')
+FS_REGEX = re.compile(r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*')

 with open(file_path, 'r+', encoding='UTF-8') as file:
  entries = json.loads(file.read())
-  
+
+index = 0
+
 for entry in entries:
+  new_entry = {
+    "id": "",
+    "name": "",
+    "description": "",
+    "links": {},
+    "center": {},
+    "path": {},
+  }
+
  center = entry['center']
+  path = entry['path']
+
  if isinstance(center, list):
-    path = entry['path']
    
    # Use the center to figure out which canvas expansion the entry is in.
    if center[1] > 1000:
-      time_range = expansion_2_range
+      time_range = EXPANSION_2_RANGE
    elif center[0] > 1000:
-      time_range = expansion_1_range
+      time_range = EXPANSION_1_RANGE
    else:
-      time_range = init_canvas_range
-    
+      time_range = INIT_CANVAS_RANGE
+
    time_key = '%d-%d, T:0-2' % time_range
-    entry['center'] = {
-      time_key: center
-    }
-    entry['path'] = {
-      time_key: path
+
+    new_entry = {
+      **new_entry,
+      "center": {
+        time_key: center
+      },
+      "path": {
+        time_key: path
+      }
    }

+    del entry['center']
+    del entry['path']
+
+  if "website" in entry:
+    if isinstance(entry["website"], str) and entry["website"]:
+      new_entry['links']['website'] = [entry['website']]
+    del entry['website']
+
+  if "subreddit" in entry:
+    if isinstance(entry["subreddit"], str) and entry["subreddit"]:
+      new_entry['links']['subreddit'] = list(map(lambda x: FS_REGEX.sub(r"\1", x), COMMATIZATION.split(entry['subreddit'])))
+    del entry['subreddit']
+ 
+  entries[index] = {
+    **new_entry,
+    **entry
+  }
+
+  index += 1
+
+  if not (index % 1000):
+    print(f"{index} checked.")
+
+print(f"{len(entries)} checked.")
+print("Writing...")
+
 with open(file_path, 'w', encoding='utf-8', newline='\n') as f2:
  f2.write(per_line_entries(entries))
+
+print("All done!")