atlas/tools/migrate_atlas_format.py

#!/usr/bin/python

"""
Migrator script from old atlas format to remastered atlas format.
- center and path: single -> time-specific
- website and subreddit: single strings -> links object
- submitted_by -> contributors
"""

import re
import json

# Upper bound shared by every time range below.
# NOTE(review): presumably the index of the last canvas snapshot — confirm.
END_IMAGE = 166
# (start, end) pairs that are formatted into the "<start>-<end>, T:0" keys
# built by migrate_atlas_format(); which pair is used depends on the center.
INIT_CANVAS_RANGE = (1, END_IMAGE)
EXPANSION_1_RANGE = (56, END_IMAGE)
EXPANSION_2_RANGE = (109, END_IMAGE)

# Separator pattern used to split a legacy "subreddit" string into tokens:
# "and" / "&" / ";" with surrounding commas or spaces, a trailing comma, or a
# run of spaces. A comma followed by a space is split on the space only, so
# tokens can keep a trailing comma (cleaned up in _split_subreddits()).
COMMATIZATION = re.compile(r'(?: *(?:,+ +|,+ |,+)| +)(?:and|&|;)(?: *(?:,+ +|,+ |,+)| +)|, *$| +')
# Matches a subreddit reference (optionally prefixed by a reddit.com URL and
# followed by a path); group 1 captures the bare subreddit name.
FS_REGEX = re.compile(r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ]*)*')

def _merge_unique(existing, additions):
	"""Return a new list: `existing` followed by the items of `additions` not already in it."""
	if isinstance(existing, list):
		merged = list(existing)
	elif existing:
		# A lone truthy value (e.g. a single string) is kept as the first item.
		merged = [existing]
	else:
		merged = []
	for item in additions:
		if item not in merged:
			merged.append(item)
	return merged

def _split_subreddits(raw):
	"""Split a legacy subreddit string into a list of bare subreddit names."""
	names = []
	for token in COMMATIZATION.split(raw):
		# Reduce "r/name", "/r/name" or a reddit URL to "name", then drop the
		# commas/spaces the separator pattern leaves attached to the token.
		name = FS_REGEX.sub(r"\1", token).strip(", ")
		# Splitting yields empty tokens for leading/trailing separators.
		if name:
			names.append(name)
	return names

def migrate_atlas_format(entry: dict):
	"""
	Convert one atlas entry from the old format to the remastered format.

	- A list-valued "center" (and the accompanying "path") is wrapped into a
	  dict keyed by a "<start>-<end>, T:0" time key chosen from the center.
	- Non-empty string "website" / "subreddit" values are moved into the
	  "links" object as lists.
	- "submitted_by" is moved into the "contributors" list.

	Values the entry already holds under "links" (dict) or "contributors"
	(list) are kept and extended rather than overwritten. Any other key of
	the entry is carried over unchanged, after the standard keys.

	The passed entry is not modified; a new dict is returned.

	Raises KeyError if the entry has no "center" or "path" key, and
	IndexError if a list-valued center has fewer than two items.
	"""
	# Work on a shallow copy so the caller's dict keeps its legacy keys.
	remaining = dict(entry)

	# Insertion order here fixes the key order of the returned entry.
	new_entry = {
		"id": "",
		"name": "",
		"description": "",
		"links": {},
		"center": {},
		"path": {},
		"contributors": []
	}

	center = remaining['center']
	path = remaining['path']

	# Only a list-valued center is converted; any other value is carried over
	# as-is through the final merge.
	if isinstance(center, list):

		# Use the center to figure out which canvas expansion the entry is in.
		if center[1] > 1000:
			time_range = EXPANSION_2_RANGE
		elif center[0] > 1000:
			time_range = EXPANSION_1_RANGE
		else:
			time_range = INIT_CANVAS_RANGE

		time_key = '%d-%d, T:0' % time_range

		new_entry['center'] = {time_key: center}
		new_entry['path'] = {time_key: path}

		del remaining['center']
		del remaining['path']

	# Seed from already-migrated data so it is merged with, not replaced by
	# (or replacing), the legacy fields handled below.
	if isinstance(remaining.get('links'), dict):
		new_entry['links'] = {
			key: list(value) if isinstance(value, list) else value
			for key, value in remaining.pop('links').items()
		}
	if isinstance(remaining.get('contributors'), list):
		new_entry['contributors'] = list(remaining.pop('contributors'))

	links = new_entry['links']

	# The legacy keys are always removed; only non-empty strings are migrated.
	website = remaining.pop('website', None)
	if isinstance(website, str) and website:
		links['website'] = _merge_unique(links.get('website'), [website])

	subreddit = remaining.pop('subreddit', None)
	if isinstance(subreddit, str) and subreddit:
		names = _split_subreddits(subreddit)
		if names:
			links['subreddit'] = _merge_unique(links.get('subreddit'), names)

	if 'submitted_by' in remaining:
		new_entry['contributors'] = _merge_unique(
			new_entry['contributors'], [remaining.pop('submitted_by')]
		)

	# Remaining keys override the defaults in place (keeping the key order
	# above); keys unknown to new_entry are appended at the end.
	return {
		**new_entry,
		**remaining
	}

def per_line_entries(entries: list):
	"""
	Returns a JSON array string of all the entries, with every entry in one line.

	Falsy entries (e.g. empty dicts or None) are skipped. Non-ASCII characters
	are written as-is instead of being escaped. When nothing is left to write,
	the result is an empty (but still valid) JSON array.
	"""
	lines = [json.dumps(entry, ensure_ascii=False) for entry in entries if entry]
	# Joining keeps the brackets intact even when there are no lines; slicing
	# the trailing ",\n" off would otherwise eat the opening bracket.
	return "[\n" + ",\n".join(lines) + "\n]"

if __name__ == '__main__':

	def go(path):
		"""Migrate every entry of the JSON file at `path` and rewrite the file in place."""

		print(f"Formatting {path}...")

		with open(path, "r+", encoding='UTF-8') as source:
			entries = json.load(source)

		for index, entry in enumerate(entries):
			entries[index] = migrate_atlas_format(entry)
			# Progress report on every 1000th entry, starting with the first.
			if index % 1000 == 0:
				print(f"{index} checked.")

		print(f"{len(entries)} checked. Writing...")

		# newline='\n' writes the "\n" line endings untranslated.
		serialized = per_line_entries(entries)
		with open(path, "w", encoding='utf-8', newline='\n') as target:
			target.write(serialized)

		print("Writing completed. All done.")

	go("../web/atlas.json")
Little rewrite on migrator, migrate submissions if needed, add writing log 2022-04-25 10:50:26 +02:00			`#!/usr/bin/python`
Also convert to the new links format, some other edits 2022-04-18 07:21:24 +02:00
Little rewrite on migrator, migrate submissions if needed, add writing log 2022-04-25 10:50:26 +02:00			`"""`
Also convert to the new links format, some other edits 2022-04-18 07:21:24 +02:00			`Migrator script from old atlas format to remastered atlas format.`
			`- center and path: single -> time-specific`
			`- website and subreddit: single strings -> links object`
Migrate contributors, move contributors to the end, simplify script Forgot that there won't be submitted_by on the submissions 2022-04-18 16:21:02 +02:00			`- submitted_by -> contributors`
Little rewrite on migrator, migrate submissions if needed, add writing log 2022-04-25 10:50:26 +02:00			`"""`
Added script to migrate atlas formats 2022-04-18 05:36:14 +02:00
Little rewrite on migrator, migrate submissions if needed, add writing log 2022-04-25 10:50:26 +02:00			`import re`
			`import json`
Added script to migrate atlas formats 2022-04-18 05:36:14 +02:00
Also convert to the new links format, some other edits 2022-04-18 07:21:24 +02:00			`END_IMAGE = 166`
			`INIT_CANVAS_RANGE = (1, END_IMAGE)`
			`EXPANSION_1_RANGE = (56, END_IMAGE)`
			`EXPANSION_2_RANGE = (109, END_IMAGE)`

			`COMMATIZATION = re.compile(r'(?: (?:,+ +\|,+ \|,+)\| +)(?:and\|&\|;)(?: (?:,+ +\|,+ \|,+)\| +)\|, *$\| +')`
Do stricter (and proper) subreddit regex 2 I forgot that I made it so it excludes underscore, here's a proper fix 2022-04-19 11:42:20 +02:00			`FS_REGEX = re.compile(r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www\|old\|new\|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ])')`
Added script to migrate atlas formats 2022-04-18 05:36:14 +02:00
Little rewrite on migrator, migrate submissions if needed, add writing log 2022-04-25 10:50:26 +02:00			`def migrate_atlas_format(entry: dict):`
			`new_entry = {`
			`"id": "",`
			`"name": "",`
			`"description": "",`
			`"links": {},`
			`"center": {},`
			`"path": {},`
			`"contributors": []`
			`}`

			`center = entry['center']`
			`path = entry['path']`

			`if isinstance(center, list):`

			`# Use the center to figure out which canvas expansion the entry is in.`
			`if center[1] > 1000:`
			`time_range = EXPANSION_2_RANGE`
			`elif center[0] > 1000:`
			`time_range = EXPANSION_1_RANGE`
			`else:`
			`time_range = INIT_CANVAS_RANGE`

			`time_key = '%d-%d, T:0' % time_range`

			`new_entry = {`
			`**new_entry,`
			`"center": {`
			`time_key: center`
			`},`
			`"path": {`
			`time_key: path`
			`}`
			`}`

			`del entry['center']`
			`del entry['path']`

			`if "website" in entry:`
			`if isinstance(entry["website"], str) and entry["website"]:`
			`new_entry['links']['website'] = [entry['website']]`
			`del entry['website']`

			`if "subreddit" in entry:`
			`if isinstance(entry["subreddit"], str) and entry["subreddit"]:`
			`new_entry['links']['subreddit'] = list(map(lambda x: FS_REGEX.sub(r"\1", x), COMMATIZATION.split(entry['subreddit'])))`
			`del entry['subreddit']`

			`if "submitted_by" in entry:`
			`new_entry['contributors'].append(entry['submitted_by'])`
			`del entry['submitted_by']`

			`toreturn = {`
			`**new_entry,`
			`**entry`
			`}`

			`return toreturn`

			`def per_line_entries(entries: list):`
			`"""`
			`Returns a string of all the entries, with every entry in one line.`
			`"""`
			`out = "[\n"`
			`for entry in entries:`
			`if entry:`
			`out += json.dumps(entry, ensure_ascii=False) + ",\n"`
			`out = out[:-2] + "\n]"`
			`return out`

			`if __name__ == '__main__':`

			`def go(path):`

			`print(f"Formatting {path}...")`

			`with open(path, "r+", encoding='UTF-8') as f1:`
			`entries = json.loads(f1.read())`

			`for i in range(len(entries)):`
			`entry_formatted = migrate_atlas_format(entries[i])`
			`entries[i] = entry_formatted`
			`if not (i % 1000):`
			`print(f"{i} checked.")`

			`print(f"{len(entries)} checked. Writing...")`

			`with open(path, "w", encoding='utf-8', newline='\n') as f2:`
			`f2.write(per_line_entries(entries))`

			`print("Writing completed. All done.")`

			`go("../web/atlas.json")`