mirror of
https://github.com/placeAtlas/atlas.git
synced 2024-09-27 20:48:56 +02:00
Formatter support for new format
This commit is contained in:
parent
4648401a38
commit
bb2704c2d2
1 changed files with 213 additions and 68 deletions
|
def format_subreddit(entry: dict):
    """
    Fix formatting of the value on "subreddit".

    Handles both places a subreddit can live: the legacy "subreddit" string
    and the entry["links"]["subreddit"] list. The entry is modified in place
    and returned.
    """
    legacy = entry.get("subreddit")
    if legacy:
        legacy = re.sub(FS_REGEX["commatization"], ', ', legacy)
        # Order matters: pattern3 runs before pattern1 and pattern2.
        for name in ("pattern3", "pattern1", "pattern2"):
            legacy = re.sub(FS_REGEX[name], SUBREDDIT_TEMPLATE, legacy)
        for name in ("pattern3user", "pattern1user", "pattern2user"):
            legacy = re.sub(FS_REGEX[name], USER_TEMPLATE, legacy)
        entry["subreddit"] = legacy

    if "links" in entry and "subreddit" in entry["links"]:
        subreddits = entry["links"]["subreddit"]
        for index, link in enumerate(subreddits):
            link = re.sub(FS_REGEX["commatization"], ', ', link)
            # List items keep only the first captured group of each pattern.
            for name in ("pattern3", "pattern1", "pattern2"):
                link = re.sub(FS_REGEX[name], r"\1", link)
            subreddits[index] = link

    return entry
|
||||
|
||||
def collapse_links(entry: dict):
|
||||
"""
|
||||
Collapses Markdown links.
|
||||
"""
|
||||
|
||||
if "website" in entry and entry['website']:
|
||||
website = entry["website"];
|
||||
|
||||
website = entry["website"]
|
||||
|
||||
if re.search(CL_REGEX, website):
|
||||
match = re.search(CL_REGEX, website)
|
||||
if match.group(1) == match.group(2):
|
||||
|
@ -80,8 +98,23 @@ def collapse_links(entry: dict):
|
|||
|
||||
entry["website"] = website
|
||||
|
||||
elif "links" in entry and "website" in entry["links"]:
|
||||
|
||||
for i in range(len(entry["links"]["website"])):
|
||||
|
||||
website = entry["links"]["website"][i]
|
||||
|
||||
if re.search(CL_REGEX, website):
|
||||
match = re.search(CL_REGEX, website)
|
||||
if match.group(1) == match.group(2):
|
||||
website = match.group(2)
|
||||
|
||||
entry["links"]["website"][i] = website
|
||||
|
||||
if "subreddit" in entry and entry['subreddit']:
|
||||
subreddit = entry["subreddit"];
|
||||
|
||||
subreddit = entry["subreddit"]
|
||||
|
||||
if re.search(CL_REGEX, subreddit):
|
||||
match = re.search(CL_REGEX, subreddit)
|
||||
if match.group(1) == match.group(2):
|
||||
|
@ -89,12 +122,27 @@ def collapse_links(entry: dict):
|
|||
|
||||
entry["subreddit"] = subreddit
|
||||
|
||||
elif "links" in entry and "subreddit" in entry["links"]:
|
||||
|
||||
for i in range(len(entry["links"]["subreddit"])):
|
||||
|
||||
subreddit = entry["links"]["subreddit"][i]
|
||||
|
||||
if re.search(CL_REGEX, subreddit):
|
||||
match = re.search(CL_REGEX, subreddit)
|
||||
if match.group(1) == match.group(2):
|
||||
subreddit = match.group(2)
|
||||
|
||||
entry["links"]["subreddit"][i] = subreddit
|
||||
|
||||
|
||||
return entry
|
||||
|
||||
def remove_extras(entry: dict):
|
||||
"""
|
||||
Removing unnecessary extra characters and converts select characters.
|
||||
"""
|
||||
|
||||
if "subreddit" in entry and entry["subreddit"]:
|
||||
# if not entry["subreddit"].startswith('/r/'):
|
||||
# entry["subreddit"] = re.sub(r'^(.*)(?=\/r\/)', r'', entry["subreddit"])
|
||||
|
def remove_duplicate_points(entry: dict):
    """
    Removes points from paths that occur twice after each other.

    entry["path"] may be a plain list of points or a dict mapping a key to a
    list of points; every list is edited in place. The path is treated as a
    closed shape, so a last point equal to the first one is dropped as well.
    Paths with fewer than two points are left untouched, and a path made only
    of identical points collapses to a single point instead of becoming empty.

    Returns the same entry object.
    """
    if "path" not in entry:
        return entry

    raw = entry["path"]
    paths = [raw] if isinstance(raw, list) else list(raw.values())
    for path in paths:
        _dedupe_path(path)

    return entry


def _dedupe_path(path: list):
    """Drop, in place, every point that equals its cyclic successor."""
    count = len(path)
    if count < 2:
        # Nothing to compare against; also avoids indexing an empty path.
        return
    kept = [
        point for index, point in enumerate(path)
        if point != path[(index + 1) % count]
    ]
    # If every point was identical, keep one rather than emptying the path.
    path[:] = kept or [path[0]]
|
||||
|
||||
|
@ -138,6 +200,7 @@ def fix_r_caps(entry: dict):
|
|||
"""
|
||||
Fixes capitalization of /r/. (/R/place -> /r/place)
|
||||
"""
|
||||
|
||||
if not "description" in entry or not entry['description']:
|
||||
return entry
|
||||
|
||||
|
def fix_no_protocol_urls(entry: dict):
    """
    Fixes URLs with no protocol by adding "https://" protocol.

    When entry["links"]["website"] exists, every non-empty item that does not
    start with "http" is prefixed in place. Otherwise the legacy "website"
    string is prefixed under the same rule. Empty values are left as they are.

    Returns the same entry object.
    """
    if "links" in entry and "website" in entry["links"]:
        websites = entry["links"]["website"]
        for index, url in enumerate(websites):
            if url and not url.startswith("http"):
                # Prefix the list item itself, not the legacy "website" field.
                websites[index] = "https://" + url
    elif entry.get("website") and not entry["website"].startswith("http"):
        entry["website"] = "https://" + entry["website"]

    return entry
|
||||
|
||||
|
def convert_website_to_subreddit(entry: dict):
    """
    Converts the subreddit link on "website" to "subreddit" if possible.

    For entries with entry["links"]["website"], each item matching
    CWTS_REGEX["url"] or CWTS_REGEX["subreddit"] is reduced to its first
    captured group. If that value is already in entry["links"]["subreddit"]
    the website item is blanked; if there is no subreddit yet, the value is
    added to the subreddit list and the website item is blanked.

    Otherwise the legacy "website"/"subreddit" strings get the same treatment,
    with the value built from SUBREDDIT_TEMPLATE and compared
    case-insensitively.

    Blanked items are set to "" rather than removed. Returns the same entry
    object.
    """
    if "links" in entry and "website" in entry["links"]:
        links = entry["links"]
        for index, website in enumerate(links["website"]):
            # The "url" pattern takes precedence over the "subreddit" one.
            for pattern in (CWTS_REGEX["url"], CWTS_REGEX["subreddit"]):
                if not re.match(pattern, website):
                    continue
                new_subreddit = re.sub(pattern, r"\1", website)
                # .get() guards entries that have no subreddit list yet.
                if new_subreddit in links.get("subreddit", []):
                    links["website"][index] = ""
                elif not links.get("subreddit"):
                    links.setdefault("subreddit", []).append(new_subreddit)
                    links["website"][index] = ""
                break

    elif "website" in entry and entry["website"]:
        for pattern in (CWTS_REGEX["url"], CWTS_REGEX["subreddit"]):
            if not re.match(pattern, entry["website"]):
                continue
            new_subreddit = re.sub(pattern, SUBREDDIT_TEMPLATE, entry["website"])
            current = entry.get("subreddit")
            if current and new_subreddit.lower() == current.lower():
                entry["website"] = ""
            elif not current:
                entry["subreddit"] = new_subreddit
                entry["website"] = ""
            break

    return entry
|
||||
|
||||
|
def convert_subreddit_to_website(entry: dict):
    """
    Converts the links on "subreddit" to a "website" if needed. This also supports Reddit users (/u/reddit).

    For entries with entry["links"]["subreddit"], an item matching
    CSTW_REGEX["website"] is blanked when it is already listed in
    entry["links"]["website"], or moved there when no website exists yet.
    An item matching CSTW_REGEX["user"] is turned into a
    "https://www.reddit.com/user/<name>" website when no website exists yet.

    Otherwise the legacy "subreddit"/"website" strings get the same treatment,
    compared case-insensitively.

    Blanked items are set to "" rather than removed. Returns the same entry
    object.
    """
    if "links" in entry and "subreddit" in entry["links"]:
        links = entry["links"]
        for index, subreddit in enumerate(links["subreddit"]):
            if re.match(CSTW_REGEX["website"], subreddit):
                if subreddit in links.get("website", []):
                    links["subreddit"][index] = ""
                elif not links.get("website"):
                    # Store into the links list, not the legacy "website" field.
                    links.setdefault("website", []).append(subreddit)
                    links["subreddit"][index] = ""
                continue

            user_match = re.match(CSTW_REGEX["user"], subreddit)
            if user_match and not links.get("website"):
                profile = "https://www.reddit.com/user/" + user_match.group(1)
                links.setdefault("website", []).append(profile)
                links["subreddit"][index] = ""

    elif "subreddit" in entry and entry["subreddit"]:
        website = entry.get("website")
        if re.match(CSTW_REGEX["website"], entry["subreddit"]):
            if website and website.lower() == entry["subreddit"].lower():
                entry["subreddit"] = ""
            elif not website:
                entry["website"] = entry["subreddit"]
                entry["subreddit"] = ""
        else:
            user_match = re.match(CSTW_REGEX["user"], entry["subreddit"])
            if user_match and not website:
                entry["website"] = "https://www.reddit.com/user/" + user_match.group(1)
                entry["subreddit"] = ""

    return entry
|
||||
|
||||
|
@ -235,17 +338,40 @@ def calculate_center(path: list):
|
|||
|
||||
def update_center(entry: dict):
    """
    Checks if the center of an entry is up to date, and updates it if it's either missing or outdated.

    entry["path"] may be a plain list of points, in which case entry["center"]
    holds a single value, or a dict mapping a key to a list of points, in
    which case entry["center"] is a dict with one value per key. Paths with
    fewer than two points are skipped. Centers come from calculate_center(),
    which is defined elsewhere in this file.

    Returns the same entry object.
    """
    if 'path' not in entry:
        return entry

    path = entry['path']
    if isinstance(path, list):
        if len(path) > 1:
            calculated_center = calculate_center(path)
            if 'center' not in entry or entry['center'] != calculated_center:
                entry['center'] = calculated_center
        return entry

    for key, period_path in path.items():
        if len(period_path) <= 1:
            continue
        calculated_center = calculate_center(period_path)
        centers = entry.get('center')
        if not isinstance(centers, dict):
            # A missing or single-value center cannot hold per-key values.
            centers = entry['center'] = {}
        if key not in centers or centers[key] != calculated_center:
            centers[key] = calculated_center

    return entry
|
||||
|
||||
def remove_empty_and_similar(entry: dict):
    """
    Removes empty items on lists, usually from the past formattings.

    Every list under entry["links"] is rebuilt without falsy items and without
    items that differ only in letter case from an earlier item; the first
    spelling wins and order is kept. Entries without "links" are returned
    unchanged.

    Returns the same entry object.
    """
    if "links" not in entry:
        return entry

    links = entry["links"]
    for key in links:
        seen = set()
        kept = []
        for item in links[key]:
            if not item:
                continue
            folded = item.lower()
            if folded in seen:
                continue
            seen.add(folded)
            kept.append(item)
        links[key] = kept

    return entry
|
||||
|
||||
|
||||
def validate(entry: dict):
|
||||
"""
|
||||
Validates the entry. Catch errors and tell warnings related to the entry.
|
||||
|
@ -256,17 +382,34 @@ def validate(entry: dict):
|
|||
2: Warnings that may effect user experience when interacting with the entry
|
||||
3: Errors that make the entry inaccessible or broken.
|
||||
"""
|
||||
|
||||
return_status = 0
|
||||
if (not "id" in entry or (not entry['id'] and not entry['id'] == 0)):
|
||||
print(f"Wait, no id here! How did this happened? {entry}")
|
||||
return_status = 3
|
||||
entry['id'] = '[MISSING_ID]'
|
||||
if not ("path" in entry and isinstance(entry["path"], list) and len(entry["path"]) > 0):
|
||||
print(f"Entry {entry['id']} has no points!")
|
||||
return_status = 3
|
||||
elif len(entry["path"]) < 3:
|
||||
print(f"Entry {entry['id']} only has {len(entry['path'])} point(s)!")
|
||||
|
||||
if "path" in entry:
|
||||
if isinstance(entry['path'], list):
|
||||
if len(entry["path"]) > 0:
|
||||
print(f"Entry {entry['id']} has no points!")
|
||||
return_status = 3
|
||||
elif len(entry["path"]) < 3:
|
||||
print(f"Entry {entry['id']} only has {len(entry['path'])} point(s)!")
|
||||
return_status = 3
|
||||
else:
|
||||
for key in entry['path']:
|
||||
path = entry['path'][key]
|
||||
if len(path) > 0:
|
||||
print(f"Period {key} of entry {entry['id']} has no points!")
|
||||
return_status = 3
|
||||
elif len(path) < 3:
|
||||
print(f"Period {key} of entry {entry['id']} only has {len(entry['path'])} point(s)!")
|
||||
return_status = 3
|
||||
else:
|
||||
print(f"Entry {entry['id']} has no path at all!")
|
||||
return_status = 3
|
||||
|
||||
for key in entry:
|
||||
if key in VALIDATE_REGEX and not re.match(VALIDATE_REGEX[key], entry[key]):
|
||||
if return_status < 2: return_status = 2
|
||||
|
@ -316,6 +459,8 @@ def print_(*args, **kwargs):
|
|||
entry = remove_duplicate_points(entry)
|
||||
print_("Updating center...")
|
||||
entry = update_center(entry)
|
||||
print_("Remove empty items...")
|
||||
entry = remove_empty_and_similar(entry)
|
||||
print_("Validating...")
|
||||
status_code = validate(entry)
|
||||
print_("Completed!")
|
||||
|
|
Loading…
Reference in a new issue