Review notes (free text ahead of the first "diff --git" line; `git apply` and
GNU `patch` skip leading text like this).

Purpose of the patch: make tools/formatter.py handle the list-based
entry["links"][...] values and dict-shaped entry["path"] / entry["center"]
(keyed per period) alongside the legacy single-value keys, and add a
remove_empty_and_similar() step to the formatting pipeline.

Corrections made during review. Only added ("+") lines were edited, one for
one, so removed lines, context lines and every hunk header are unchanged:
  * fix_no_protocol_urls: the links branch now prefixes the list item itself
    (it read entry["website"]); the legacy branch now runs when the website
    is non-empty (the guard was inverted).
  * validate: "has no points" is reported for len == 0 (was > 0, which also
    made the "< 3 points" branch unreachable for short paths); the per-period
    message prints len(path) instead of the number of periods.
  * convert_website_to_subreddit / convert_subreddit_to_website: the links
    branches now read and append to entry["links"][...] instead of the legacy
    top-level keys, and check that the key exists before testing membership;
    the legacy branches check that the key exists before calling .lower().
  * update_center: creates entry['center'] when it is missing instead of
    raising KeyError.
  * remove_empty_and_similar: tolerates entries without "links".
  * remove_duplicate_points: no IndexError on an empty path, so validate()
    can report it.
  * update_center docstring: "a entry" -> "an entry".

NOTE(review): in remove_empty_and_similar the `x.lower() in small` test is
always true (small is built from the same list), so only empty items are
removed. Confirm whether case-insensitive de-duplication was intended.
NOTE(review): indentation and blank context lines were not recoverable from
the copy under review; they are rebuilt here from the hunk line counts using
4-space indentation. If the target file indents differently, apply with
`git apply --ignore-whitespace` or `patch -l`.
NOTE(review): the hash on the "index" line was not recomputed.

diff --git a/tools/formatter.py b/tools/formatter.py
index 44cfdeed..68d68e5f 100644
--- a/tools/formatter.py
+++ b/tools/formatter.py
@@ -52,27 +52,45 @@ def format_subreddit(entry: dict):
     """
     Fix formatting of the value on "subreddit".
     """
-    if not "subreddit" in entry or not entry['subreddit']:
-        return entry
 
-    subredditLink = entry["subreddit"]
-    subredditLink = re.sub(FS_REGEX["commatization"], ', ', subredditLink)
-    subredditLink = re.sub(FS_REGEX["pattern3"], SUBREDDIT_TEMPLATE, subredditLink)
-    subredditLink = re.sub(FS_REGEX["pattern1"], SUBREDDIT_TEMPLATE, subredditLink)
-    subredditLink = re.sub(FS_REGEX["pattern2"], SUBREDDIT_TEMPLATE, subredditLink)
-    subredditLink = re.sub(FS_REGEX["pattern3user"], USER_TEMPLATE, subredditLink)
-    subredditLink = re.sub(FS_REGEX["pattern1user"], USER_TEMPLATE, subredditLink)
-    subredditLink = re.sub(FS_REGEX["pattern2user"], USER_TEMPLATE, subredditLink)
+    if "subreddit" in entry and entry["subreddit"]:
+
+        subredditLink = entry["subreddit"]
+
+        subredditLink = re.sub(FS_REGEX["commatization"], ', ', subredditLink)
+        subredditLink = re.sub(FS_REGEX["pattern3"], SUBREDDIT_TEMPLATE, subredditLink)
+        subredditLink = re.sub(FS_REGEX["pattern1"], SUBREDDIT_TEMPLATE, subredditLink)
+        subredditLink = re.sub(FS_REGEX["pattern2"], SUBREDDIT_TEMPLATE, subredditLink)
+        subredditLink = re.sub(FS_REGEX["pattern3user"], USER_TEMPLATE, subredditLink)
+        subredditLink = re.sub(FS_REGEX["pattern1user"], USER_TEMPLATE, subredditLink)
+        subredditLink = re.sub(FS_REGEX["pattern2user"], USER_TEMPLATE, subredditLink)
+
+        entry["subreddit"] = subredditLink
+
+    if "links" in entry and "subreddit" in entry["links"]:
+
+        for i in range(len(entry["links"]["subreddit"])):
+
+            subredditLink = entry["links"]["subreddit"][i]
+
+            subredditLink = re.sub(FS_REGEX["commatization"], ', ', subredditLink)
+            subredditLink = re.sub(FS_REGEX["pattern3"], r"\1", subredditLink)
+            subredditLink = re.sub(FS_REGEX["pattern1"], r"\1", subredditLink)
+            subredditLink = re.sub(FS_REGEX["pattern2"], r"\1", subredditLink)
+
+            entry["links"]["subreddit"][i] = subredditLink
 
-    if not subredditLink:
-        return entry
-
-    entry["subreddit"] = subredditLink
     return entry
 
 def collapse_links(entry: dict):
+    """
+    Collapses Markdown links.
+    """
+
     if "website" in entry and entry['website']:
-        website = entry["website"];
+
+        website = entry["website"]
+
         if re.search(CL_REGEX, website):
             match = re.search(CL_REGEX, website)
             if match.group(1) == match.group(2):
@@ -80,8 +98,23 @@ def collapse_links(entry: dict):
 
         entry["website"] = website
 
+    elif "links" in entry and "website" in entry["links"]:
+
+        for i in range(len(entry["links"]["website"])):
+
+            website = entry["links"]["website"][i]
+
+            if re.search(CL_REGEX, website):
+                match = re.search(CL_REGEX, website)
+                if match.group(1) == match.group(2):
+                    website = match.group(2)
+
+            entry["links"]["website"][i] = website
+
     if "subreddit" in entry and entry['subreddit']:
-        subreddit = entry["subreddit"];
+
+        subreddit = entry["subreddit"]
+
         if re.search(CL_REGEX, subreddit):
             match = re.search(CL_REGEX, subreddit)
             if match.group(1) == match.group(2):
@@ -89,12 +122,27 @@ def collapse_links(entry: dict):
 
         entry["subreddit"] = subreddit
 
+    elif "links" in entry and "subreddit" in entry["links"]:
+
+        for i in range(len(entry["links"]["subreddit"])):
+
+            subreddit = entry["links"]["subreddit"][i]
+
+            if re.search(CL_REGEX, subreddit):
+                match = re.search(CL_REGEX, subreddit)
+                if match.group(1) == match.group(2):
+                    subreddit = match.group(2)
+
+            entry["links"]["subreddit"][i] = subreddit
+
+
     return entry
 
 def remove_extras(entry: dict):
     """
     Removing unnecessary extra characters and converts select characters.
     """
+
     if "subreddit" in entry and entry["subreddit"]:
         # if not entry["subreddit"].startswith('/r/'):
         #     entry["subreddit"] = re.sub(r'^(.*)(?=\/r\/)', r'', entry["subreddit"])
@@ -124,13 +172,27 @@ def remove_duplicate_points(entry: dict):
     """
     Removes points from paths that occur twice after each other
     """
-    path: list = entry['path']
-    previous: list = path[0]
-    for i in range(len(path)-1, -1, -1):
-        current: list = path[i]
-        if current == previous:
-            path.pop(i)
-        previous = current
+
+    if not "path" in entry:
+        return entry
+
+    if isinstance(entry['path'], list):
+        path: list = entry['path']
+        previous: list = path[0] if path else None
+        for i in range(len(path)-1, -1, -1):
+            current: list = path[i]
+            if current == previous:
+                path.pop(i)
+            previous = current
+    else:
+        for key in entry['path']:
+            path: list = entry['path'][key]
+            previous: list = path[0] if path else None
+            for i in range(len(path)-1, -1, -1):
+                current: list = path[i]
+                if current == previous:
+                    path.pop(i)
+                previous = current
 
     return entry
 
@@ -138,6 +200,7 @@ def fix_r_caps(entry: dict):
     """
     Fixes capitalization of /r/. (/R/place -> /r/place)
     """
+
     if not "description" in entry or not entry['description']:
         return entry
 
@@ -150,11 +213,14 @@ def fix_no_protocol_urls(entry: dict):
     """
     Fixes URLs with no protocol by adding "https://" protocol.
     """
-    if not "website" in entry or not entry['website']:
-        return entry
-
-    if not entry["website"].startswith("http"):
-        entry["website"] = "https://" + entry["website"]
+
+    if "links" in entry and "website" in entry['links']:
+        for i in range(len(entry["links"]["website"])):
+            if entry["links"]["website"][i] and not entry["links"]["website"][i].startswith("http"):
+                entry["links"]["website"][i] = "https://" + entry["links"]["website"][i]
+    elif "website" in entry and entry['website']:
+        if not entry["website"].startswith("http"):
+            entry["website"] = "https://" + entry["website"]
 
     return entry
 
@@ -162,23 +228,43 @@ def convert_website_to_subreddit(entry: dict):
     """
     Converts the subreddit link on "website" to "subreddit" if possible.
     """
-    if not "website" in entry or not entry['website']:
-        return entry
 
-    if re.match(CWTS_REGEX["url"], entry["website"]):
-        new_subreddit = re.sub(CWTS_REGEX["url"], SUBREDDIT_TEMPLATE, entry["website"])
-        if (new_subreddit.lower() == entry["subreddit"].lower()):
-            entry["website"] = ""
-        elif not "subreddit" in entry or entry['subreddit'] == "":
-            entry["subreddit"] = new_subreddit
-            entry["website"] = ""
-    elif re.match(CWTS_REGEX["subreddit"], entry["website"]):
-        new_subreddit = re.sub(CWTS_REGEX["subreddit"], SUBREDDIT_TEMPLATE, entry["website"])
-        if (new_subreddit.lower() == entry["subreddit"].lower()):
-            entry["website"] = ""
-        elif not "subreddit" in entry or entry['subreddit'] == "":
-            entry["subreddit"] = new_subreddit
-            entry["website"] = ""
+    if "links" in entry and "website" in entry["links"]:
+        for i in range(len(entry["links"]["website"])):
+            if re.match(CWTS_REGEX["url"], entry["links"]["website"][i]):
+                new_subreddit = re.sub(CWTS_REGEX["url"], r"\1", entry["links"]["website"][i])
+                if "subreddit" in entry["links"] and new_subreddit in entry["links"]["subreddit"]:
+                    entry["links"]["website"][i] = ""
+                elif not "subreddit" in entry["links"] or len(entry["links"]["subreddit"]) == 0:
+                    if not "subreddit" in entry["links"]:
+                        entry["links"]["subreddit"] = []
+                    entry["links"]["subreddit"].append(new_subreddit)
+                    entry["links"]["website"][i] = ""
+            elif re.match(CWTS_REGEX["subreddit"], entry["links"]["website"][i]):
+                new_subreddit = re.sub(CWTS_REGEX["subreddit"], r"\1", entry["links"]["website"][i])
+                if "subreddit" in entry["links"] and new_subreddit in entry["links"]["subreddit"]:
+                    entry["links"]["website"][i] = ""
+                elif not "subreddit" in entry["links"] or len(entry["links"]["subreddit"]) == 0:
+                    if not "subreddit" in entry["links"]:
+                        entry["links"]["subreddit"] = []
+                    entry["links"]["subreddit"].append(new_subreddit)
+                    entry["links"]["website"][i] = ""
+
+    elif "website" in entry and entry['website']:
+        if re.match(CWTS_REGEX["url"], entry["website"]):
+            new_subreddit = re.sub(CWTS_REGEX["url"], SUBREDDIT_TEMPLATE, entry["website"])
+            if "subreddit" in entry and new_subreddit.lower() == entry["subreddit"].lower():
+                entry["website"] = ""
+            elif not "subreddit" in entry or entry['subreddit'] == "":
+                entry["subreddit"] = new_subreddit
+                entry["website"] = ""
+        elif re.match(CWTS_REGEX["subreddit"], entry["website"]):
+            new_subreddit = re.sub(CWTS_REGEX["subreddit"], SUBREDDIT_TEMPLATE, entry["website"])
+            if "subreddit" in entry and new_subreddit.lower() == entry["subreddit"].lower():
+                entry["website"] = ""
+            elif not "subreddit" in entry or entry['subreddit'] == "":
+                entry["subreddit"] = new_subreddit
+                entry["website"] = ""
 
     return entry
 
@@ -186,20 +272,37 @@ def convert_subreddit_to_website(entry: dict):
     """
     Converts the links on "subreddit" to a "website" if needed. This also supports Reddit users (/u/reddit).
     """
-    if not "subreddit" in entry or not entry['subreddit']:
-        return entry
 
-    if re.match(CSTW_REGEX["website"], entry["subreddit"]):
-        if (entry["website"].lower() == entry["subreddit"].lower()):
-            entry["subreddit"] = ""
-        elif not "website" in entry or entry['website'] == "":
-            entry["website"] = entry["subreddit"]
-            entry["subreddit"] = ""
-    elif re.match(CSTW_REGEX["user"], entry["subreddit"]):
-        if not "website" in entry or entry['website'] == "":
-            username = re.match(CSTW_REGEX["user"], entry["subreddit"]).group(1)
-            entry["website"] = "https://www.reddit.com/user/" + username
-            entry["subreddit"] = ""
+    if "links" in entry and "subreddit" in entry["links"]:
+        for i in range(len(entry["links"]["subreddit"])):
+            if re.match(CSTW_REGEX["website"], entry["links"]["subreddit"][i]):
+                if "website" in entry["links"] and entry["links"]["subreddit"][i] in entry["links"]["website"]:
+                    entry["links"]["subreddit"][i] = ""
+                elif not "website" in entry["links"] or len(entry["links"]["website"]) == 0:
+                    if not "website" in entry["links"]:
+                        entry["links"]["website"] = []
+                    entry["links"]["website"].append(entry["links"]["subreddit"][i])
+                    entry["links"]["subreddit"][i] = ""
+            elif re.match(CSTW_REGEX["user"], entry["links"]["subreddit"][i]):
+                if not "website" in entry["links"] or len(entry["links"]["website"]) == 0:
+                    username = re.match(CSTW_REGEX["user"], entry["links"]["subreddit"][i]).group(1)
+                    if not "website" in entry["links"]:
+                        entry["links"]["website"] = []
+                    entry["links"]["website"].append("https://www.reddit.com/user/" + username)
+                    entry["links"]["subreddit"][i] = ""
+
+    elif "subreddit" in entry and entry['subreddit']:
+        if re.match(CSTW_REGEX["website"], entry["subreddit"]):
+            if "website" in entry and entry["website"].lower() == entry["subreddit"].lower():
+                entry["subreddit"] = ""
+            elif not "website" in entry or entry['website'] == "":
+                entry["website"] = entry["subreddit"]
+                entry["subreddit"] = ""
+        elif re.match(CSTW_REGEX["user"], entry["subreddit"]):
+            if not "website" in entry or entry['website'] == "":
+                username = re.match(CSTW_REGEX["user"], entry["subreddit"]).group(1)
+                entry["website"] = "https://www.reddit.com/user/" + username
+                entry["subreddit"] = ""
 
     return entry
 
@@ -235,17 +338,40 @@ def calculate_center(path: list):
 
 def update_center(entry: dict):
     """
-    checks if the center of a entry is up to date, and updates it if it's either missing or outdated
+    checks if the center of an entry is up to date, and updates it if it's either missing or outdated.
     """
+
     if 'path' not in entry:
         return entry
-    path = entry['path']
-    if len(path) > 1:
-        calculated_center = calculate_center(path)
-        if 'center' not in entry or entry['center'] != calculated_center:
-            entry['center'] = calculated_center
+
+    if isinstance(entry['path'], list):
+        path = entry['path']
+        if len(path) > 1:
+            calculated_center = calculate_center(path)
+            if 'center' not in entry or entry['center'] != calculated_center:
+                entry['center'] = calculated_center
+    else:
+        for key in entry['path']:
+            path = entry['path'][key]
+            if len(path) > 1:
+                calculated_center = calculate_center(path)
+                if 'center' not in entry or key not in entry['center'] or entry['center'][key] != calculated_center:
+                    entry.setdefault('center', {})[key] = calculated_center
+
     return entry
 
+def remove_empty_and_similar(entry: dict):
+    """
+    Removes empty items on lists, usually from the past formattings.
+    """
+
+    for key in entry.get("links", {}):
+        small = list(map(lambda x: x.lower(), entry["links"][key]))
+        entry["links"][key] = [x for x in entry["links"][key] if x and x.lower() in small]
+
+    return entry
+
+
 def validate(entry: dict):
     """
     Validates the entry. Catch errors and tell warnings related to the entry.
@@ -256,17 +382,34 @@ def validate(entry: dict):
     2: Warnings that may effect user experience when interacting with the entry
     3: Errors that make the entry inaccessible or broken.
     """
+
     return_status = 0
     if (not "id" in entry or (not entry['id'] and not entry['id'] == 0)):
         print(f"Wait, no id here! How did this happened? {entry}")
         return_status = 3
         entry['id'] = '[MISSING_ID]'
-    if not ("path" in entry and isinstance(entry["path"], list) and len(entry["path"]) > 0):
-        print(f"Entry {entry['id']} has no points!")
-        return_status = 3
-    elif len(entry["path"]) < 3:
-        print(f"Entry {entry['id']} only has {len(entry['path'])} point(s)!")
+
+    if "path" in entry:
+        if isinstance(entry['path'], list):
+            if len(entry["path"]) == 0:
+                print(f"Entry {entry['id']} has no points!")
+                return_status = 3
+            elif len(entry["path"]) < 3:
+                print(f"Entry {entry['id']} only has {len(entry['path'])} point(s)!")
+                return_status = 3
+        else:
+            for key in entry['path']:
+                path = entry['path'][key]
+                if len(path) == 0:
+                    print(f"Period {key} of entry {entry['id']} has no points!")
+                    return_status = 3
+                elif len(path) < 3:
+                    print(f"Period {key} of entry {entry['id']} only has {len(path)} point(s)!")
+                    return_status = 3
+    else:
+        print(f"Entry {entry['id']} has no path at all!")
         return_status = 3
+
     for key in entry:
         if key in VALIDATE_REGEX and not re.match(VALIDATE_REGEX[key], entry[key]):
             if return_status < 2: return_status = 2
@@ -316,6 +459,8 @@ def print_(*args, **kwargs):
     entry = remove_duplicate_points(entry)
     print_("Updating center...")
     entry = update_center(entry)
+    print_("Remove empty items...")
+    entry = remove_empty_and_similar(entry)
     print_("Validating...")
     status_code = validate(entry)
     print_("Completed!")