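Support some other symbols (accept `&` alongside `and` in the commatization regex; strip trailing `.` and `,` from the subreddit field)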

This commit is contained in:
Hans5958 2022-04-07 23:25:25 +07:00
parent a2e483f156
commit 796bd09a15

View file

@ -18,7 +18,7 @@
- [place.reddit.com](https://place.reddit.com) - [place.reddit.com](https://place.reddit.com)
""" """
FS_REGEX = { FS_REGEX = {
"commatization": r',*(?: +and)? +', "commatization": r',*(?: +(and|&))? +',
"pattern1": r'\/*[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?', "pattern1": r'\/*[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
"pattern2": r'^\/*[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?', "pattern2": r'^\/*[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
"pattern3": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*', "pattern3": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*',
@ -74,6 +74,11 @@ def collapse_links(entry: dict):
return entry return entry
def remove_extras(entry: dict): def remove_extras(entry: dict):
if "subreddit" in entry and entry["subreddit"]:
# if not entry["subreddit"].startswith('/r/'):
# entry["subreddit"] = re.sub(r'^(.*)(?=\/r\/)', r'', entry["subreddit"])
entry["subreddit"] = re.sub(r'[.,]+$', r'', entry["subreddit"])
for key in entry: for key in entry:
if not entry[key] or not isinstance(entry[key], str): if not entry[key] or not isinstance(entry[key], str):
continue continue
@ -90,9 +95,6 @@ def remove_extras(entry: dict):
if entry[key] in ["n/a", "N/A", "na", "NA", "-", "null", "none", "None"]: if entry[key] in ["n/a", "N/A", "na", "NA", "-", "null", "none", "None"]:
entry[key] = "" entry[key] = ""
# if "subreddit" in entry and entry["subreddit"] and not entry["subreddit"].startswith('/r/'):
# entry["subreddit"] = re.sub(r'^(.*)(?=\/r\/)', r'', entry["subreddit"])
return entry return entry
def fix_r_caps(entry: dict): def fix_r_caps(entry: dict):