mirror of
https://github.com/placeAtlas/atlas.git
synced 2024-12-26 18:24:10 +01:00
Support some other symbols
This commit is contained in:
parent
a2e483f156
commit
796bd09a15
1 changed file with 6 additions and 4 deletions
|
@ -18,7 +18,7 @@
|
||||||
- [place.reddit.com](https://place.reddit.com)
|
- [place.reddit.com](https://place.reddit.com)
|
||||||
"""
|
"""
|
||||||
FS_REGEX = {
|
FS_REGEX = {
|
||||||
"commatization": r',*(?: +and)? +',
|
"commatization": r',*(?: +(and|&))? +',
|
||||||
"pattern1": r'\/*[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
|
"pattern1": r'\/*[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
|
||||||
"pattern2": r'^\/*[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
|
"pattern2": r'^\/*[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
|
||||||
"pattern3": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*',
|
"pattern3": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*',
|
||||||
|
@ -74,6 +74,11 @@ def collapse_links(entry: dict):
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
def remove_extras(entry: dict):
|
def remove_extras(entry: dict):
|
||||||
|
if "subreddit" in entry and entry["subreddit"]:
|
||||||
|
# if not entry["subreddit"].startswith('/r/'):
|
||||||
|
# entry["subreddit"] = re.sub(r'^(.*)(?=\/r\/)', r'', entry["subreddit"])
|
||||||
|
entry["subreddit"] = re.sub(r'[.,]+$', r'', entry["subreddit"])
|
||||||
|
|
||||||
for key in entry:
|
for key in entry:
|
||||||
if not entry[key] or not isinstance(entry[key], str):
|
if not entry[key] or not isinstance(entry[key], str):
|
||||||
continue
|
continue
|
||||||
|
@ -90,9 +95,6 @@ def remove_extras(entry: dict):
|
||||||
if entry[key] in ["n/a", "N/A", "na", "NA", "-", "null", "none", "None"]:
|
if entry[key] in ["n/a", "N/A", "na", "NA", "-", "null", "none", "None"]:
|
||||||
entry[key] = ""
|
entry[key] = ""
|
||||||
|
|
||||||
# if "subreddit" in entry and entry["subreddit"] and not entry["subreddit"].startswith('/r/'):
|
|
||||||
# entry["subreddit"] = re.sub(r'^(.*)(?=\/r\/)', r'', entry["subreddit"])
|
|
||||||
|
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
def fix_r_caps(entry: dict):
|
def fix_r_caps(entry: dict):
|
||||||
|
|
Loading…
Reference in a new issue