mirror of
https://github.com/placeAtlas/atlas.git
synced 2024-11-17 15:31:43 +01:00
Optimize and remove redundant code, make some tamer
This commit is contained in:
parent
ede4bc8208
commit
2db0dcce6c
1 changed files with 15 additions and 15 deletions
|
@ -11,13 +11,10 @@
|
||||||
3. - https://www.reddit.com/r/place
|
3. - https://www.reddit.com/r/place
|
||||||
- www.reddit.com/r/place
|
- www.reddit.com/r/place
|
||||||
- reddit.com/r/place
|
- reddit.com/r/place
|
||||||
4. - [https://www.reddit.com/r/place](https://www.reddit.com/r/place)
|
|
||||||
- [www.reddit.com/r/place](www.reddit.com/r/place)
|
|
||||||
- [reddit.com/r/place](reddit.com/r/place)
|
|
||||||
UNUSED AND FAULTY
|
UNUSED AND FAULTY
|
||||||
5. - https://place.reddit.com
|
4. - https://place.reddit.com
|
||||||
- place.reddit.com
|
- place.reddit.com
|
||||||
6. - [https://place.reddit.com](https://place.reddit.com)
|
5. - [https://place.reddit.com](https://place.reddit.com)
|
||||||
- [place.reddit.com](https://place.reddit.com)
|
- [place.reddit.com](https://place.reddit.com)
|
||||||
"""
|
"""
|
||||||
FS_REGEX = {
|
FS_REGEX = {
|
||||||
|
@ -25,9 +22,8 @@
|
||||||
"pattern1": r'\/?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
|
"pattern1": r'\/?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
|
||||||
"pattern2": r'^\/?[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
|
"pattern2": r'^\/?[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
|
||||||
"pattern3": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*',
|
"pattern3": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*',
|
||||||
"pattern4": r'\[[A-Za-z0-9][A-Za-z0-9_]{1,20}\]\((?:(?:https:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*\)',
|
# "pattern4": r'(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*',
|
||||||
# "pattern5": r'(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*',
|
# "pattern5": r'\[(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\]\((?:https:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\)"',
|
||||||
# "pattern6": r'\[(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\]\((?:https:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\)"',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CL_REGEX = r'\[(.+?)\]\((.+?)\)'
|
CL_REGEX = r'\[(.+?)\]\((.+?)\)'
|
||||||
|
@ -41,7 +37,6 @@ def format_subreddit(entry: dict):
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
subredditLink = entry["subreddit"]
|
subredditLink = entry["subreddit"]
|
||||||
subredditLink = re.sub(FS_REGEX["commatization"], ', ', subredditLink)
|
|
||||||
subredditLink = re.sub(FS_REGEX["pattern4"], SUBREDDIT_TEMPLATE, subredditLink)
|
subredditLink = re.sub(FS_REGEX["pattern4"], SUBREDDIT_TEMPLATE, subredditLink)
|
||||||
subredditLink = re.sub(FS_REGEX["pattern3"], SUBREDDIT_TEMPLATE, subredditLink)
|
subredditLink = re.sub(FS_REGEX["pattern3"], SUBREDDIT_TEMPLATE, subredditLink)
|
||||||
subredditLink = re.sub(FS_REGEX["pattern1"], SUBREDDIT_TEMPLATE, subredditLink)
|
subredditLink = re.sub(FS_REGEX["pattern1"], SUBREDDIT_TEMPLATE, subredditLink)
|
||||||
|
@ -73,21 +68,26 @@ def remove_extras(entry: dict):
|
||||||
# Leading and trailing spaces
|
# Leading and trailing spaces
|
||||||
entry[key] = re.sub(r'^(\s+)', r'', entry[key])
|
entry[key] = re.sub(r'^(\s+)', r'', entry[key])
|
||||||
entry[key] = re.sub(r'(\s+)$', r'', entry[key])
|
entry[key] = re.sub(r'(\s+)$', r'', entry[key])
|
||||||
# Double spaces and commas
|
# Double characters
|
||||||
entry[key] = re.sub(r' {2,}', r' ', entry[key])
|
entry[key] = re.sub(r' {2,}', r' ', entry[key])
|
||||||
entry[key] = re.sub(r'\n{2,}', r'\n', entry[key])
|
entry[key] = re.sub(r'\n{2,}', r'\n', entry[key])
|
||||||
|
entry[key] = re.sub(r'\/{2,}', r'\/', entry[key])
|
||||||
entry[key] = re.sub(r',{2,}', r',', entry[key])
|
entry[key] = re.sub(r',{2,}', r',', entry[key])
|
||||||
# Pseudo-empty strings
|
# Pseudo-empty strings
|
||||||
if entry[key] in ["n/a", "N/A", "-", "null", "none", "None"]:
|
if entry[key] in ["n/a", "N/A", "-", "null", "none", "None"]:
|
||||||
entry[key] = ""
|
entry[key] = ""
|
||||||
|
|
||||||
|
# if "subreddit" in entry and entry["subreddit"] and not entry["subreddit"].startswith('/r/'):
|
||||||
|
# entry["subreddit"] = re.sub(r'^(.*)(?=\/r\/)', r'', entry["subreddit"])
|
||||||
|
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
def fix_r_caps(entry: dict):
    """Lower-case a capitalised subreddit prefix (``R/`` or ``/R/``) in the description.

    Only an ``R/`` that is not directly preceded by a word character is
    rewritten, so text such as ``AR/VR`` is left untouched while ``R/place``
    and ``/R/place`` become ``r/place`` and ``/r/place``.

    Args:
        entry: Entry dictionary; only the ``"description"`` key is read and
            (when present and non-empty) rewritten in place.

    Returns:
        The same ``entry`` object, for call chaining.
    """
    if not entry.get("description"):
        return entry

    # A negative lookbehind keeps the preceding character without needing a
    # back-reference in the replacement.  The earlier form used the non-raw
    # strings '\1/r/' and '\1r/', in which '\1' is the control character
    # U+0001 rather than "group 1", corrupting the text.  Because '/' is itself
    # a non-word character, this single pattern also covers the '/R/' case.
    entry["description"] = re.sub(r'(?<!\w)R/', 'r/', entry["description"])

    return entry
|
||||||
|
|
||||||
|
@ -125,18 +125,18 @@ def format_all(entry: dict, silent=False):
|
||||||
def print_(*args, **kwargs):
|
def print_(*args, **kwargs):
|
||||||
if not silent:
|
if not silent:
|
||||||
print(*args, **kwargs)
|
print(*args, **kwargs)
|
||||||
print_("Removing extras...")
|
|
||||||
entry = remove_extras(entry)
|
|
||||||
print_("Fixing r/ capitalization...")
|
print_("Fixing r/ capitalization...")
|
||||||
entry = fix_r_caps(entry)
|
entry = fix_r_caps(entry)
|
||||||
print_("Fixing links without protocol...")
|
print_("Fixing links without protocol...")
|
||||||
entry = fix_no_protocol_urls(entry)
|
entry = fix_no_protocol_urls(entry)
|
||||||
print_("Collapsing Markdown links...")
|
|
||||||
entry = collapse_links(entry)
|
|
||||||
print_("Fix formatting of subreddit...")
|
print_("Fix formatting of subreddit...")
|
||||||
entry = format_subreddit(entry)
|
entry = format_subreddit(entry)
|
||||||
|
print_("Collapsing Markdown links...")
|
||||||
|
entry = collapse_links(entry)
|
||||||
print_("Converting website links to subreddit (if possible)...")
|
print_("Converting website links to subreddit (if possible)...")
|
||||||
entry = convert_website_to_subreddit(entry)
|
entry = convert_website_to_subreddit(entry)
|
||||||
|
print_("Removing extras...")
|
||||||
|
entry = remove_extras(entry)
|
||||||
print_("Completed!")
|
print_("Completed!")
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue