mirror of
https://github.com/placeAtlas/atlas.git
synced 2024-11-16 15:01:44 +01:00
Move subreddit-formatted websites to subreddit
This commit is contained in:
parent
68690b2c39
commit
69ecd7351f
1 changed file with 15 additions and 5 deletions
|
@ -35,7 +35,10 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
CL_REGEX = r'\[(.+?)\]\((.+?)\)'
|
CL_REGEX = r'\[(.+?)\]\((.+?)\)'
|
||||||
CWTS_REGEX = r'^(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/)$'
|
CWTS_REGEX = {
|
||||||
|
"url": r'^(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/)$',
|
||||||
|
"subreddit": r'^\/*[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})\/?$'
|
||||||
|
}
|
||||||
CSTW_REGEX = {
|
CSTW_REGEX = {
|
||||||
"website": r'^https?://[^\s/$.?#].[^\s]*$',
|
"website": r'^https?://[^\s/$.?#].[^\s]*$',
|
||||||
"user": r'^\/*u\/([A-Za-z0-9][A-Za-z0-9_]{1,20})$'
|
"user": r'^\/*u\/([A-Za-z0-9][A-Za-z0-9_]{1,20})$'
|
||||||
|
@ -122,8 +125,15 @@ def convert_website_to_subreddit(entry: dict):
|
||||||
if not "website" in entry or not entry['website']:
|
if not "website" in entry or not entry['website']:
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
if re.match(CWTS_REGEX, entry["website"]):
|
if re.match(CWTS_REGEX["url"], entry["website"]):
|
||||||
new_subreddit = re.sub(CWTS_REGEX, SUBREDDIT_TEMPLATE, entry["website"])
|
new_subreddit = re.sub(CWTS_REGEX["url"], SUBREDDIT_TEMPLATE, entry["website"])
|
||||||
|
if (new_subreddit.lower() == entry["subreddit"].lower()):
|
||||||
|
entry["website"] = ""
|
||||||
|
elif not "subreddit" in entry or entry['subreddit'] == "":
|
||||||
|
entry["subreddit"] = new_subreddit
|
||||||
|
entry["website"] = ""
|
||||||
|
elif re.match(CWTS_REGEX["subreddit"], entry["website"]):
|
||||||
|
new_subreddit = re.sub(CWTS_REGEX["subreddit"], SUBREDDIT_TEMPLATE, entry["website"])
|
||||||
if (new_subreddit.lower() == entry["subreddit"].lower()):
|
if (new_subreddit.lower() == entry["subreddit"].lower()):
|
||||||
entry["website"] = ""
|
entry["website"] = ""
|
||||||
elif not "subreddit" in entry or entry['subreddit'] == "":
|
elif not "subreddit" in entry or entry['subreddit'] == "":
|
||||||
|
@ -171,8 +181,6 @@ def print_(*args, **kwargs):
|
||||||
print(*args, **kwargs)
|
print(*args, **kwargs)
|
||||||
print_("Fixing r/ capitalization...")
|
print_("Fixing r/ capitalization...")
|
||||||
entry = fix_r_caps(entry)
|
entry = fix_r_caps(entry)
|
||||||
print_("Fixing links without protocol...")
|
|
||||||
entry = fix_no_protocol_urls(entry)
|
|
||||||
print_("Fix formatting of subreddit...")
|
print_("Fix formatting of subreddit...")
|
||||||
entry = format_subreddit(entry)
|
entry = format_subreddit(entry)
|
||||||
print_("Collapsing Markdown links...")
|
print_("Collapsing Markdown links...")
|
||||||
|
@ -181,6 +189,8 @@ def print_(*args, **kwargs):
|
||||||
entry = convert_website_to_subreddit(entry)
|
entry = convert_website_to_subreddit(entry)
|
||||||
print_("Converting subreddit links to website (if needed)...")
|
print_("Converting subreddit links to website (if needed)...")
|
||||||
entry = convert_subreddit_to_website(entry)
|
entry = convert_subreddit_to_website(entry)
|
||||||
|
print_("Fixing links without protocol...")
|
||||||
|
entry = fix_no_protocol_urls(entry)
|
||||||
print_("Removing extras...")
|
print_("Removing extras...")
|
||||||
entry = remove_extras(entry)
|
entry = remove_extras(entry)
|
||||||
print_("Validating...")
|
print_("Validating...")
|
||||||
|
|
Loading…
Reference in a new issue