From 74b8073a7b73daaca67eab7c575bafa0defe8ec4 Mon Sep 17 00:00:00 2001 From: Hans5958 Date: Tue, 19 Apr 2022 16:42:20 +0700 Subject: [PATCH] Do stricter (and proper) subreddit regex 2 I forgot that I made it so it excludes underscore, here's a proper fix --- tools/formatter.py | 22 +++++++++++----------- tools/migrate_atlas_format.py | 2 +- web/_js/draw.js | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/formatter.py b/tools/formatter.py index 0e8d6e52..391a8b9e 100644 --- a/tools/formatter.py +++ b/tools/formatter.py @@ -22,30 +22,30 @@ """ FS_REGEX = { "commatization": r'( *(,+ +|,+ |,+)| +)(and|&|;)( *(,+ +|,+ |,+)| +)|, *$| +', - "pattern1": r'\/*[rR]\/([A-Za-z0-9][A-Za-z0-9_]{3,21})(?:\/$)?', - "pattern2": r'^\/*[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{3,21})(?:\/$)?', - "pattern3": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{3,21})(?:\/[^" ]*)*', - "pattern1user": r'\/*(?:u|user)\/([A-Za-z0-9][A-Za-z0-9_]{3,21})(?:\/$)?', - "pattern2user": r'^\/*(?:u|user)(?!\/)([A-Za-z0-9][A-Za-z0-9_]{3,21})(?:\/$)?', - "pattern3user": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/(?:u|user)\/([A-Za-z0-9][A-Za-z0-9_]{3,21})(?:\/[^" ]*)*', - "pattern1new": r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{3,21})(?:\/[^" ]*)*' + "pattern1": r'\/*[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/$)?', + "pattern2": r'^\/*[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/$)?', + "pattern3": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ]*)*', + "pattern1user": r'\/*(?:u|user)\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/$)?', + "pattern2user": r'^\/*(?:u|user)(?!\/)([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/$)?', + "pattern3user": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/(?:u|user)\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ]*)*', + "pattern1new": r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ]*)*' # "pattern4": r'(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*', # "pattern5": r'\[(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\]\((?:https:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\)"', } VALIDATE_REGEX = { - "subreddit": r'^ *\/?r\/([A-Za-z0-9][A-Za-z0-9_]{3,21}) *(, *\/?r\/([A-Za-z0-9][A-Za-z0-9_]{3,21}) *)*$|^$', + "subreddit": r'^ *\/?r\/([A-Za-z0-9][A-Za-z0-9_]{2,20}) *(, *\/?r\/([A-Za-z0-9][A-Za-z0-9_]{2,20}) *)*$|^$', "website": r'^https?://[^\s/$.?#].[^\s]*$|^$' } CL_REGEX = r'\[(.+?)\]\((.+?)\)' CWTS_REGEX = { - "url": r'^(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{3,21})(?:\/)$', - "subreddit": r'^\/*[rR]\/([A-Za-z0-9][A-Za-z0-9_]{3,21})\/?$' + "url": r'^(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/)$', + "subreddit": r'^\/*[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})\/?$' } CSTW_REGEX = { "website": r'^https?://[^\s/$.?#].[^\s]*$', - "user": r'^\/*u\/([A-Za-z0-9][A-Za-z0-9_]{3,21})$' + "user": r'^\/*u\/([A-Za-z0-9][A-Za-z0-9_]{2,20})$' } # r/... to /r/... diff --git a/tools/migrate_atlas_format.py b/tools/migrate_atlas_format.py index 6f48f083..e1de11bd 100644 --- a/tools/migrate_atlas_format.py +++ b/tools/migrate_atlas_format.py @@ -26,7 +26,7 @@ def per_line_entries(entries: list): EXPANSION_2_RANGE = (109, END_IMAGE) COMMATIZATION = re.compile(r'(?: *(?:,+ +|,+ |,+)| +)(?:and|&|;)(?: *(?:,+ +|,+ |,+)| +)|, *$| +') -FS_REGEX = re.compile(r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{3,21})(?:\/[^" ]*)*') +FS_REGEX = re.compile(r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ]*)*') with open(file_path, 'r+', encoding='UTF-8') as file: entries = json.loads(file.read()) diff --git a/web/_js/draw.js b/web/_js/draw.js index 153fb0e6..7967dca4 100644 --- a/web/_js/draw.js +++ b/web/_js/draw.js @@ -272,7 +272,7 @@ function initDraw() { }) const inputWebsite = websiteField.value.split('\n').map(line => line.trim()).filter(line => line) - const inputSubreddit = subredditField.value.split('\n').map(line => line.trim().replace(/(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{3,21})(?:\/[^" ]*)*/, '$1')).filter(line => line) + const inputSubreddit = subredditField.value.split('\n').map(line => line.trim().replace(/(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ]*)*/, '$1')).filter(line => line) const inputDiscord = discordField.value.split('\n').map(line => line.trim().replace(/(?:https?:\/\/)?(?:www\.)?(?:(?:discord)?\.?gg|discord(?:app?)\.com\/invite)\/([^\s/]+?)(?=\b)/, '$1')).filter(line => line) const inputWiki = wikiField.value.split('\n').map(line => line.trim().replace(/ /g, '_')).filter(line => line)