Fix commatization regex

This commit is contained in:
Hans5958 2022-04-29 17:12:40 +07:00
parent 50585d1686
commit 8019a27a45
2 changed files with 2 additions and 2 deletions

View file

@@ -21,7 +21,7 @@
- [place.reddit.com](https://place.reddit.com) - [place.reddit.com](https://place.reddit.com)
""" """
FS_REGEX = { FS_REGEX = {
"commatization": r'( *(,+ +|,+ |,+)| +)(and|&|;)( *(,+ +|,+ |,+)| +)|, *$| +', "commatization": r'[,;& ]+(?:and)?[,;& ]*?',
"pattern1": r'\/*[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/$)?', "pattern1": r'\/*[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/$)?',
"pattern2": r'^\/*[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/$)?', "pattern2": r'^\/*[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/$)?',
"pattern3": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ]*)*', "pattern3": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ]*)*',

View file

@@ -15,7 +15,7 @@
EXPANSION_1_RANGE = (56, END_IMAGE) EXPANSION_1_RANGE = (56, END_IMAGE)
EXPANSION_2_RANGE = (109, END_IMAGE) EXPANSION_2_RANGE = (109, END_IMAGE)
COMMATIZATION = re.compile(r'(?: *(?:,+ +|,+ |,+)| +)(?:and|&|;)(?: *(?:,+ +|,+ |,+)| +)|, *$| +') COMMATIZATION = re.compile(r'[,;& ]+(?:and)?[,;& ]*?')
FS_REGEX = re.compile(r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ]*)*') FS_REGEX = re.compile(r'(?:(?:(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com)?\/)?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{2,20})(?:\/[^" ]*)*')
def migrate_atlas_format(entry: dict): def migrate_atlas_format(entry: dict):