mirror of
https://github.com/placeAtlas/atlas.git
synced 2024-11-02 21:59:41 +01:00
65 lines
No EOL
2.4 KiB
Python
65 lines
No EOL
2.4 KiB
Python
#!/usr/bin/python
|
|
|
|
import re
|
|
|
|
# Matches a `"subreddit": "<value>"` pair (the space after the colon is optional).
# The `(?!")` lookahead skips empty values; group 1 is the value, captured lazily
# up to the next double quote.
patternParent = re.compile(r'"subreddit": ?"(?!")(.+?)"')
# Zero or more commas followed by one or more spaces: the separator between
# several links in one value.
patternCommatization = re.compile(r',* +')
# "r/name" or "/r/name" anywhere in the string. Group 1 is the name: one
# alphanumeric character followed by 1-20 alphanumerics/underscores. A trailing
# slash is consumed only when it is the last character of the string.
pattern1 = re.compile(r'\/?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?')
# "rname" or "/rname" at the very start of the string, i.e. the slash between
# "r" and the name is missing (the `(?!\/)` lookahead rejects "r/").
pattern2 = re.compile(r'^\/?[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?')
# reddit.com/r/name with an optional http(s) scheme and an optional
# www/old/new/np subdomain. Any following path segments (up to a double quote
# or a space) are consumed as part of the match.
pattern3 = re.compile(r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*')
# Markdown link "[label](reddit URL)" whose label is a bare subreddit-style name.
# Group 1 is the name taken from the URL, not from the label. Only an "https://"
# scheme (or no scheme) is accepted here, unlike pattern3 which also takes "http://".
pattern4 = re.compile(r'\[[A-Za-z0-9][A-Za-z0-9_]{1,20}\]\((?:(?:https:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*\)')
# Disabled: "<name>.reddit.com" subdomain form (listed as unused and faulty in the examples below).
# pattern5 = re.compile(r'(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*')
# Disabled: markdown-link variant of the subdomain form.
# pattern6 = re.compile(r'\[(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\]\((?:https:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\)"')
|
|
"""
|
|
Examples:
|
|
1. - /r/place
|
|
- r/place
|
|
2. /rplace
|
|
3. - https://www.reddit.com/r/place
|
|
- www.reddit.com/r/place
|
|
- reddit.com/r/place
|
|
4. - [https://www.reddit.com/r/place](https://www.reddit.com/r/place)
|
|
- [www.reddit.com/r/place](www.reddit.com/r/place)
|
|
- [reddit.com/r/place](reddit.com/r/place)
|
|
UNUSED AND FAULTY
|
|
5. - https://place.reddit.com
|
|
- place.reddit.com
|
|
6. - [https://place.reddit.com](https://place.reddit.com)
|
|
- [place.reddit.com](https://place.reddit.com)
|
|
"""
|
|
|
|
def replaceStage1(contents: str):
    """Rewrite every recognised subreddit reference in *contents* as "/r/<name>".

    Separators between entries (optional commas followed by spaces) are first
    normalised to ", ". The link patterns are then applied one after another;
    each substitution keeps only the captured subreddit name.

    Args:
        contents: The raw value of a "subreddit" field.

    Returns:
        The value with its links converted to the "/r/<name>" form.
    """
    normalized = patternCommatization.sub(', ', contents)

    # Target form is "/r/<name>" (change the template if another form is wanted).
    link_template = r"/r/\1"

    # Order matters: markdown links are collapsed before the bare URLs inside
    # them, and URLs before the short "r/name" and "rname" forms.
    for link_pattern in (pattern4, pattern3, pattern1, pattern2):
        normalized = link_pattern.sub(link_template, normalized)
    return normalized
|
|
|
|
def go(path):
    """Normalise every "subreddit" value in the file at *path*, in place.

    The file is read as UTF-8 text, each `"subreddit": "<value>"` pair found
    by ``patternParent`` has its value passed through ``replaceStage1``, and
    the resulting text is written back to the same path.

    Args:
        path: Path of the file to rewrite.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
    """
    print(f"Fixing {path}...")

    # "r+" (rather than "r") makes a non-writable file fail here, before any
    # work has been done.
    with open(path, "r+", encoding='UTF-8') as f1:
        contents = f1.read()

    # This one path gets the compact form (no space after the colon); every
    # other file is written with a space.
    if path == "../web/atlas-before-ids-migration.json":
        prefix = '"subreddit":"'
    else:
        prefix = '"subreddit": "'

    def normalizePair(matchParent):
        # Build the replacement text for one matched "subreddit" pair.
        subredditLink = replaceStage1(matchParent.group(1))
        if not subredditLink:
            # Nothing usable came back: leave the pair exactly as it was.
            return matchParent.group(0)
        return prefix + subredditLink + '"'

    # Convert all values to the /r/... format in a single pass. Substituting at
    # the matched span avoids re-searching and copying the whole file once per
    # match, and cannot touch an identical-looking pair elsewhere.
    contents = patternParent.sub(normalizePair, contents)

    with open(path, "w", encoding='UTF-8') as f2:
        f2.write(contents)
    print("Writing completed. All done.")
|
|
|
|
go("../web/atlas.json")
|
|
go("../web/atlas-before-ids-migration.json") |