mirror of
https://github.com/placeAtlas/atlas.git
synced 2025-01-13 13:03:46 +01:00
Add and improve scripts
This commit is contained in:
parent
8c57d43de7
commit
a097844861
3 changed files with 97 additions and 31 deletions
|
@ -5,19 +5,21 @@
|
|||
|
||||
def go(path):
    """Collapse matches whose two captured groups are equal, rewriting ``path`` in place.

    Every match of the module-level ``pattern`` whose group 1 equals group 2
    is replaced by group 2 alone; all other text is left untouched.

    NOTE(review): ``pattern`` is defined above the visible part of this file;
    it is assumed to be a compiled regex with at least two groups.

    Args:
        path: Path of the UTF-8 text file to fix.
    """
    print(f"Fixing {path}...")

    # Read-only is enough here: the file is reopened for writing below.
    with open(path, "r", encoding='UTF-8') as f1:
        contents = f1.read()

    def _dedupe(match):
        # Substituting at the match position (instead of str.replace on the
        # matched text) guarantees the occurrence that was actually matched
        # is the one rewritten.
        if match.group(1) == match.group(2):
            return match.group(2)
        return match.group(0)

    # Two passes, as before. Presumably the second pass catches duplicates
    # that only become adjacent after the first, since one pass only sees
    # non-overlapping matches.
    for i in range(2):
        contents = pattern.sub(_dedupe, contents)
        print(f"Stage {i+1} completed.")

    with open(path, "w", encoding='UTF-8') as f2:
        f2.write(contents)
    print("Writing completed. All done.")
|
||||
|
||||
|
||||
# Run the fixer once per atlas data file (the migration file was listed twice).
go("../web/atlas.json")
go("../web/atlas-before-ids-migration.json")
|
37
tools/misc-formats.py
Normal file
37
tools/misc-formats.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
import re
|
||||
|
||||
def go(path):
    """Clean up common formatting slips in the text file at ``path``, in place.

    The file is handled as plain text (it is never parsed) and is rewritten
    after these steps, applied in this order:

    1. Whitespace right after ``": "`` and right before a closing ``"`` that
       is followed by ``,`` or ``}`` is removed.
    2. Runs of two or more spaces become one space.
    3. Runs of two or more commas become one comma.
    4. The placeholder strings ``"n/a"``, ``"N/A"``, ``"-"``, ``"none"`` and
       ``"null"`` become ``""``.
    5. A capitalized ``R/`` prefix becomes ``r/``.

    Args:
        path: Path of the UTF-8 text file to fix.
    """
    print(f"Fixing {path}...")

    # Read-only is enough here: the file is reopened for writing below.
    with open(path, "r", encoding='UTF-8') as f1:
        contents = f1.read()

    contents = re.sub(r'": "(\s+)', r'": "', contents)
    contents = re.sub(r'(\s+)"(, |,|\})', r'"\2', contents)
    print("Leading and trailing spaces removed.")

    contents = re.sub(r' {2,}', r' ', contents)
    print("Double spaces removed.")

    contents = re.sub(r',{2,}', r',', contents)
    print("Double commas removed.")

    # Plain literals, replaced one after another in the original order.
    for placeholder in ('"n/a"', '"N/A"', '"-"', '"none"', '"null"'):
        contents = contents.replace(placeholder, '""')
    print("Pseudo-empty strings converted into empty strings.")

    # \b keeps the rewrite away from an "R/" that merely ends a longer word,
    # e.g. "AR/VR" must not turn into "Ar/VR".
    contents = re.sub(r'\bR/', 'r/', contents)
    print("Capitalization of r/ has been fixed.")

    with open(path, "w", encoding='UTF-8') as f2:
        f2.write(contents)
    print("Writing completed. All done.")
|
||||
|
||||
# Clean both atlas data files, current file first.
for atlas_path in ("../web/atlas.json", "../web/atlas-before-ids-migration.json"):
    go(atlas_path)
|
|
@ -1,38 +1,65 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
import re
|
||||
# One whole `"subreddit": "..."` entry with a non-empty value; group 1 is the value.
patternParent = re.compile(r'"subreddit": ?"(?!")(.+?)"')
# A run of spaces, optionally preceded by commas; rewritten to ", " between items.
patternCommatization = re.compile(r',* +')
# In pattern1-pattern4, group 1 is always the bare subreddit name.
pattern1 = re.compile(r'\/?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?')
pattern2 = re.compile(r'^\/?[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?')
pattern3 = re.compile(r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*')
# The link label may be any text (including a URL, as in the examples below),
# and http:// is accepted just like in pattern3.
pattern4 = re.compile(r'\[[^\]"]*\]\((?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" )]*)*\)')
# pattern5 = re.compile(r'(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*')
# pattern6 = re.compile(r'\[(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\]\((?:https:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\)"')
"""
Examples:
1. - /r/place
   - r/place
2. /rplace
3. - https://www.reddit.com/r/place
   - www.reddit.com/r/place
   - reddit.com/r/place
4. - [https://www.reddit.com/r/place](https://www.reddit.com/r/place)
   - [www.reddit.com/r/place](www.reddit.com/r/place)
   - [reddit.com/r/place](reddit.com/r/place)
UNUSED AND FAULTY
5. - https://place.reddit.com
   - place.reddit.com
6. - [https://place.reddit.com](https://place.reddit.com)
   - [place.reddit.com](https://place.reddit.com)
"""


def replaceStage1(contents: str) -> str:
    """Normalize every subreddit reference in ``contents`` to ``/r/<name>``.

    Args:
        contents: The value of one ``"subreddit"`` field; it may hold several
            references separated by spaces and/or commas.

    Returns:
        The text with runs of spaces (and any commas directly before them)
        replaced by ``", "`` and each recognized reference rewritten as
        ``/r/<name>``.
    """
    contents = patternCommatization.sub(', ', contents)

    # r/... to /r/.. (change if not needed)
    template = r"/r/\1"
    # Most specific form first: Markdown links contain URLs and URLs contain
    # an r/<name> part, so a broader pattern run earlier would only rewrite a
    # fragment of the longer form.
    contents = pattern4.sub(template, contents)
    contents = pattern3.sub(template, contents)
    contents = pattern1.sub(template, contents)
    contents = pattern2.sub(template, contents)
    return contents
|
||||
|
||||
def go(path):
    """Normalize every ``"subreddit"`` value in the file at ``path``, in place.

    Each entry matched by ``patternParent`` has its value passed through
    ``replaceStage1`` and is written back as ``"subreddit": "<value>"``.
    For the path ``../web/atlas-before-ids-migration.json`` no space is put
    after the colon.

    Args:
        path: Path of the UTF-8 text file to fix.
    """
    print(f"Fixing {path}...")

    # Read-only is enough here: the file is reopened for writing below.
    with open(path, "r", encoding='UTF-8') as f1:
        contents = f1.read()

    # NOTE(review): presumably the pre-migration file is stored without a
    # space after the colon; confirm against the data file.
    if path == "../web/atlas-before-ids-migration.json":
        prefix = '"subreddit":"'
    else:
        prefix = '"subreddit": "'

    def _rewrite(matchParent):
        subredditLink = replaceStage1(matchParent.group(1))
        if not subredditLink:
            # Nothing usable came back; keep the entry exactly as it was.
            return matchParent.group(0)
        return prefix + subredditLink + '"'

    # A single substitution pass rewrites each entry where it was matched,
    # instead of searching the whole text again for every match.
    contents = patternParent.sub(_rewrite, contents)

    with open(path, "w", encoding='UTF-8') as f2:
        f2.write(contents)
    print("Writing completed. All done.")
|
||||
|
||||
# Normalize both atlas data files, current file first.
for atlas_path in ("../web/atlas.json", "../web/atlas-before-ids-migration.json"):
    go(atlas_path)
|
Loading…
Reference in a new issue