Mirror of https://github.com/placeAtlas/atlas.git, synced 2024-12-26 17:54:06 +01:00
Merge pull request #1074 from placeAtlas/cleanup

Commit 23db7f330d
8 changed files with 975 additions and 2166 deletions
1	.gitignore	vendored
@@ -13,3 +13,4 @@ combined.js
*.DS_Store
.vscode/
_img/place/
web/atlas-before-ids-migration.json
213	tools/formatter.py	Normal file
@@ -0,0 +1,213 @@
#!/usr/bin/python

import re
import json

"""
Examples:
1. - /r/place
   - r/place
2. /rplace
3. - https://www.reddit.com/r/place
   - www.reddit.com/r/place
   - reddit.com/r/place
UNUSED AND FAULTY
4. - https://place.reddit.com
   - place.reddit.com
5. - [https://place.reddit.com](https://place.reddit.com)
   - [place.reddit.com](https://place.reddit.com)
"""

FS_REGEX = {
    "commatization": r'( *(,+ +|,+ |,+)| +)(and|&|;)( *(,+ +|,+ |,+)| +)|, *$| +',
    "pattern1": r'\/*[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
    "pattern2": r'^\/*[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
    "pattern3": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*',
    "pattern1user": r'\/*(?:u|user)\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
    "pattern2user": r'^\/*(?:u|user)(?!\/)([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?',
    "pattern3user": r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/(?:u|user)\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*',
    # "pattern4": r'(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*',
    # "pattern5": r'\[(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\]\((?:https:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\)"',
}

VALIDATE_REGEX = {
    "subreddit": r'^ *\/?r\/([A-Za-z0-9][A-Za-z0-9_]{1,20}) *(, *\/?r\/([A-Za-z0-9][A-Za-z0-9_]{1,20}) *)*$|^$',
    "website": r'^https?://[^\s/$.?#].[^\s]*$|^$'
}

CL_REGEX = r'\[(.+?)\]\((.+?)\)'
CWTS_REGEX = r'^(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/)$'
CSTW_REGEX = {
    "website": r'^https?://[^\s/$.?#].[^\s]*$',
    "user": r'^\/*u\/([A-Za-z0-9][A-Za-z0-9_]{1,20})$'
}

# r/... to /r/...
SUBREDDIT_TEMPLATE = r"/r/\1"
USER_TEMPLATE = r"/u/\1"

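For illustration, a quick check of how these patterns compose (a sketch, not part of the committed file; assumes the constants above are in scope):

# pattern3 normalizes full reddit URLs down to /r/<name>.
print(re.sub(FS_REGEX["pattern3"], SUBREDDIT_TEMPLATE, "https://www.reddit.com/r/place"))  # /r/place
# pattern3user does the same for user links.
print(re.sub(FS_REGEX["pattern3user"], USER_TEMPLATE, "reddit.com/user/example"))  # /u/example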
def format_subreddit(entry: dict):
    if not "subreddit" in entry or not entry['subreddit']:
        return entry

    subredditLink = entry["subreddit"]
    subredditLink = re.sub(FS_REGEX["commatization"], ', ', subredditLink)
    subredditLink = re.sub(FS_REGEX["pattern3"], SUBREDDIT_TEMPLATE, subredditLink)
    subredditLink = re.sub(FS_REGEX["pattern1"], SUBREDDIT_TEMPLATE, subredditLink)
    subredditLink = re.sub(FS_REGEX["pattern2"], SUBREDDIT_TEMPLATE, subredditLink)
    subredditLink = re.sub(FS_REGEX["pattern3user"], USER_TEMPLATE, subredditLink)
    subredditLink = re.sub(FS_REGEX["pattern1user"], USER_TEMPLATE, subredditLink)
    subredditLink = re.sub(FS_REGEX["pattern2user"], USER_TEMPLATE, subredditLink)

    if not subredditLink:
        return entry

    entry["subreddit"] = subredditLink
    return entry

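For example (a sketch, assuming this module is imported):

# A URL-form subreddit is normalized to /r/<name>.
print(format_subreddit({"subreddit": "www.reddit.com/r/place"}))
# -> {'subreddit': '/r/place'}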
def collapse_links(entry: dict):
    if not "website" in entry or not entry['website']:
        return entry

    website = entry["website"]
    match = re.search(CL_REGEX, website)
    if match and match.group(1) == match.group(2):
        website = match.group(2)

    entry["website"] = website
    return entry

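For example (sketch):

# A self-referential Markdown link collapses to the bare URL.
print(collapse_links({"website": "[https://place.reddit.com](https://place.reddit.com)"}))
# -> {'website': 'https://place.reddit.com'}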
def remove_extras(entry: dict):
    if "subreddit" in entry and entry["subreddit"]:
        # if not entry["subreddit"].startswith('/r/'):
        #     entry["subreddit"] = re.sub(r'^(.*)(?=\/r\/)', r'', entry["subreddit"])
        entry["subreddit"] = re.sub(r'[.,]+$', r'', entry["subreddit"])

    for key in entry:
        if not entry[key] or not isinstance(entry[key], str):
            continue
        # Leading and trailing spaces
        entry[key] = entry[key].strip()
        # Double characters
        entry[key] = re.sub(r' {2,}(?!\n)', r' ', entry[key])
        entry[key] = re.sub(r' {3,}\n', r' ', entry[key])
        entry[key] = re.sub(r'\n{3,}', r'\n\n', entry[key])
        entry[key] = re.sub(r'r\/{2,}', r'r\/', entry[key])
        entry[key] = re.sub(r',{2,}', r',', entry[key])
        # Pseudo-empty strings
        if entry[key] in ["n/a", "N/A", "na", "NA", "-", "null", "none", "None"]:
            entry[key] = ""

    return entry

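For example (sketch):

# Whitespace is trimmed and placeholder values are blanked; non-string values are skipped.
print(remove_extras({"id": 1, "name": "  Foo  ", "website": "n/a"}))
# -> {'id': 1, 'name': 'Foo', 'website': ''}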
def fix_r_caps(entry: dict):
    if not "description" in entry or not entry['description']:
        return entry

    # Raw strings are required here: without the r-prefix, '\1' is an octal
    # escape (chr 0x01), not a backreference.
    entry["description"] = re.sub(r'([^\w]|^)\/R\/', r'\1/r/', entry["description"])
    entry["description"] = re.sub(r'([^\w]|^)R\/', r'\1r/', entry["description"])

    return entry

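For example (sketch):

# Stray capital R/ prefixes in descriptions are lowercased.
print(fix_r_caps({"description": "See /R/place and R/placestart"}))
# -> {'description': 'See /r/place and r/placestart'}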
def fix_no_protocol_urls(entry: dict):
    if not "website" in entry or not entry['website']:
        return entry

    if not entry["website"].startswith("http"):
        entry["website"] = "https://" + entry["website"]

    return entry

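For example (sketch):

# Bare domains gain an https:// prefix; existing http(s) URLs are left alone.
print(fix_no_protocol_urls({"website": "example.com"}))
# -> {'website': 'https://example.com'}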
def convert_website_to_subreddit(entry: dict):
    if not "website" in entry or not entry['website']:
        return entry

    if re.match(CWTS_REGEX, entry["website"]):
        new_subreddit = re.sub(CWTS_REGEX, SUBREDDIT_TEMPLATE, entry["website"])
        # Guard the key lookup so a missing "subreddit" does not raise KeyError.
        if "subreddit" in entry and new_subreddit.lower() == entry["subreddit"].lower():
            entry["website"] = ""
        elif not "subreddit" in entry or entry['subreddit'] == "":
            entry["subreddit"] = new_subreddit
            entry["website"] = ""

    return entry

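For example (a sketch; note that CWTS_REGEX requires the trailing slash):

# A reddit.com/r/... website moves into the empty subreddit field.
print(convert_website_to_subreddit({"website": "https://www.reddit.com/r/place/", "subreddit": ""}))
# -> {'website': '', 'subreddit': '/r/place'}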
def convert_subreddit_to_website(entry: dict):
    if not "subreddit" in entry or not entry['subreddit']:
        return entry

    if re.match(CSTW_REGEX["website"], entry["subreddit"]):
        # Guard the key lookup so a missing "website" does not raise KeyError.
        if "website" in entry and entry["website"].lower() == entry["subreddit"].lower():
            entry["subreddit"] = ""
        elif not "website" in entry or entry['website'] == "":
            entry["website"] = entry["subreddit"]
            entry["subreddit"] = ""
    elif re.match(CSTW_REGEX["user"], entry["subreddit"]):
        if not "website" in entry or entry['website'] == "":
            username = re.match(CSTW_REGEX["user"], entry["subreddit"]).group(1)
            entry["website"] = "https://www.reddit.com/user/" + username
            entry["subreddit"] = ""

    return entry

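For example (sketch):

# A /u/... value in the subreddit field is really a user link and moves to website.
print(convert_subreddit_to_website({"subreddit": "/u/example", "website": ""}))
# -> {'subreddit': '', 'website': 'https://www.reddit.com/user/example'}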
def validate(entry: dict):
    if not "id" in entry or (not entry['id'] and entry['id'] != 0):
        print(f"Wait, no id here! How did this happen? {entry}")
        return
    for key in entry:
        if key in VALIDATE_REGEX and not re.match(VALIDATE_REGEX[key], entry[key]):
            print(f"{key} of entry {entry['id']} is still invalid! {entry[key]}")

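For example (sketch):

# An ill-formed subreddit value is reported but left unchanged.
validate({"id": "abc", "subreddit": "not a subreddit"})
# prints: subreddit of entry abc is still invalid! not a subreddit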
def per_line_entries(entries: list):
    out = "[\n"
    for entry in entries:
        out += json.dumps(entry) + ",\n"
    out = out[:-2] + "\n]"
    return out

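For example (a sketch; the final slice assumes a non-empty list, since it strips the last trailing comma):

print(per_line_entries([{"id": "1"}, {"id": "2"}]))
# [
# {"id": "1"},
# {"id": "2"}
# ]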
def format_all(entry: dict, silent=False):
    def print_(*args, **kwargs):
        if not silent:
            print(*args, **kwargs)
    print_("Fixing r/ capitalization...")
    entry = fix_r_caps(entry)
    print_("Fixing links without protocol...")
    entry = fix_no_protocol_urls(entry)
    print_("Fixing formatting of subreddit...")
    entry = format_subreddit(entry)
    print_("Collapsing Markdown links...")
    entry = collapse_links(entry)
    print_("Converting website links to subreddit (if possible)...")
    entry = convert_website_to_subreddit(entry)
    print_("Converting subreddit links to website (if needed)...")
    entry = convert_subreddit_to_website(entry)
    print_("Removing extras...")
    entry = remove_extras(entry)
    print_("Validating...")
    validate(entry)
    print_("Completed!")
    return entry

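For example (sketch):

# format_all runs the whole pipeline on one entry.
print(format_all({"id": "abc123", "subreddit": "reddit.com/r/place", "website": ""}, silent=True))
# -> {'id': 'abc123', 'subreddit': '/r/place', 'website': ''}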
if __name__ == '__main__':

    def go(path):

        print(f"Formatting {path}...")

        with open(path, "r+", encoding='UTF-8') as f1:
            entries = json.loads(f1.read())

            for i in range(len(entries)):
                entries[i] = format_all(entries[i], True)
                if not (i % 500):
                    print(f"{i} checked.")

            print(f"{len(entries)} checked.")

            with open(path, "w", encoding='UTF-8') as f2:
                f2.write(per_line_entries(entries))

            print("Writing completed. All done.")

    go("../web/atlas.json")
    go("../web/atlas-before-ids-migration.json")

@@ -1,25 +0,0 @@
#!/usr/bin/python

import re
pattern = re.compile(r'\[(.+?)\]\((.+?)\)')

def go(path):

    print(f"Fixing {path}...")

    with open(path, "r+", encoding='UTF-8') as f1:
        contents = f1.read()

        for i in range(2):
            for match in pattern.finditer(contents):
                if match.group(1) == match.group(2):
                    contents = contents.replace(match.group(0), match.group(2), 1)
            print(f"Stage {i+1} completed.")

        with open(path, "w", encoding='UTF-8') as f2:
            f2.write(contents)
        print("Writing completed. All done.")

go("../web/atlas.json")
go("../web/atlas-before-ids-migration.json")

@@ -1,37 +0,0 @@
#!/usr/bin/python

import re

def go(path):

    print(f"Fixing {path}...")

    with open(path, "r+", encoding='UTF-8') as f1:
        contents = f1.read()

        contents = re.sub(r'": "(\s+)', r'": "', contents)
        contents = re.sub(r'(\s+)"(, |,|\})', r'"\2', contents)
        print("Leading and trailing spaces removed.")

        contents = re.sub(r' {2,}', r' ', contents)
        print("Double spaces removed.")

        contents = re.sub(r',{2,}', r',', contents)
        print("Double commas removed.")

        contents = re.sub(r'"n/a"', '""', contents)
        contents = re.sub(r'"N/A"', '""', contents)
        contents = re.sub(r'"-"', '""', contents)
        contents = re.sub(r'"none"', '""', contents)
        contents = re.sub(r'"null"', '""', contents)
        print("Psuedo-empty strings converted into empty strings.")

        contents = re.sub(r'R\/', 'r/', contents)
        print("Capitalization of r/ has been fixed.")

        with open(path, "w", encoding='UTF-8') as f2:
            f2.write(contents)
        print("Writing completed. All done.")

go("../web/atlas.json")
go("../web/atlas-before-ids-migration.json")

@@ -1,24 +1,33 @@
import praw
import json
import time
import re
import os
import traceback
from formatter import format_all

outfile = open('temp_atlas.json', 'w', encoding='utf-8')
failfile = open('manual_atlas.json', 'w', encoding='utf-8')

credentials = open('credentials', 'r')
client_id = credentials.readline().strip(' \t\n\r')
client_secret = credentials.readline().strip(' \t\n\r')
user = credentials.readline().strip(' \t\n\r')
pw = credentials.readline().strip(' \t\n\r')
with open('credentials', 'r') as file:
    credentials = file.readlines()
    client_id = credentials[0].strip()
    client_secret = credentials[1].strip()
    username = credentials[2].strip()
    password = credentials[3].strip()

reddit = praw.Reddit(
    client_id=client_id,
    client_secret=client_secret,
    username=username,
    password=password,
    user_agent='atlas_bot'
)

reddit = praw.Reddit(client_id=client_id, client_secret=client_secret, user_agent='atlas_bot',username=user,password=pw)
has_write_access = not reddit.read_only
if not has_write_access:
    print("Warning: No write access. Post flairs will not be updated")
    sleep(5)
    print("Warning: No write access. Post flairs will not be updated.")
    time.sleep(5)

jsonfile = open("../web/atlas.json", "r", encoding='utf-8')
existing = json.load(jsonfile)
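As a side note, the read_only check above can be reproduced in isolation (a sketch; assumes the praw package and placeholder script-app credentials):

import praw
# Without username/password, PRAW sessions stay read-only.
reddit = praw.Reddit(client_id="...", client_secret="...", user_agent="atlas_bot")
print(reddit.read_only)  # True; supplying username and password makes this False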
@@ -39,6 +48,7 @@ def set_flair(submission, flair):
failcount = 0
successcount = 0
totalcount = 0

outfile.write("[\n")
for submission in reddit.subreddit('placeAtlas2').new(limit=2000):
    """
@@ -48,12 +58,14 @@ def set_flair(submission, flair):
    3. Give it a name and description
    4. Select "script"
    5. Redirect to http://localhost:8080
    6. Copy ID (under Personal Use Script)
    7. Append to file called "credentials"
    8. Copy Secret
    9. Append on newline to "credentials" file
    10. If you want flair write access append 2 newlines with username and password (Must be a mod, don't do this if you don't know what you're doing)
    11. Run Script
    6. Create file "credentials" with the format below.
       ┌─────────────────────────────────────────────────────┐
       │ [ID] <- Under "personal use script"                 │
       │ [Secret]                                            │
       │ [Username] <- Must be a mod, don't do this if you   │
       │ [Password] <- don't know what you are doing.        │
       └─────────────────────────────────────────────────────┘
    7. Run Script

    Running Script
    1. Input the next ID to use
@@ -63,6 +75,7 @@ def set_flair(submission, flair):
    """
    total_all_flairs += 1

    if (submission.id in existing_ids):
        set_flair(submission, "Processed Entry")
        print("Found first duplicate!")
@@ -71,40 +84,59 @@ def set_flair(submission, flair):
            break
        else:
            continue
    if(submission.link_flair_text == "New Entry"):
    if (submission.link_flair_text == "New Entry"):

        try:

            text = submission.selftext
            # Old backslash filter:
            # text = text.replace("\\", "")
            # New one: One \\ escapes a backslash in Python's parser.
            # Two escape it again in the regex parser, so \\\\ is \.
            # Then anything but " or n is replaced with the first capture group (anything but " or n).
            # Test in REPL: re.sub("\\\\([^\"n])", "\\1", "\\t < removed slash, t stays and > stays \\n \\\"")
            text = re.sub("\\\\([^\"n])", "\\1", text)
            rawtext = text

            text = text.replace('\u200c', '')
            text = re.compile(r".*(\{.+\}).*", re.DOTALL).search(text).group(1)
            # Test if it needs to escape the escape character. Usually happens on fancy mode.
            try: json.loads(text)
            except json.JSONDecodeError: text = re.sub(r"\\(.)", r"\1", text)

            submission_json = json.loads(text)

            if submission_json:

                # Assert that the path is not empty.
                assert len(submission_json["path"]) > 0

                submission_json_dummy = {"id": submission.id, "submitted_by": ""}
                try:
                    text = text.replace("\"id\": 0,", "\"id\": 0,\n\t\t\"submitted_by\": \""+submission.author.name+"\",")
                    submission_json_dummy["submitted_by"] = submission.author.name
                except AttributeError:
                    text = text.replace("\"id\": 0,", "\"id\": 0,\n\t\t\"submitted_by\": \""+"unknown"+"\",")
                    submission_json_dummy["submitted_by"] = "unknown"
                for key in submission_json:
                    if not key in submission_json_dummy:
                        submission_json_dummy[key] = submission_json[key]
                submission_json = format_all(submission_json_dummy, True)

                lines = text.split("\n")

                for i, line in enumerate(lines):
                    if("\"id\": 0" in line):
                        lines[i] = line.replace("\"id\": 0", "\"id\": "+"\""+str(submission.id)+"\"")
                text = "\n".join(lines)
                try:
                    outfile.write(json.dumps(json.loads(text))+" ,\n")
                    outfile.write(json.dumps(submission_json) + ",\n")
                    successcount += 1
                    set_flair(submission, "Processed Entry")
                except json.JSONDecodeError:
                    failfile.write(text+",\n")

        except Exception as e:
            failfile.write(
                "\n\n" + "="*40 + "\n\n" +
                submission.id + "\n\n" +
                traceback.format_exc() + "\n\n" +
                "==== RAW ====" + "\n\n" +
                rawtext + "\n\n" +
                "==== CLEAN ====" + "\n\n" +
                text + "\n\n"
            )
            failcount += 1
            set_flair(submission, "Rejected Entry")
        print("written "+submission.id+" submitted "+str(round(time.time()-submission.created_utc))+" seconds ago")

        print("Wrote "+submission.id+", submitted "+str(round(time.time()-submission.created_utc))+" seconds ago")
        totalcount += 1

# Remove ,\n
outfile.seek(outfile.tell()-4, os.SEEK_SET)
# Remove last trailing comma
outfile.seek(outfile.tell()-3, os.SEEK_SET)
outfile.truncate()

outfile.write("\n]")

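The backslash filter documented in the crawler above can be checked in isolation (a sketch):

import re
# The filter drops the backslash from \t (and similar) but keeps \" and \n.
print(re.sub("\\\\([^\"n])", "\\1", r'\t becomes t, \n stays, \" stays'))
# -> t becomes t, \n stays, \" stays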
@@ -1,65 +0,0 @@
#!/usr/bin/python

import re

patternParent = re.compile(r'"subreddit": ?"(?!")(.+?)"')
patternCommatization = re.compile(r',* +')
pattern1 = re.compile(r'\/?[rR]\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?')
pattern2 = re.compile(r'^\/?[rR](?!\/)([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/$)?')
pattern3 = re.compile(r'(?:(?:https?:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*')
pattern4 = re.compile(r'\[[A-Za-z0-9][A-Za-z0-9_]{1,20}\]\((?:(?:https:\/\/)?(?:(?:www|old|new|np)\.)?)?reddit\.com\/r\/([A-Za-z0-9][A-Za-z0-9_]{1,20})(?:\/[^" ]*)*\)')
# pattern5 = re.compile(r'(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*')
# pattern6 = re.compile(r'\[(?:https?:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\]\((?:https:\/\/)?(?!^www\.)(.+)\.reddit\.com(?:\/[^"]*)*\)"')
"""
Examples:
1. - /r/place
   - r/place
2. /rplace
3. - https://www.reddit.com/r/place
   - www.reddit.com/r/place
   - reddit.com/r/place
4. - [https://www.reddit.com/r/place](https://www.reddit.com/r/place)
   - [www.reddit.com/r/place](www.reddit.com/r/place)
   - [reddit.com/r/place](reddit.com/r/place)
UNUSED AND FAULTY
5. - https://place.reddit.com
   - place.reddit.com
6. - [https://place.reddit.com](https://place.reddit.com)
   - [place.reddit.com](https://place.reddit.com)
"""

def replaceStage1(contents: str):
    contents = re.sub(patternCommatization, ', ', contents)

    # r/... to /r/.. (change if not needed)
    template = r"/r/\1"
    contents = re.sub(pattern4, template, contents)
    contents = re.sub(pattern3, template, contents)
    contents = re.sub(pattern1, template, contents)
    contents = re.sub(pattern2, template, contents)
    return contents

def go(path):

    print(f"Fixing {path}...")

    with open(path, "r+", encoding='UTF-8') as f1:
        contents = f1.read()

        # Convert to r/... format first.
        for matchParent in patternParent.finditer(contents):
            subredditLink = matchParent.group(1)
            subredditLink = replaceStage1(subredditLink)
            if not subredditLink:
                continue
            if path == "../web/atlas-before-ids-migration.json":
                contents = contents.replace(matchParent.group(0), '"subreddit":"' + subredditLink + '"', 1)
            else:
                contents = contents.replace(matchParent.group(0), '"subreddit": "' + subredditLink + '"', 1)

        with open(path, "w", encoding='UTF-8') as f2:
            f2.write(contents)
        print("Writing completed. All done.")

go("../web/atlas.json")
go("../web/atlas-before-ids-migration.json")

File diff suppressed because it is too large
1379	web/atlas.json
File diff suppressed because it is too large