Update ja-yomi-remove.py to use mwparserfromhell

This commit is contained in:
Kovachev 2023-06-02 23:09:54 +01:00 committed by GitHub
parent 293a1b05d2
commit e1d51e6441
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,13 +1,13 @@
from typing import Generator from typing import Generator
import pywikibot import pywikibot
import regex as re
import os import os
import subprocess
import mwparserfromhell
from mwparserfromhell.wikicode import Template
from restore_pages import BACKUP_PATH
JA_YOMI_TRACKING_PAGE = "tracking/ja-pron/yomi" JA_YOMI_TRACKING_PAGE = "tracking/ja-pron/yomi"
REMOVE_YOMI_PATTERN = re.compile(r"({{ja-pron(?:\|[^\|]+?=[^\|]+?|\|[^\|]+)*?)\|(?:y|yomi)=(?:o|on|go|goon|ko|kan|kanon|so|soon|to|toon|ky|kanyo|kanyoon|k|kun|j|ju|y|yu|i|irr|irreg|irregular)((?:\|[^\|]+?=[^\|]+?|\|[^\|]+)*}})")
JA_PRON_PATTERN = re.compile(r"{{ja-pron(?:\|[^\|]+?=[^\|]+?|\|[^\|]+)*}}")
BACKUP_PATH = "ja-yomi-backup"
def get_yomi_pages() -> Generator[pywikibot.Page, None, None]: def get_yomi_pages() -> Generator[pywikibot.Page, None, None]:
SITE = pywikibot.Site("en", "wiktionary") SITE = pywikibot.Site("en", "wiktionary")
@ -16,30 +16,45 @@ def get_yomi_pages() -> Generator[pywikibot.Page, None, None]:
return pywikibot.Page(SITE, JA_YOMI_TRACKING_PAGE, ns=TEMPLATE_NAMESPACE).getReferences(only_template_inclusion=True, namespaces=[MAIN_NAMESPACE]) return pywikibot.Page(SITE, JA_YOMI_TRACKING_PAGE, ns=TEMPLATE_NAMESPACE).getReferences(only_template_inclusion=True, namespaces=[MAIN_NAMESPACE])
# The way the pattern works is by forming two capture groups, one on either side of the regex matching for the yomi # Use mwparserfromhell to filter all the templates, select the ja-pron ones, and remove any "y" or "yomi"
# parameter, e.g. ({{ja-pron) |yomi=k (|おんせい}}) # arguments they might have.
# (the yomi is separated here for demonstration purposes, otherwise it is contiguous with the other parameters.)
# The two bracketed text portions you see then substitute the original template, in effect replacing it
# with all of its contents, minus the original yomi (or y) argument.
def remove_yomi_from_page(page: pywikibot.Page) -> str:
    """
    Return the wikitext of `page` with every `|y=` / `|yomi=` argument removed
    from its top-level {{ja-pron}} templates.

    The page object itself is not modified: the new text is returned and the
    caller decides whether to assign it to `page.text`.
    """
    parsed = mwparserfromhell.parse(page.text)
    template: Template
    # recursive=False: only templates at the top level of the page are visited;
    # a {{ja-pron}} nested inside another template's argument is left alone.
    for template in parsed.ifilter(forcetype=Template, recursive=False):
        # The parsed name node keeps any whitespace that follows the name
        # (e.g. "{{ja-pron\n|...}}"), so compare the trimmed string rather than
        # the raw node.
        if str(template.name).strip() != "ja-pron":
            continue
        for deprecated in ("y", "yomi"):
            if template.has(deprecated):
                template.remove(deprecated)
    return str(parsed)
def create_diff(old_text: str, current_page: pywikibot.Page) -> None:
    """
    Store a unified diff that turns the page's current text back into `old_text`.

    The diff is written to a file named after the page title inside BACKUP_PATH,
    so that the edit can be reverted later by applying it as a patch if the
    script turns out to have made a mistake.

    Raises:
        RuntimeError: if the external `diff` program reports trouble (any exit
            status other than 0 "identical" or 1 "different"), so that a
            broken backup is never silently written.
        FileNotFoundError: if no `diff` executable is available.
    """
    import tempfile

    os.makedirs(BACKUP_PATH, exist_ok=True)

    # Work inside a throw-away directory so no temp1/temp2 files are left
    # behind; running diff with cwd set there keeps the "temp2"/"temp1" names
    # in the diff header unchanged.
    with tempfile.TemporaryDirectory() as scratch_dir:
        with open(os.path.join(scratch_dir, "temp1"), mode="w", encoding="utf-8") as f:
            f.write(old_text)
        with open(os.path.join(scratch_dir, "temp2"), mode="w", encoding="utf-8") as f:
            f.write(current_page.text)
        # Get differences between new revision and previous.  The inputs were
        # written as UTF-8, so decode the output as UTF-8 too rather than with
        # the locale's default encoding.
        result = subprocess.run(
            ["diff", "-u", "temp2", "temp1"],
            cwd=scratch_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
        )

    if result.returncode not in (0, 1):
        raise RuntimeError(
            f"diff failed for {current_page.title()} "
            f"(exit status {result.returncode}): {result.stderr.strip()}"
        )

    diff = result.stdout
    if not diff.endswith("\n"):
        diff += "\n"  # patch will complain if we don't end the file with a newline

    # NOTE(review): the title is used verbatim as the file name; a title
    # containing a path separator would point into a sub-directory - confirm
    # how restore_pages expects such titles to be stored before changing this.
    with open(os.path.join(BACKUP_PATH, current_page.title()), mode="w", encoding="utf-8") as f:
        f.write(diff)
def template_argument_counts_accord(previous_text: str, current_text: str) -> bool:
    """
    Sanity-check an edit by comparing the argument counts of the top-level
    {{ja-pron}} templates before and after it.

    The new text should not have `y=` or `yomi=` in it, so each {{ja-pron}}
    must have lost exactly as many arguments as it previously had under those
    two names - one in the usual case, none for a template that never had a
    yomi, more if the argument was repeated or given under both names.

    Returns False if the two texts do not contain the same number of top-level
    templates, if a template is {{ja-pron}} on one side only, or if any
    {{ja-pron}} has an unexpected number of arguments; True otherwise.
    """
    previous_templates = mwparserfromhell.parse(previous_text).filter(forcetype=Template, recursive=False)
    current_templates = mwparserfromhell.parse(current_text).filter(forcetype=Template, recursive=False)

    # zip() would silently drop the surplus templates of the longer list, so a
    # template that appeared or vanished must be caught up front.
    if len(previous_templates) != len(current_templates):
        return False

    previous_pron: Template
    current_pron: Template
    for previous_pron, current_pron in zip(previous_templates, current_templates):
        was_pron = previous_pron.name == "ja-pron"
        is_pron = current_pron.name == "ja-pron"
        if was_pron != is_pron:
            return False
        if not is_pron:
            continue

        # Argument names can carry surrounding whitespace in the wikitext, so
        # trim them before comparing.
        expected_removed = sum(
            1 for param in previous_pron.params
            if str(param.name).strip() in ("y", "yomi")
        )
        if len(current_pron.params) != len(previous_pron.params) - expected_removed:
            return False
    return True
def main(): def main():
@ -73,10 +90,10 @@ def main():
return return
original_text = page.text original_text = page.text
print(f"Backing up {page.title()}...")
backup_page(page)
print(f"Removing yomi from {page.title()}...") print(f"Removing yomi from {page.title()}...")
remove_yomi_from_page(page) page.text = remove_yomi_from_page(page)
print(f"Backing up {page.title()}...")
create_diff(original_text, page)
assert template_argument_counts_accord(original_text, page.text) assert template_argument_counts_accord(original_text, page.text)
page.save("Removed deprecated yomi/y parameters from {{ja-pron}} (automated task)", minor=True, botflag=True) page.save("Removed deprecated yomi/y parameters from {{ja-pron}} (automated task)", minor=True, botflag=True)