diff --git a/ja-yomi-remove.py b/ja-yomi-remove.py new file mode 100644 index 0000000..197bca4 --- /dev/null +++ b/ja-yomi-remove.py @@ -0,0 +1,60 @@ +from typing import Generator +import pywikibot +import regex as re + + +JA_YOMI_TRACKING_PAGE = "tracking/ja-pron/yomi" +# YOMIS = "(?:o|on|go|goon|ko|kan|kanon|so|soon|to|toon|ky|kanyo|kanyoon|k|kun|j|ju|y|yu|i|irr|irreg|irregular)" +REMOVE_YOMI_PATTERN = re.compile(r"({{ja-pron(?:\|[^\|]+?=[^\|]+?|\|[^\|]+)*?)\|(?:y|yomi)=(?:o|on|go|goon|ko|kan|kanon|so|soon|to|toon|ky|kanyo|kanyoon|k|kun|j|ju|y|yu|i|irr|irreg|irregular)((?:\|[^\|]+?=[^\|]+?|\|[^\|]+)*}})") +JA_PRON_PATTERN = re.compile(r"{{ja-pron(?:\|[^\|]+?=[^\|]+?|\|[^\|]+)*}}") + + +def get_yomi_pages() -> Generator[pywikibot.Page, None, None]: + SITE = pywikibot.Site("en", "wiktionary") + TEMPLATE_NAMESPACE = SITE.namespaces.TEMPLATE + MAIN_NAMESPACE = SITE.namespaces.MAIN + return pywikibot.Page(SITE, JA_YOMI_TRACKING_PAGE, ns=TEMPLATE_NAMESPACE).getReferences(only_template_inclusion=True, namespaces=[MAIN_NAMESPACE]) + + +def remove_yomi_from_page(page: pywikibot.Page) -> None: + """ + Given a page on en.wiktionary, it removes any occurrences of `|y=` or `|yomi=` + from the source within {{ja-pron}} templates. + """ + text = page.text + new_text = REMOVE_YOMI_PATTERN.sub(r"\1\2", text) + page.text = new_text + + +def template_argument_counts_accord(previous_text: str, current_text: str) -> bool: + """ + Gets the previous and current renditions of the wikitext, + ensuring that the number of template arguments for each occurrence of {{ja-pron}} + is exactly 1 less in the current (new) text than it is in the old text. + If this does not hold true, returns `False`, else `True`. + Of course, this is because the new text should not have `y=` or `yomi=` in it, + so the number of arguments should be exactly one less once this has been removed. + """ + for previous_pron, current_pron in zip(JA_PRON_PATTERN.finditer(previous_text), JA_PRON_PATTERN.finditer(current_text)): + prev_pr_text = previous_pron.group(0) + curr_pr_text = current_pron.group(0) + previous_arg_count = prev_pr_text.count("|") + current_arg_count = curr_pr_text.count("|") + if current_arg_count != previous_arg_count - 1: + print(previous_arg_count, current_arg_count) + return False + return True + +def main(): + for i, page in enumerate(get_yomi_pages()): + if i == 1: + return + original_text = page.text + print(f"Removing yomi from {page.title()}") + remove_yomi_from_page(page) + assert template_argument_counts_accord(original_text, page.text) + page.save("Removed deprecated yomi/y parameters from {{ja-pron}} (automated task)", minor=True) + + +if __name__ == "__main__": + main()