Update with library, scripts
This commit is contained in:
parent
e22c88c737
commit
8dc7c70cd9
@ -139,6 +139,10 @@ def update_page(title: str, alphagram: str, uncreated: set[str]) -> bool:
|
||||
else:
|
||||
page.text = new_content
|
||||
plural_s = "s" if len(anagrams_added) > 1 else ""
|
||||
if len(anagrams_added) == 0:
|
||||
print("Nothing was added, but the content was changed! (not saved)")
|
||||
return False
|
||||
|
||||
page.save(f"Added anagram{plural_s} ({', '.join(anagrams_added)}) to Bulgarian section", minor=False)
|
||||
return True
|
||||
else:
|
||||
@ -172,16 +176,49 @@ def main(uncreated: set[str]):
|
||||
|
||||
iterations += 1
|
||||
|
||||
def there_are_erroneous_anagrams(original, anagrams: set[str]) -> bool:
    """Tell whether any listed anagram is only a trivial variant of `original`.

    Entries equal to `original` are ignored. Any other entry counts as
    erroneous when `normalise` maps it to the same value as `original`.

    Returns True as soon as one such entry is found, otherwise False.
    """
    return any(
        not (candidate == original) and normalise(candidate) == normalise(original)
        for candidate in anagrams
    )
|
||||
|
||||
def find_erroneous_anagrams():
    """Record pages whose Bulgarian ``{{anagrams}}`` template lists a dubious entry.

    Walks every title in the module-level `anagrams` mapping, skips pages that
    do not exist or that `has_bulgarian` rejects, and inspects each
    ``{{anagrams|bg|...}}`` template on the page. A page is recorded when
    `there_are_erroneous_anagrams` flags the template's parameters from
    index 2 onwards. The recorded titles are written, one per line, to
    ``dubious_anagrams.txt`` in the current directory.
    """
    errors = []
    for anagram_list in anagrams.values():
        for anagram in anagram_list:
            page = pywikibot.Page(SITE, anagram)

            if not page.exists():
                continue
            if not has_bulgarian(page):
                continue

            print("Traversing page", anagram + "...")

            templates = mwparserfromhell.parse(page.text).filter(
                forcetype=mwparserfromhell.wikicode.Template
            )
            for template in templates:
                template: mwparserfromhell.wikicode.Template
                if template.name != "anagrams":
                    continue
                if not template.has_param(1):
                    continue
                if template.get(1) != "bg":
                    continue

                # NOTE(review): params[2:] presumably skips the language code
                # and the alphagram parameter - confirm against real template usage.
                listed = template.params[2:]
                if there_are_erroneous_anagrams(anagram, listed):
                    print("Found erroneous anagrams: ", listed)
                    errors.append(anagram)
                    # One hit is enough to record the page.
                    break

    # The titles are Cyrillic, so write UTF-8 explicitly instead of relying on
    # the platform's default encoding.
    with open("dubious_anagrams.txt", mode="w", encoding="utf-8") as f:
        f.write("\n".join(errors))
|
||||
|
||||
if __name__ == "__main__":
|
||||
uncreated = set()
|
||||
try:
|
||||
with open(NOT_CREATED_LOG) as f:
|
||||
uncreated = set(f.readlines())
|
||||
except FileNotFoundError:
|
||||
with open(NOT_CREATED_LOG, "w") as f:
|
||||
pass
|
||||
try:
|
||||
main(uncreated)
|
||||
finally:
|
||||
with open(NOT_CREATED_LOG, "w") as f:
|
||||
f.writelines(uncreated)
|
||||
# uncreated = set()
|
||||
# try:
|
||||
# with open(NOT_CREATED_LOG) as f:
|
||||
# uncreated = set(f.readlines())
|
||||
# except FileNotFoundError:
|
||||
# with open(NOT_CREATED_LOG, "w") as f:
|
||||
# pass
|
||||
# try:
|
||||
# main(uncreated)
|
||||
# finally:
|
||||
# with open(NOT_CREATED_LOG, "w") as f:
|
||||
# f.writelines(uncreated)
|
||||
find_erroneous_anagrams()
|
16
bulgarian-anagrams/dubious_anagrams.txt
Normal file
16
bulgarian-anagrams/dubious_anagrams.txt
Normal file
@ -0,0 +1,16 @@
|
||||
зар
|
||||
иск
|
||||
пра-
|
||||
пот
|
||||
глоба
|
||||
картон
|
||||
арап
|
||||
тема
|
||||
треска
|
||||
нокът
|
||||
аз
|
||||
то
|
||||
А
|
||||
а
|
||||
ни
|
||||
това
|
11
bulgarian-anagrams/reformat.py
Normal file
11
bulgarian-anagrams/reformat.py
Normal file
@ -0,0 +1,11 @@
|
||||
def process(line: str) -> str:
    """Keep only the text after the last space in `line`, with hyphens removed.

    When `line` contains no space the whole line is kept. Any trailing
    newline is preserved because it is part of the last field.
    """
    _, _, last_field = line.rpartition(" ")
    return "".join(last_field.split("-"))
|
||||
|
||||
# Load the raw word list, one entry per line.
with open("words.txt") as f:
    contents = f.readlines()

# Reduce every line to its cleaned last field.
contents = [process(line) for line in contents]

# Write the cleaned list; each entry still carries its own line ending.
with open("modified.txt", mode="w") as f:
    f.writelines(contents)
|
76110
bulgarian-anagrams/words2.txt
Normal file
76110
bulgarian-anagrams/words2.txt
Normal file
File diff suppressed because it is too large
Load Diff
43646
bulgarian-anagrams/words3.txt
Normal file
43646
bulgarian-anagrams/words3.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -6,6 +6,7 @@ import pywikibot
|
||||
import mwparserfromhell
|
||||
import unicodedata
|
||||
import regex as re
|
||||
import random
|
||||
from collections import defaultdict
|
||||
|
||||
# From User:JeffDoozan's bot AutoDooz
|
||||
@ -17,8 +18,8 @@ RE_MATCH_CATEGORIES = re.compile(fr"({RE_CAT_TEMPLATES}|{RE_CATEGORIES})")
|
||||
SITE = pywikibot.Site("en", "wiktionary")
|
||||
BACKUP_PATH = "en-anagrams-backup"
|
||||
DIACRITICS = f"{chr(0x0300)}-{chr(0x036F)}"
|
||||
PUNCTUATION = r"’'\(\)\[\]\{\}<>:,‒–—―…!.«»-‐?‘’“”;/⁄␠·&@\*\•^¤¢$€£¥₩₪†‡°¡¿¬#№%‰‱¶′§~¨_|¦⁂☞∴‽※" + f"{chr(0x2000)}-{chr(0x206F)}"
|
||||
REDUNDANT_CHARS = f"[{DIACRITICS}{PUNCTUATION}]"
|
||||
PUNCTUATION = r"’'()\[\]{}<>:,‒–—―…!.«»\-‐?‘’“”;/⁄␠·&@*\\•^¤¢$€£¥₩₪†‡°¡¿¬#№%‰‱¶′§~¨_|¦⁂☞∴‽※" + f"{chr(0x2000)}-{chr(0x206F)}"
|
||||
REDUNDANT_CHARS = f"[{DIACRITICS}{PUNCTUATION} ]"
|
||||
|
||||
CONVERSIONS = {
|
||||
"æ": "ae",
|
||||
@ -52,7 +53,7 @@ def normalise(word: str) -> str:
|
||||
- Convert to lowercase (casefold)
|
||||
- Remove all irrelevant elements (punctuation, diacritics).
|
||||
"""
|
||||
word = word.strip().casefold()
|
||||
word = word.casefold()
|
||||
|
||||
for source_char, replacement in CONVERSIONS.items():
|
||||
word = word.replace(source_char, replacement)
|
||||
@ -178,6 +179,9 @@ def main():
|
||||
LIMIT = -1
|
||||
|
||||
print("Preparing to iterate over", len(anagrams), "alphragrams", f"({count_anagrams()} anagrams)")
|
||||
for anagram_list in anagrams.values():
|
||||
if random.randint(1, 1000) == 50:
|
||||
print(anagram_list)
|
||||
|
||||
edit_count = 0 # Updated for every individual page
|
||||
iterations = 0 # Updated for every set of anagrams
|
||||
|
37
english-anagrams/temp1
Normal file
37
english-anagrams/temp1
Normal file
@ -0,0 +1,37 @@
|
||||
{{also|Unicef|U.N.I.C.E.F.}}
|
||||
==English==
|
||||
{{wikipedia}}
|
||||
|
||||
===Alternative forms===
|
||||
* {{alter|en|Unicef||Britain, Australia, New Zealand}}
|
||||
* {{alter|en|U.N.I.C.E.F.}}
|
||||
|
||||
===Pronunciation===
|
||||
* English: {{a|US}} {{IPA|en|/ˈju.nəˌsɛf/}}
|
||||
|
||||
===Proper noun===
|
||||
{{en-proper noun}}
|
||||
|
||||
# {{acronym of|en|[[United Nations]] [[international|International]] [[child|Children's]] [[emergency|Emergency]] [[fund|Fund]]}}
|
||||
|
||||
====Translations====
|
||||
{{trans-top|United Nations International Children's Emergency Fund}}
|
||||
* Japanese: {{t+|ja|UNICEF|tr=Yunisefu}}
|
||||
* Macedonian: {{t|mk|УНИЦЕФ}}, {{t|mk|Уницеф}}
|
||||
* Russian: {{t+|ru|ЮНИСЕ́Ф|m|sc=Cyrl}}
|
||||
{{trans-bottom}}
|
||||
|
||||
{{cln|en|acronyms}}
|
||||
{{C|en|United Nations}}
|
||||
|
||||
==Japanese==
|
||||
|
||||
===Etymology===
|
||||
Borrowed from {{bor|ja|en|UNICEF|sort=ゆにせふ}}.
|
||||
|
||||
===Proper noun===
|
||||
{{ja-pos|proper|ユニセフ}}
|
||||
|
||||
# {{syn of|ja|国際連合児童基金|tr=Kokusai Rengō Jidō Kikin||{{w|United Nations Children's Emergency Fund}}}}; the {{l|en|UNICEF}}
|
||||
|
||||
{{topics|ja|Organizations|sort=ゆにせふ}}
|
37
english-anagrams/temp2
Normal file
37
english-anagrams/temp2
Normal file
@ -0,0 +1,37 @@
|
||||
{{also|Unicef|U.N.I.C.E.F.}}
|
||||
==English==
|
||||
{{wikipedia}}
|
||||
|
||||
===Alternative forms===
|
||||
* {{alter|en|Unicef||Britain, Australia, New Zealand}}
|
||||
* {{alter|en|U.N.I.C.E.F.}}
|
||||
|
||||
===Pronunciation===
|
||||
* English: {{a|US}} {{IPA|en|/ˈju.nəˌsɛf/}}
|
||||
|
||||
===Proper noun===
|
||||
{{en-proper noun}}
|
||||
|
||||
# {{acronym of|en|[[United Nations]] [[international|International]] [[child|Children's]] [[emergency|Emergency]] [[fund|Fund]]}}
|
||||
|
||||
====Translations====
|
||||
{{trans-top|United Nations International Children's Emergency Fund}}
|
||||
* Japanese: {{t+|ja|UNICEF|tr=Yunisefu}}
|
||||
* Macedonian: {{t|mk|УНИЦЕФ}}, {{t|mk|Уницеф}}
|
||||
* Russian: {{t+|ru|ЮНИСЕ́Ф|m|sc=Cyrl}}
|
||||
{{trans-bottom}}
|
||||
|
||||
{{cln|en|acronyms}}
|
||||
{{C|en|United Nations}}
|
||||
|
||||
==Japanese==
|
||||
|
||||
===Etymology===
|
||||
Borrowed from {{bor|ja|en|UNICEF|sort=ゆにせふ}}.
|
||||
|
||||
===Proper noun===
|
||||
{{ja-pos|proper|ユニセフ}}
|
||||
|
||||
# {{syn of|ja|国際連合児童基金|tr=Kokusai Rengō Jidō Kikin||{{w|United Nations Children's Emergency Fund}}}}; the {{l|en|UNICEF}}
|
||||
|
||||
{{topics|ja|Organizations|sort=ゆにせふ}}
|
1
ja-readings-fix/ja-readings-to-fix.txt
Normal file
1
ja-readings-fix/ja-readings-to-fix.txt
Normal file
File diff suppressed because one or more lines are too long
65
ja-readings-fix/ja-readings.py
Normal file
65
ja-readings-fix/ja-readings.py
Normal file
@ -0,0 +1,65 @@
|
||||
import traceback
|
||||
from typing import Iterator
|
||||
import kovachevbot
|
||||
import mwparserfromhell
|
||||
import sys
|
||||
import pywikibot
|
||||
import regex as re
|
||||
|
||||
# Matches a parenthesised run of word characters, e.g. "(abc)"; used to strip manual transliterations from readings.
ROMAJI_TRANSLITERATION_PATTERN = re.compile(r"\(\w+?\)")
|
||||
|
||||
def fix_reading_str(reading_str: str) -> str:
    """Clean a comma-separated list of readings.

    Each reading is stripped, has every match of
    `ROMAJI_TRANSLITERATION_PATTERN` removed, is passed through
    `kovachevbot.links_to_plaintext`, and is stripped again. The cleaned
    readings are re-joined with ", ".
    """
    cleaned = []
    for reading in reading_str.split(","):
        without_romaji = ROMAJI_TRANSLITERATION_PATTERN.sub("", reading.strip())
        cleaned.append(kovachevbot.links_to_plaintext(without_romaji).strip())
    return ", ".join(cleaned)
|
||||
|
||||
def fix_page(page: pywikibot.Page):
    """Tidy every ``ja-readings`` template in the page's Japanese section.

    Parameters with an empty (or whitespace-only) value are removed; every
    other parameter value is rewritten with `fix_reading_str`. The result is
    assigned to `page.text`; the page is not saved here.

    Pages without a level-2 "Japanese" section are reported on stderr and
    left untouched.
    """
    kanji = page.title()
    parsed = mwparserfromhell.parse(page.text)
    japanese_section_search = parsed.get_sections([2], "Japanese")
    if not japanese_section_search:
        print("Skipping page", kanji, "as it has no Japanese section", file=sys.stderr)
        # Nothing to fix; without this return the indexing below raises IndexError.
        return

    japanese_section: mwparserfromhell.wikicode.Wikicode = japanese_section_search[0]

    ja_readingses: list[mwparserfromhell.wikicode.Template] = japanese_section.filter(
        forcetype=mwparserfromhell.wikicode.Template, matches="ja-readings"
    )

    for ja_reading_template in ja_readingses:
        # Can't delete params while iterating, so collect them and delete afterwards.
        params_to_remove = []
        for param in ja_reading_template.params:
            param: mwparserfromhell.nodes.extras.Parameter

            # Treat whitespace-only values (e.g. "|nanori=" followed by a newline)
            # as unpopulated too; otherwise they would be rewritten to an empty
            # value instead of being removed.
            if str(param.value).strip() == "":
                params_to_remove.append(param)
            else:
                param.value = fix_reading_str(str(param.value))

        for param in params_to_remove:
            ja_reading_template.remove(param)

    page.text = str(parsed)
|
||||
|
||||
def main():
    """Fix the ``ja-readings`` markup of every page queued in ``ja-readings-to-fix.txt``.

    The file is read as one string and each character is used as a page
    title. Pages are visited through `kovachevbot.iterate_safe`, fixed with
    `fix_page` and saved. Whatever happens, the file is rewritten afterwards
    so that it contains only the characters whose pages were not saved
    successfully, which lets a later run resume where this one stopped.
    """
    # The queue holds kanji, so read and write it as UTF-8 explicitly.
    with open("ja-readings-to-fix.txt", encoding="utf-8") as f:
        kanji_to_fix = f.read()

    pages = (kovachevbot.wikt_page(kanji) for kanji in kanji_to_fix)
    checked_pages_iter: Iterator[pywikibot.Page] = kovachevbot.iterate_safe(pages)

    # Count successful saves directly instead of deriving progress from a loop
    # index: the index is unbound when nothing is iterated and off by one when
    # the very first page fails.
    completed = 0
    try:
        for page in checked_pages_iter:
            print(page.title())
            fix_page(page)
            page.save("Remove redundant ja-readings markup (manual transliterations; manual links; empty params)")
            completed += 1
    except Exception:
        # Report the failure but still fall through to saving progress.
        traceback.print_exc()
    finally:
        remaining = kanji_to_fix[completed:]
        with open("ja-readings-to-fix.txt", mode="w", encoding="utf-8") as f:
            f.write(remaining)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -3,10 +3,10 @@ import pywikibot
|
||||
import os
|
||||
import subprocess
|
||||
import mwparserfromhell
|
||||
import kovachevbot
|
||||
from mwparserfromhell.wikicode import Template
|
||||
from restore_pages import BACKUP_PATH
|
||||
|
||||
|
||||
JA_YOMI_TRACKING_PAGE = "tracking/ja-pron/yomi"
|
||||
SITE = pywikibot.Site("en", "wiktionary")
|
||||
|
||||
@ -15,7 +15,6 @@ def get_yomi_pages() -> Generator[pywikibot.Page, None, None]:
|
||||
MAIN_NAMESPACE = SITE.namespaces.MAIN
|
||||
return pywikibot.Page(SITE, JA_YOMI_TRACKING_PAGE, ns=TEMPLATE_NAMESPACE).getReferences(only_template_inclusion=True, namespaces=[MAIN_NAMESPACE])
|
||||
|
||||
|
||||
# Use mwparserfromhell to filter all the templates, select the ja-pron ones, and remove any "y" or "yomi"
|
||||
# arguments they might have.
|
||||
def remove_yomi_from_page(page: pywikibot.Page) -> None:
|
||||
@ -27,7 +26,7 @@ def remove_yomi_from_page(page: pywikibot.Page) -> None:
|
||||
parsed = mwparserfromhell.parse(text)
|
||||
for template in parsed.ifilter(forcetype=Template, recursive=False):
|
||||
template: Template
|
||||
if template.name != "ja-pron":
|
||||
if template.name != "ja-pron" and str(template.name).casefold() != "ja-ipa":
|
||||
continue
|
||||
|
||||
if template.has("y"):
|
||||
@ -72,37 +71,32 @@ def template_argument_counts_accord(previous_text: str, current_text: str) -> bo
|
||||
if previous_pron.name != "ja-pron" or current_pron.name != "ja-pron":
|
||||
continue
|
||||
|
||||
if not (previous_pron.has("y") or previous_pron.has("yomi")):
|
||||
continue
|
||||
|
||||
if len(current_pron.params) != len(previous_pron.params) - 1:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def main():
|
||||
# Get the maximum number of edits to make from the user (e.g. `pwb ja-yomi-remove 100`);
|
||||
# if not found then set to unlimited (-1)
|
||||
try:
|
||||
LIMIT = int(pywikibot.argvu[1])
|
||||
except:
|
||||
LIMIT = -1
|
||||
|
||||
for edit_count, page in enumerate(get_yomi_pages()):
|
||||
if edit_count == LIMIT:
|
||||
return
|
||||
if edit_count % 5 == 0:
|
||||
halt_page = pywikibot.Page(SITE, "User:KovachevBot/halt")
|
||||
if "halt" in halt_page.text.casefold():
|
||||
print(f"ERROR: BOT WAS MANUALLY HALTED BY {halt_page.userName()}")
|
||||
return
|
||||
|
||||
for page in kovachevbot.iterate_safe((get_yomi_pages())):
|
||||
original_text = page.text
|
||||
|
||||
print(f"Removing yomi from {page.title()}...")
|
||||
|
||||
page.text = remove_yomi_from_page(page)
|
||||
|
||||
print(f"Backing up {page.title()}...")
|
||||
create_diff(original_text, page)
|
||||
kovachevbot.backup_page(original_text, page, BACKUP_PATH)
|
||||
|
||||
try:
|
||||
assert template_argument_counts_accord(original_text, page.text)
|
||||
page.save("Removed deprecated yomi/y parameters from {{ja-pron}} (automated task)", minor=True, botflag=True)
|
||||
|
||||
|
||||
except AssertionError:
|
||||
print("ERROR: page raised error, template argument-counting failsafe did not accord")
|
||||
continue
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
3
kovachevbot/README.md
Normal file
3
kovachevbot/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
# KovachevBot commons
|
||||
This is a directory which acts as a Python module, containing numerous functions and patterns
|
||||
that I typically reuse throughout my bot code.
|
8
kovachevbot/kovachevbot.egg-info/PKG-INFO
Normal file
8
kovachevbot/kovachevbot.egg-info/PKG-INFO
Normal file
@ -0,0 +1,8 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: kovachevbot
|
||||
Version: 0.1
|
||||
Summary: Library of generic functions and constants used in my bot scripts
|
||||
Home-page: https://www.kovachev.xyz
|
||||
Author: Kiril Kovachev
|
||||
Author-email: kkovachev1947@gmail.com
|
||||
License: MIT
|
9
kovachevbot/kovachevbot.egg-info/SOURCES.txt
Normal file
9
kovachevbot/kovachevbot.egg-info/SOURCES.txt
Normal file
@ -0,0 +1,9 @@
|
||||
README.md
|
||||
setup.py
|
||||
kovachevbot/__init__.py
|
||||
kovachevbot/common.py
|
||||
kovachevbot.egg-info/PKG-INFO
|
||||
kovachevbot.egg-info/SOURCES.txt
|
||||
kovachevbot.egg-info/dependency_links.txt
|
||||
kovachevbot.egg-info/not-zip-safe
|
||||
kovachevbot.egg-info/top_level.txt
|
1
kovachevbot/kovachevbot.egg-info/dependency_links.txt
Normal file
1
kovachevbot/kovachevbot.egg-info/dependency_links.txt
Normal file
@ -0,0 +1 @@
|
||||
|
1
kovachevbot/kovachevbot.egg-info/not-zip-safe
Normal file
1
kovachevbot/kovachevbot.egg-info/not-zip-safe
Normal file
@ -0,0 +1 @@
|
||||
|
1
kovachevbot/kovachevbot.egg-info/top_level.txt
Normal file
1
kovachevbot/kovachevbot.egg-info/top_level.txt
Normal file
@ -0,0 +1 @@
|
||||
kovachevbot
|
3
kovachevbot/kovachevbot/__init__.py
Normal file
3
kovachevbot/kovachevbot/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
# When `import kovachevbot` is run, extract all of its data under the simple namespace `kovachevbot`
|
||||
# (as opposed to resolving `kovachevbot.common` for all module data)
|
||||
from kovachevbot.common import *
|
180
kovachevbot/kovachevbot/common.py
Normal file
180
kovachevbot/kovachevbot/common.py
Normal file
@ -0,0 +1,180 @@
|
||||
import os
|
||||
import subprocess
|
||||
import pywikibot
|
||||
import tkinter
|
||||
import sys
|
||||
import itertools
|
||||
import mwparserfromhell
|
||||
import regex as re
|
||||
from typing import Generator, Iterator
|
||||
|
||||
|
||||
WIKTIONARY = pywikibot.Site("en", "wiktionary")
|
||||
TEMPLATE_NAMESPACE = WIKTIONARY.namespaces.TEMPLATE
|
||||
MAIN_NAMESPACE = WIKTIONARY.namespaces.MAIN
|
||||
COMMONS = pywikibot.Site("commons", "commons")
|
||||
LINK_PATTERN = re.compile(r"\[\[(.+?)(?:\|(.+?))?\]\]")
|
||||
|
||||
|
||||
def wikt_page(title: str) -> pywikibot.Page:
    """Build a `pywikibot.Page` for `title` on the shared `WIKTIONARY` site object."""
    page = pywikibot.Page(WIKTIONARY, title)
    return page
|
||||
|
||||
def save_gui(page: pywikibot.Page, default_edit_summary: str = "") -> bool:
    """Open a small Tk window for hand-editing `page` and saving it.

    The window shows the page text in an editable box and an edit-summary
    field pre-filled with `default_edit_summary`. Pressing "Save" copies the
    edited text into `page.text`, closes the window and calls `page.save`
    with the entered summary as a non-minor edit.

    Returns whether the edit was successfully completed through the save
    button (False if the window was closed any other way or saving raised).
    """
    window = tkinter.Tk()
    window.title(f"Editing page {page.title()}")
    window.geometry("800x600")
    window.config(bg="#000000")

    page_text_box = tkinter.Text(master=window)
    page_text_box.pack(fill="x", expand=False, padx=20, pady=0)
    page_text_box.insert("1.0", page.text)

    edit_summary_label = tkinter.Label(window, text="Edit summary")
    edit_summary_var = tkinter.StringVar()
    edit_summary_var.set(default_edit_summary)
    edit_summary_box = tkinter.Entry(window, textvariable=edit_summary_var, width=60)
    edit_summary_label.place(x=20, y=500)
    edit_summary_box.place(x=20, y=525)

    success = False

    def save_content():
        nonlocal success
        # "end-1c" leaves out the newline the Text widget always appends after
        # the last character, so saving does not add a trailing newline.
        page.text = page_text_box.get("1.0", "end-1c")
        edit_summary = edit_summary_var.get()
        # Read the widgets before destroying the window; they are unusable afterwards.
        window.destroy()
        page.save(edit_summary, minor=False)
        success = True

    button = tkinter.Button(window, text="Save", command=save_content)
    button.place(x=400, y=550)

    window.mainloop()
    return success
|
||||
|
||||
# save_gui(pywikibot.Page(WIKTIONARY, "User:Kiril kovachev/Sandbox"))
|
||||
|
||||
def convert_link_to_plaintext(link: mwparserfromhell.wikicode.Wikilink) -> str:
    """Pick the visible part of a wikilink.

    Returns the link's display text when it is present and non-empty;
    otherwise returns the link's title.
    """
    label = link.text
    if label is None or label == "":
        return link.title
    return label
|
||||
|
||||
def links_to_plaintext(text: str) -> str:
    """Return `text` with every wikilink replaced by its plain-text form.

    The text is parsed with mwparserfromhell; each wikilink found is swapped
    for the value `convert_link_to_plaintext` gives for it, and the result is
    converted back to a string.
    """
    wikicode: mwparserfromhell.wikicode.Wikicode = mwparserfromhell.parse(text)
    for wikilink in wikicode.filter(forcetype=mwparserfromhell.wikicode.Wikilink):
        wikicode.replace(wikilink, convert_link_to_plaintext(wikilink))
    return str(wikicode)
|
||||
|
||||
ABORT_CHECK_INTERVAL = 5
|
||||
HALT_PAGE = wikt_page("User:KovachevBot/halt") # Do not edit, please!
|
||||
|
||||
def iterate_with_abort_check(iterator: Iterator, interval: int = ABORT_CHECK_INTERVAL, halt_page = HALT_PAGE):
    """
    Run over an iterator, checking at every interval of 5 (or other specified value)
    whether the bot has been ordered to stop. The failsafe site is defined as User:KovachevBot/halt by default.

    Before item 0 and every `interval`-th item after it, the halt page's text
    is inspected; if it contains "halt" (case-insensitively) a message is
    printed to stderr and iteration ends without yielding further items.
    """
    for edit_count, value in enumerate(iterator):
        # Check halt page
        if edit_count % interval == 0:
            # The same Page object is reused for every check and pywikibot keeps
            # its text cached, so drop the cache to read the live page content.
            halt_page.clear_cache()
            if "halt" in halt_page.text.casefold():
                print(f"ERROR: BOT WAS MANUALLY HALTED BY {halt_page.userName()}", file=sys.stderr)
                return
        yield value
|
||||
|
||||
def iterate_entries(iterator: Iterator, max_edits: int = None):
    """Iterate at most `max_edits` entries of an iterator (of pages), or unlimited.

    If no `max_edits` is provided as an arg, try to get the value from the command-line arguments.
    If it still isn't found, default to running indefinitely.
    If it is provided, but it's not a valid integer, it will default to unlimited again.
    In the unlimited case, this effectively means this iterator will run until the original one is exhausted.

    A limit of zero or less yields nothing.
    """
    raw_limit = max_edits
    if raw_limit is None:
        try:
            raw_limit = pywikibot.argvu[1]
        except IndexError:
            # No command-line limit was given either: run unlimited.
            raw_limit = None

    if raw_limit is None:
        edit_iter = itertools.count()
    else:
        try:
            edit_iter = range(int(raw_limit))
        except (TypeError, ValueError):
            # Not something int() can interpret: fall back to unlimited.
            edit_iter = itertools.count()

    # The counter comes first in zip() so that, once the limit is reached, no
    # extra item is pulled from `iterator` and thrown away.
    for _, value in zip(edit_iter, iterator):
        yield value
|
||||
|
||||
def iterate_safe(iterator: Iterator, max_entries: int = None, abort_check_interval: int = ABORT_CHECK_INTERVAL, halt_page: pywikibot.Page = HALT_PAGE):
    """Combine the halt-page check with an entry limit.

    Wraps `iterator` in `iterate_with_abort_check` (using
    `abort_check_interval` and `halt_page`) and passes the result to
    `iterate_entries`, which yields at most `max_entries` items.
    """
    guarded = iterate_with_abort_check(iterator, abort_check_interval, halt_page)
    return iterate_entries(guarded, max_entries)
|
||||
|
||||
def iterate_tracking(tracking_page: str) -> Generator[pywikibot.Page, None, None]:
    """
    Iterate over the main-namespace pages that transclude a tracking template on Wiktionary.

    `tracking_page` is the part after ``Template:tracking/``: to iterate over
    `Template:tracking/ja-pron/yomi`, pass `ja-pron/yomi`.
    """
    tracking_template = pywikibot.Page(WIKTIONARY, f"tracking/{tracking_page}", ns=TEMPLATE_NAMESPACE)
    return tracking_template.getReferences(
        only_template_inclusion=True,
        namespaces=[MAIN_NAMESPACE],
    )
|
||||
|
||||
def iterate_category(category_name: str) -> Generator[pywikibot.Page, None, None]:
    """Iterate the main-namespace pages of a Wiktionary category.

    Pass `category_name` without the ``Category:`` prefix, e.g.
    `category_name="Bulgarian lemmas"`.
    """
    category = pywikibot.Category(WIKTIONARY, category_name)
    return category.articles(namespaces=[MAIN_NAMESPACE])
|
||||
|
||||
def backup_page(old_text: str, new_page: pywikibot.Page, backup_path: str, file_name: str = None) -> None:
    """
    Store a unified diff from the page's new text back to `old_text` on local
    storage, so that a bad edit can be reverted later with ``patch``.

    The diff is written to `backup_path` (created if missing) under
    `file_name`, which defaults to the page title. The two texts are first
    written to ``temp1`` (old) and ``temp2`` (new) in the current directory
    and compared with the external ``diff`` command.
    """
    file_name = file_name or new_page.title()
    os.makedirs(backup_path, exist_ok=True)

    with open("temp1", mode="w", encoding="utf-8") as f:
        f.write(old_text)

    with open("temp2", mode="w", encoding="utf-8") as f:
        f.write(new_page.text)

    diff = subprocess.getoutput("diff -u temp2 temp1")  # Get differences between new revision and previous
    diff = diff + "\n"  # patch will complain if we don't end the file with a newline

    # Honour the caller-supplied `file_name` instead of always using the page title.
    with open(os.path.join(backup_path, file_name), mode="w", encoding="utf-8") as f:
        f.write(diff)
|
||||
|
||||
def add_l2(parsed: mwparserfromhell.wikicode.Wikicode, l2_section: mwparserfromhell.wikicode.Wikicode) -> None:
    """Add a level-2 section to `parsed`, keeping the sections sorted by heading title.

    Both arguments are passed through `mwparserfromhell.parse`. If a level-2
    section with the same heading title already exists, nothing is changed.
    Otherwise the existing level-2 sections plus `l2_section` are sorted by
    title, each followed by a blank line, and the rebuilt node list is
    assigned to `parsed.nodes`. Runs of three newlines are then collapsed.

    NOTE(review): only level-2 sections are carried into the rebuilt node
    list, so content before the first level-2 heading looks like it is
    dropped - confirm.
    """
    parsed = mwparserfromhell.parse(parsed)
    l2_section = mwparserfromhell.parse(l2_section)

    def heading_title(section):
        # The first node of a section is expected to be its heading.
        return section.nodes[0].title

    new_title = heading_title(l2_section)
    sections = parsed.get_sections([2])
    if any(heading_title(existing) == new_title for existing in sections):
        return

    sections.append(l2_section)
    sections.sort(key=heading_title)

    rebuilt = mwparserfromhell.parse("")
    for section in sections:
        section.append("\n\n")
        rebuilt.append(section)

    parsed.nodes = rebuilt.nodes

    while "\n\n\n" in parsed:
        parsed.replace("\n\n\n", "\n\n")
|
11
kovachevbot/setup.py
Normal file
11
kovachevbot/setup.py
Normal file
@ -0,0 +1,11 @@
|
||||
from setuptools import setup

# Packaging metadata for the shared kovachevbot helper library.
setup(
    name="kovachevbot",
    version="0.1",
    description="Library of generic functions and constants used in my bot scripts",
    url="https://www.kovachev.xyz",
    author="Kiril Kovachev",
    author_email="kkovachev1947@gmail.com",
    license="MIT",
    packages=["kovachevbot"],
    zip_safe=False,
)
|
Loading…
Reference in New Issue
Block a user