Update with library, scripts
This commit is contained in:
parent
e22c88c737
commit
8dc7c70cd9
@ -139,6 +139,10 @@ def update_page(title: str, alphagram: str, uncreated: set[str]) -> bool:
|
|||||||
else:
|
else:
|
||||||
page.text = new_content
|
page.text = new_content
|
||||||
plural_s = "s" if len(anagrams_added) > 1 else ""
|
plural_s = "s" if len(anagrams_added) > 1 else ""
|
||||||
|
if len(anagrams_added) == 0:
|
||||||
|
print("Nothing was added, but the content was changed! (not saved)")
|
||||||
|
return False
|
||||||
|
|
||||||
page.save(f"Added anagram{plural_s} ({', '.join(anagrams_added)}) to Bulgarian section", minor=False)
|
page.save(f"Added anagram{plural_s} ({', '.join(anagrams_added)}) to Bulgarian section", minor=False)
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
@ -172,16 +176,49 @@ def main(uncreated: set[str]):
|
|||||||
|
|
||||||
iterations += 1
|
iterations += 1
|
||||||
|
|
||||||
|
def there_are_erroneous_anagrams(original, anagrams: set[str]) -> bool:
    """Report whether any listed anagram is just a respelling of ``original``.

    An entry counts as erroneous when it is not literally equal to
    ``original`` yet ``normalise`` maps both of them to the same string.
    Entries identical to ``original`` are ignored.
    """
    return any(
        candidate != original and normalise(candidate) == normalise(original)
        for candidate in anagrams
    )
|
||||||
|
|
||||||
|
def find_erroneous_anagrams():
    """Scan the pages of all known anagrams and record those with dubious lists.

    For every title in the module-level ``anagrams`` mapping whose page exists
    and has a Bulgarian section, each ``{{anagrams|bg|...}}`` template is
    checked with ``there_are_erroneous_anagrams``.  Titles with at least one
    offending template are written, one per line, to ``dubious_anagrams.txt``.
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm that `break` belongs to the innermost `if`.
    errors = []
    for anagram_list in anagrams.values():
        for anagram in anagram_list:
            page = pywikibot.Page(SITE, anagram)

            if not page.exists():
                continue
            if not has_bulgarian(page):
                continue

            print("Traversing page", anagram + "...")

            templates = mwparserfromhell.parse(page.text).filter(
                forcetype=mwparserfromhell.wikicode.Template
            )
            for template in templates:
                template: mwparserfromhell.wikicode.Template
                if template.name != "anagrams":
                    continue
                if not template.has_param(1):
                    continue
                if template.get(1) != "bg":
                    continue

                # The first two parameters are skipped; the rest are treated
                # as the listed anagrams.
                listed = template.params[2:]
                if there_are_erroneous_anagrams(anagram, listed):
                    print("Found erroneous anagrams: ", listed)
                    errors.append(anagram)
                    break  # one hit is enough to flag this page

    # The titles are Cyrillic, so do not rely on the platform default encoding.
    with open("dubious_anagrams.txt", mode="w", encoding="utf-8") as f:
        f.write("\n".join(errors))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
uncreated = set()
|
# uncreated = set()
|
||||||
try:
|
# try:
|
||||||
with open(NOT_CREATED_LOG) as f:
|
# with open(NOT_CREATED_LOG) as f:
|
||||||
uncreated = set(f.readlines())
|
# uncreated = set(f.readlines())
|
||||||
except FileNotFoundError:
|
# except FileNotFoundError:
|
||||||
with open(NOT_CREATED_LOG, "w") as f:
|
# with open(NOT_CREATED_LOG, "w") as f:
|
||||||
pass
|
# pass
|
||||||
try:
|
# try:
|
||||||
main(uncreated)
|
# main(uncreated)
|
||||||
finally:
|
# finally:
|
||||||
with open(NOT_CREATED_LOG, "w") as f:
|
# with open(NOT_CREATED_LOG, "w") as f:
|
||||||
f.writelines(uncreated)
|
# f.writelines(uncreated)
|
||||||
|
find_erroneous_anagrams()
|
16
bulgarian-anagrams/dubious_anagrams.txt
Normal file
16
bulgarian-anagrams/dubious_anagrams.txt
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
зар
|
||||||
|
иск
|
||||||
|
пра-
|
||||||
|
пот
|
||||||
|
глоба
|
||||||
|
картон
|
||||||
|
арап
|
||||||
|
тема
|
||||||
|
треска
|
||||||
|
нокът
|
||||||
|
аз
|
||||||
|
то
|
||||||
|
А
|
||||||
|
а
|
||||||
|
ни
|
||||||
|
това
|
11
bulgarian-anagrams/reformat.py
Normal file
11
bulgarian-anagrams/reformat.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
def process(line: str) -> str:
    """Return the last space-separated token of ``line`` with hyphens removed.

    Everything up to and including the final space is discarded; a line with
    no space is kept whole.  A trailing newline is preserved, so processed
    lines can be written back with ``writelines``.
    """
    return line[line.rfind(" ") + 1:].replace("-", "")


def main() -> None:
    """Convert ``words.txt`` into ``modified.txt`` by running ``process`` on each line."""
    # Explicit UTF-8: the word list is non-ASCII, and the platform default
    # encoding is not guaranteed to handle it.
    with open("words.txt", encoding="utf-8") as f:
        contents = [process(line) for line in f]

    with open("modified.txt", mode="w", encoding="utf-8") as f:
        f.writelines(contents)


if __name__ == "__main__":
    main()
|
76110
bulgarian-anagrams/words2.txt
Normal file
76110
bulgarian-anagrams/words2.txt
Normal file
File diff suppressed because it is too large
Load Diff
43646
bulgarian-anagrams/words3.txt
Normal file
43646
bulgarian-anagrams/words3.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -6,6 +6,7 @@ import pywikibot
|
|||||||
import mwparserfromhell
|
import mwparserfromhell
|
||||||
import unicodedata
|
import unicodedata
|
||||||
import regex as re
|
import regex as re
|
||||||
|
import random
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
# From User:JeffDoozan's bot AutoDooz
|
# From User:JeffDoozan's bot AutoDooz
|
||||||
@ -17,8 +18,8 @@ RE_MATCH_CATEGORIES = re.compile(fr"({RE_CAT_TEMPLATES}|{RE_CATEGORIES})")
|
|||||||
SITE = pywikibot.Site("en", "wiktionary")
|
SITE = pywikibot.Site("en", "wiktionary")
|
||||||
BACKUP_PATH = "en-anagrams-backup"
|
BACKUP_PATH = "en-anagrams-backup"
|
||||||
DIACRITICS = f"{chr(0x0300)}-{chr(0x036F)}"
|
DIACRITICS = f"{chr(0x0300)}-{chr(0x036F)}"
|
||||||
PUNCTUATION = r"’'\(\)\[\]\{\}<>:,‒–—―…!.«»-‐?‘’“”;/⁄␠·&@\*\•^¤¢$€£¥₩₪†‡°¡¿¬#№%‰‱¶′§~¨_|¦⁂☞∴‽※" + f"{chr(0x2000)}-{chr(0x206F)}"
|
PUNCTUATION = r"’'()\[\]{}<>:,‒–—―…!.«»\-‐?‘’“”;/⁄␠·&@*\\•^¤¢$€£¥₩₪†‡°¡¿¬#№%‰‱¶′§~¨_|¦⁂☞∴‽※" + f"{chr(0x2000)}-{chr(0x206F)}"
|
||||||
REDUNDANT_CHARS = f"[{DIACRITICS}{PUNCTUATION}]"
|
REDUNDANT_CHARS = f"[{DIACRITICS}{PUNCTUATION} ]"
|
||||||
|
|
||||||
CONVERSIONS = {
|
CONVERSIONS = {
|
||||||
"æ": "ae",
|
"æ": "ae",
|
||||||
@ -52,7 +53,7 @@ def normalise(word: str) -> str:
|
|||||||
- Convert to lowercase (casefold)
|
- Convert to lowercase (casefold)
|
||||||
- Remove all irrelevant elements (punctuation, diacritics).
|
- Remove all irrelevant elements (punctuation, diacritics).
|
||||||
"""
|
"""
|
||||||
word = word.strip().casefold()
|
word = word.casefold()
|
||||||
|
|
||||||
for source_char, replacement in CONVERSIONS.items():
|
for source_char, replacement in CONVERSIONS.items():
|
||||||
word = word.replace(source_char, replacement)
|
word = word.replace(source_char, replacement)
|
||||||
@ -178,6 +179,9 @@ def main():
|
|||||||
LIMIT = -1
|
LIMIT = -1
|
||||||
|
|
||||||
print("Preparing to iterate over", len(anagrams), "alphragrams", f"({count_anagrams()} anagrams)")
|
print("Preparing to iterate over", len(anagrams), "alphragrams", f"({count_anagrams()} anagrams)")
|
||||||
|
for anagram_list in anagrams.values():
|
||||||
|
if random.randint(1, 1000) == 50:
|
||||||
|
print(anagram_list)
|
||||||
|
|
||||||
edit_count = 0 # Updated for every individual page
|
edit_count = 0 # Updated for every individual page
|
||||||
iterations = 0 # Updated for every set of anagrams
|
iterations = 0 # Updated for every set of anagrams
|
||||||
|
37
english-anagrams/temp1
Normal file
37
english-anagrams/temp1
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
{{also|Unicef|U.N.I.C.E.F.}}
|
||||||
|
==English==
|
||||||
|
{{wikipedia}}
|
||||||
|
|
||||||
|
===Alternative forms===
|
||||||
|
* {{alter|en|Unicef||Britain, Australia, New Zealand}}
|
||||||
|
* {{alter|en|U.N.I.C.E.F.}}
|
||||||
|
|
||||||
|
===Pronunciation===
|
||||||
|
* English: {{a|US}} {{IPA|en|/ˈju.nəˌsɛf/}}
|
||||||
|
|
||||||
|
===Proper noun===
|
||||||
|
{{en-proper noun}}
|
||||||
|
|
||||||
|
# {{acronym of|en|[[United Nations]] [[international|International]] [[child|Children's]] [[emergency|Emergency]] [[fund|Fund]]}}
|
||||||
|
|
||||||
|
====Translations====
|
||||||
|
{{trans-top|United Nations International Children's Emergency Fund}}
|
||||||
|
* Japanese: {{t+|ja|UNICEF|tr=Yunisefu}}
|
||||||
|
* Macedonian: {{t|mk|УНИЦЕФ}}, {{t|mk|Уницеф}}
|
||||||
|
* Russian: {{t+|ru|ЮНИСЕ́Ф|m|sc=Cyrl}}
|
||||||
|
{{trans-bottom}}
|
||||||
|
|
||||||
|
{{cln|en|acronyms}}
|
||||||
|
{{C|en|United Nations}}
|
||||||
|
|
||||||
|
==Japanese==
|
||||||
|
|
||||||
|
===Etymology===
|
||||||
|
Borrowed from {{bor|ja|en|UNICEF|sort=ゆにせふ}}.
|
||||||
|
|
||||||
|
===Proper noun===
|
||||||
|
{{ja-pos|proper|ユニセフ}}
|
||||||
|
|
||||||
|
# {{syn of|ja|国際連合児童基金|tr=Kokusai Rengō Jidō Kikin||{{w|United Nations Children's Emergency Fund}}}}; the {{l|en|UNICEF}}
|
||||||
|
|
||||||
|
{{topics|ja|Organizations|sort=ゆにせふ}}
|
37
english-anagrams/temp2
Normal file
37
english-anagrams/temp2
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
{{also|Unicef|U.N.I.C.E.F.}}
|
||||||
|
==English==
|
||||||
|
{{wikipedia}}
|
||||||
|
|
||||||
|
===Alternative forms===
|
||||||
|
* {{alter|en|Unicef||Britain, Australia, New Zealand}}
|
||||||
|
* {{alter|en|U.N.I.C.E.F.}}
|
||||||
|
|
||||||
|
===Pronunciation===
|
||||||
|
* English: {{a|US}} {{IPA|en|/ˈju.nəˌsɛf/}}
|
||||||
|
|
||||||
|
===Proper noun===
|
||||||
|
{{en-proper noun}}
|
||||||
|
|
||||||
|
# {{acronym of|en|[[United Nations]] [[international|International]] [[child|Children's]] [[emergency|Emergency]] [[fund|Fund]]}}
|
||||||
|
|
||||||
|
====Translations====
|
||||||
|
{{trans-top|United Nations International Children's Emergency Fund}}
|
||||||
|
* Japanese: {{t+|ja|UNICEF|tr=Yunisefu}}
|
||||||
|
* Macedonian: {{t|mk|УНИЦЕФ}}, {{t|mk|Уницеф}}
|
||||||
|
* Russian: {{t+|ru|ЮНИСЕ́Ф|m|sc=Cyrl}}
|
||||||
|
{{trans-bottom}}
|
||||||
|
|
||||||
|
{{cln|en|acronyms}}
|
||||||
|
{{C|en|United Nations}}
|
||||||
|
|
||||||
|
==Japanese==
|
||||||
|
|
||||||
|
===Etymology===
|
||||||
|
Borrowed from {{bor|ja|en|UNICEF|sort=ゆにせふ}}.
|
||||||
|
|
||||||
|
===Proper noun===
|
||||||
|
{{ja-pos|proper|ユニセフ}}
|
||||||
|
|
||||||
|
# {{syn of|ja|国際連合児童基金|tr=Kokusai Rengō Jidō Kikin||{{w|United Nations Children's Emergency Fund}}}}; the {{l|en|UNICEF}}
|
||||||
|
|
||||||
|
{{topics|ja|Organizations|sort=ゆにせふ}}
|
1
ja-readings-fix/ja-readings-to-fix.txt
Normal file
1
ja-readings-fix/ja-readings-to-fix.txt
Normal file
File diff suppressed because one or more lines are too long
65
ja-readings-fix/ja-readings.py
Normal file
65
ja-readings-fix/ja-readings.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
import traceback
|
||||||
|
from typing import Iterator
|
||||||
|
import kovachevbot
|
||||||
|
import mwparserfromhell
|
||||||
|
import sys
|
||||||
|
import pywikibot
|
||||||
|
import regex as re
|
||||||
|
|
||||||
|
ROMAJI_TRANSLITERATION_PATTERN = re.compile(r"\(\w+?\)")
|
||||||
|
|
||||||
|
def fix_reading_str(reading_str: str) -> str:
    """Clean up a comma-separated list of readings.

    Each item is stripped, has every match of
    ``ROMAJI_TRANSLITERATION_PATTERN`` removed, is passed through
    ``kovachevbot.links_to_plaintext`` and is stripped again.  The cleaned
    items are re-joined with ``", "``.
    """
    cleaned = []
    for raw in reading_str.split(","):
        without_romaji = ROMAJI_TRANSLITERATION_PATTERN.sub("", raw.strip())
        cleaned.append(kovachevbot.links_to_plaintext(without_romaji).strip())
    return ", ".join(cleaned)
|
||||||
|
|
||||||
|
def fix_page(page: pywikibot.Page):
    """Tidy every ``ja-readings`` template in the Japanese section of ``page``.

    Empty parameters are removed and every other parameter value is rewritten
    with ``fix_reading_str``.  The result is stored in ``page.text``; saving is
    left to the caller.  Pages without a Japanese section are left untouched.
    """
    kanji = page.title()
    parsed = mwparserfromhell.parse(page.text)
    japanese_section_search = parsed.get_sections([2], "Japanese")
    if len(japanese_section_search) == 0:
        print("Skipping page", kanji, "as it has no Japanese section", file=sys.stderr)
        # Without this return the index below raises IndexError.
        return

    japanese_section: mwparserfromhell.wikicode.Wikicode = japanese_section_search[0]

    ja_readingses: list[mwparserfromhell.wikicode.Template] = japanese_section.filter(
        forcetype=mwparserfromhell.wikicode.Template, matches="ja-readings"
    )

    for ja_reading_template in ja_readingses:
        params_to_remove = []
        for param in ja_reading_template.params:
            param: mwparserfromhell.nodes.extras.Parameter

            # Can't delete params while iterating, so we need to store them to delete later
            if param.value == "":  # Delete parameters that are supplied but not populated, e.g. "|nanori="
                params_to_remove.append(param)
            else:
                param.value = fix_reading_str(str(param.value))

        for param in params_to_remove:
            ja_reading_template.remove(param)

    page.text = str(parsed)
|
||||||
|
|
||||||
|
def main():
    """Fix the ``ja-readings`` templates of every kanji listed in the work file.

    ``ja-readings-to-fix.txt`` is read as a string in which each character is
    one page title.  Pages are processed through ``kovachevbot.iterate_safe``.
    Whatever happens, the file is rewritten at the end so that it holds only
    the kanji that were not successfully saved, which makes the script
    resumable.
    """
    with open("ja-readings-to-fix.txt", encoding="utf-8") as f:
        # Every character becomes a page title, so drop whitespace such as a
        # trailing newline instead of trying to load it as a page.
        kanji_to_fix = "".join(f.read().split())

    pages = (kovachevbot.wikt_page(kanji) for kanji in kanji_to_fix)
    checked_pages_iter: Iterator[pywikibot.Page] = kovachevbot.iterate_safe(pages)

    # Count of pages saved successfully.  Counting explicitly, rather than
    # reusing the loop index, stays correct when nothing was iterated and when
    # the very first page fails.
    completed = 0
    try:
        for page in checked_pages_iter:
            print(page.title())
            fix_page(page)
            page.save("Remove redundant ja-readings markup (manual transliterations; manual links; empty params)")
            completed += 1
    except Exception:
        traceback.print_exc()
    finally:
        # Keep the failed page (if any) and everything after it for the next run.
        with open("ja-readings-to-fix.txt", mode="w", encoding="utf-8") as f:
            f.write(kanji_to_fix[completed:])
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
@ -3,10 +3,10 @@ import pywikibot
|
|||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import mwparserfromhell
|
import mwparserfromhell
|
||||||
|
import kovachevbot
|
||||||
from mwparserfromhell.wikicode import Template
|
from mwparserfromhell.wikicode import Template
|
||||||
from restore_pages import BACKUP_PATH
|
from restore_pages import BACKUP_PATH
|
||||||
|
|
||||||
|
|
||||||
JA_YOMI_TRACKING_PAGE = "tracking/ja-pron/yomi"
|
JA_YOMI_TRACKING_PAGE = "tracking/ja-pron/yomi"
|
||||||
SITE = pywikibot.Site("en", "wiktionary")
|
SITE = pywikibot.Site("en", "wiktionary")
|
||||||
|
|
||||||
@ -15,7 +15,6 @@ def get_yomi_pages() -> Generator[pywikibot.Page, None, None]:
|
|||||||
MAIN_NAMESPACE = SITE.namespaces.MAIN
|
MAIN_NAMESPACE = SITE.namespaces.MAIN
|
||||||
return pywikibot.Page(SITE, JA_YOMI_TRACKING_PAGE, ns=TEMPLATE_NAMESPACE).getReferences(only_template_inclusion=True, namespaces=[MAIN_NAMESPACE])
|
return pywikibot.Page(SITE, JA_YOMI_TRACKING_PAGE, ns=TEMPLATE_NAMESPACE).getReferences(only_template_inclusion=True, namespaces=[MAIN_NAMESPACE])
|
||||||
|
|
||||||
|
|
||||||
# Use mwparserfromhell to filter all the templates, select the ja-pron ones, and remove any "y" or "yomi"
|
# Use mwparserfromhell to filter all the templates, select the ja-pron ones, and remove any "y" or "yomi"
|
||||||
# arguments they might have.
|
# arguments they might have.
|
||||||
def remove_yomi_from_page(page: pywikibot.Page) -> None:
|
def remove_yomi_from_page(page: pywikibot.Page) -> None:
|
||||||
@ -27,9 +26,9 @@ def remove_yomi_from_page(page: pywikibot.Page) -> None:
|
|||||||
parsed = mwparserfromhell.parse(text)
|
parsed = mwparserfromhell.parse(text)
|
||||||
for template in parsed.ifilter(forcetype=Template, recursive=False):
|
for template in parsed.ifilter(forcetype=Template, recursive=False):
|
||||||
template: Template
|
template: Template
|
||||||
if template.name != "ja-pron":
|
if template.name != "ja-pron" and str(template.name).casefold() != "ja-ipa":
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if template.has("y"):
|
if template.has("y"):
|
||||||
template.remove("y")
|
template.remove("y")
|
||||||
if template.has("yomi"):
|
if template.has("yomi"):
|
||||||
@ -72,37 +71,32 @@ def template_argument_counts_accord(previous_text: str, current_text: str) -> bo
|
|||||||
if previous_pron.name != "ja-pron" or current_pron.name != "ja-pron":
|
if previous_pron.name != "ja-pron" or current_pron.name != "ja-pron":
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if not (previous_pron.has("y") or previous_pron.has("yomi")):
|
||||||
|
continue
|
||||||
|
|
||||||
if len(current_pron.params) != len(previous_pron.params) - 1:
|
if len(current_pron.params) != len(previous_pron.params) - 1:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# Get the maximum number of edits to make from the user (e.g. `pwb ja-yomi-remove 100`);
|
for page in kovachevbot.iterate_safe((get_yomi_pages())):
|
||||||
# if not found then set to unlimited (-1)
|
|
||||||
try:
|
|
||||||
LIMIT = int(pywikibot.argvu[1])
|
|
||||||
except:
|
|
||||||
LIMIT = -1
|
|
||||||
|
|
||||||
for edit_count, page in enumerate(get_yomi_pages()):
|
|
||||||
if edit_count == LIMIT:
|
|
||||||
return
|
|
||||||
if edit_count % 5 == 0:
|
|
||||||
halt_page = pywikibot.Page(SITE, "User:KovachevBot/halt")
|
|
||||||
if "halt" in halt_page.text.casefold():
|
|
||||||
print(f"ERROR: BOT WAS MANUALLY HALTED BY {halt_page.userName()}")
|
|
||||||
return
|
|
||||||
|
|
||||||
original_text = page.text
|
original_text = page.text
|
||||||
|
|
||||||
print(f"Removing yomi from {page.title()}...")
|
print(f"Removing yomi from {page.title()}...")
|
||||||
|
|
||||||
page.text = remove_yomi_from_page(page)
|
page.text = remove_yomi_from_page(page)
|
||||||
|
|
||||||
print(f"Backing up {page.title()}...")
|
print(f"Backing up {page.title()}...")
|
||||||
create_diff(original_text, page)
|
kovachevbot.backup_page(original_text, page, BACKUP_PATH)
|
||||||
assert template_argument_counts_accord(original_text, page.text)
|
|
||||||
page.save("Removed deprecated yomi/y parameters from {{ja-pron}} (automated task)", minor=True, botflag=True)
|
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
assert template_argument_counts_accord(original_text, page.text)
|
||||||
|
page.save("Removed deprecated yomi/y parameters from {{ja-pron}} (automated task)", minor=True, botflag=True)
|
||||||
|
except AssertionError:
|
||||||
|
print("ERROR: page raised error, template argument-counting failsafe did not accord")
|
||||||
|
continue
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
3
kovachevbot/README.md
Normal file
3
kovachevbot/README.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# KovachevBot commons
|
||||||
|
This is a directory which acts as a Python module, containing numerous functions and patterns
|
||||||
|
that I typically reuse throughout my bot code.
|
8
kovachevbot/kovachevbot.egg-info/PKG-INFO
Normal file
8
kovachevbot/kovachevbot.egg-info/PKG-INFO
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
Metadata-Version: 2.1
|
||||||
|
Name: kovachevbot
|
||||||
|
Version: 0.1
|
||||||
|
Summary: Library of generic functions and constants used in my bot scripts
|
||||||
|
Home-page: https://www.kovachev.xyz
|
||||||
|
Author: Kiril Kovachev
|
||||||
|
Author-email: kkovachev1947@gmail.com
|
||||||
|
License: MIT
|
9
kovachevbot/kovachevbot.egg-info/SOURCES.txt
Normal file
9
kovachevbot/kovachevbot.egg-info/SOURCES.txt
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
README.md
|
||||||
|
setup.py
|
||||||
|
kovachevbot/__init__.py
|
||||||
|
kovachevbot/common.py
|
||||||
|
kovachevbot.egg-info/PKG-INFO
|
||||||
|
kovachevbot.egg-info/SOURCES.txt
|
||||||
|
kovachevbot.egg-info/dependency_links.txt
|
||||||
|
kovachevbot.egg-info/not-zip-safe
|
||||||
|
kovachevbot.egg-info/top_level.txt
|
1
kovachevbot/kovachevbot.egg-info/dependency_links.txt
Normal file
1
kovachevbot/kovachevbot.egg-info/dependency_links.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
1
kovachevbot/kovachevbot.egg-info/not-zip-safe
Normal file
1
kovachevbot/kovachevbot.egg-info/not-zip-safe
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
1
kovachevbot/kovachevbot.egg-info/top_level.txt
Normal file
1
kovachevbot/kovachevbot.egg-info/top_level.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
kovachevbot
|
3
kovachevbot/kovachevbot/__init__.py
Normal file
3
kovachevbot/kovachevbot/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# When `import kovachevbot` is run, extract all of its data under the simple namespace `kovachevbot`
|
||||||
|
# (as opposed to resolving `kovachevbot.common` for all module data)
|
||||||
|
from kovachevbot.common import *
|
180
kovachevbot/kovachevbot/common.py
Normal file
180
kovachevbot/kovachevbot/common.py
Normal file
@ -0,0 +1,180 @@
|
|||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import pywikibot
|
||||||
|
import tkinter
|
||||||
|
import sys
|
||||||
|
import itertools
|
||||||
|
import mwparserfromhell
|
||||||
|
import regex as re
|
||||||
|
from typing import Generator, Iterator
|
||||||
|
|
||||||
|
|
||||||
|
WIKTIONARY = pywikibot.Site("en", "wiktionary")
|
||||||
|
TEMPLATE_NAMESPACE = WIKTIONARY.namespaces.TEMPLATE
|
||||||
|
MAIN_NAMESPACE = WIKTIONARY.namespaces.MAIN
|
||||||
|
COMMONS = pywikibot.Site("commons", "commons")
|
||||||
|
LINK_PATTERN = re.compile(r"\[\[(.+?)(?:\|(.+?))?\]\]")
|
||||||
|
|
||||||
|
|
||||||
|
def wikt_page(title: str) -> pywikibot.Page:
    """Build a page object for ``title`` on the ``WIKTIONARY`` site."""
    page = pywikibot.Page(WIKTIONARY, title)
    return page
|
||||||
|
|
||||||
|
def save_gui(page: pywikibot.Page, default_edit_summary: str = "") -> bool:
    """Open a small Tk editor for ``page`` and save it when "Save" is pressed.

    The window shows the page text in an editable box and an edit-summary
    field pre-filled with ``default_edit_summary``.  The call blocks until the
    window is closed.

    Returns whether the edit was successfully completed through the save
    button or not.
    """
    window = tkinter.Tk()
    window.title(f"Editing page {page.title()}")
    window.geometry("800x600")
    window.config(bg="#000000")

    page_text_box = tkinter.Text(master=window)
    page_text_box.pack(fill="x", expand=False, padx=20, pady=0)
    page_text_box.insert("1.0", page.text)

    edit_summary_label = tkinter.Label(window, text="Edit summary")
    edit_summary_var = tkinter.StringVar()
    edit_summary_var.set(default_edit_summary)
    edit_summary_box = tkinter.Entry(window, textvariable=edit_summary_var, width=60)
    edit_summary_label.place(x=20, y=500)
    edit_summary_box.place(x=20, y=525)

    success = False

    def save_content():
        nonlocal success
        # "end-1c" leaves out the newline Tk always appends to a Text widget,
        # so saving does not add a spurious trailing newline to the page.
        page.text = page_text_box.get("1.0", "end-1c")
        edit_summary = edit_summary_var.get()
        # Close the window first; the flag is only set once the save returned.
        window.destroy()
        page.save(edit_summary, minor=False)
        success = True

    button = tkinter.Button(window, text="Save", command=save_content)
    button.place(x=400, y=550)

    window.mainloop()
    return success
|
||||||
|
|
||||||
|
# save_gui(pywikibot.Page(WIKTIONARY, "User:Kiril kovachev/Sandbox"))
|
||||||
|
|
||||||
|
def convert_link_to_plaintext(link: mwparserfromhell.wikicode.Wikilink) -> str:
    """Return the text a wikilink stands for, as a plain string.

    The link's display text is used when it is present and non-empty;
    otherwise the link's title is used.  The result is converted with
    ``str`` so the function actually returns the type it is annotated with.
    """
    label = link.text
    if label is None or label == "":
        return str(link.title)
    return str(label)
|
||||||
|
|
||||||
|
def links_to_plaintext(text: str) -> str:
    """Return ``text`` with every wikilink replaced by its plain-text form.

    The text is parsed with mwparserfromhell and each wikilink found is
    replaced by the result of ``convert_link_to_plaintext``.
    """
    wikicode: mwparserfromhell.wikicode.Wikicode = mwparserfromhell.parse(text)
    for wikilink in wikicode.filter(forcetype=mwparserfromhell.wikicode.Wikilink):
        wikicode.replace(wikilink, convert_link_to_plaintext(wikilink))
    return str(wikicode)
|
||||||
|
|
||||||
|
ABORT_CHECK_INTERVAL = 5
|
||||||
|
HALT_PAGE = wikt_page("User:KovachevBot/halt") # Do not edit, please!
|
||||||
|
|
||||||
|
def iterate_with_abort_check(iterator: Iterator, interval: int = ABORT_CHECK_INTERVAL, halt_page = HALT_PAGE):
    """
    Run over an iterator, checking at every interval of 5 (or other specified value)
    whether the bot has been ordered to stop. The failsafe site is defined as User:KovachevBot/halt by default.

    The check runs before the first item and then before every ``interval``-th
    item.  Iteration ends silently (apart from a message on stderr) as soon as
    the halt page contains the word "halt" in any letter case.
    """
    for edit_count, value in enumerate(iterator):
        # Check halt page
        if edit_count % interval == 0:
            # A page object keeps the text it fetched the first time, so
            # reusing `halt_page` would never notice a later halt request.
            # Build a fresh object for every check instead.
            current = pywikibot.Page(halt_page.site, halt_page.title())
            if "halt" in current.text.casefold():
                print(f"ERROR: BOT WAS MANUALLY HALTED BY {current.userName()}", file=sys.stderr)
                return
        yield value
|
||||||
|
|
||||||
|
def iterate_entries(iterator: Iterator, max_edits: int = None):
    """Iterate at most `max_edits` entries of an iterator (of pages), or unlimited.

    If no `max_edits` is provided as an arg, try to get the value from the command-line arguments.
    If it still isn't found, default to running indefinitely.
    If it is provided, but it's not a valid integer, it will default to unlimited again.
    In the unlimited case, this effectively means this iterator will run until the original one is exhausted.

    A limit of zero or less yields nothing.  No item beyond the limit is
    consumed from `iterator`.
    """
    try:
        # Fall back to the first command-line argument when no limit is passed.
        requested = pywikibot.argvu[1] if max_edits is None else max_edits
        edit_iter = range(int(requested))
    except (AttributeError, IndexError, TypeError, ValueError):
        # Missing or non-numeric limit: run until `iterator` is exhausted.
        edit_iter = itertools.count()

    # The counter comes first so that zip stops before pulling an extra item
    # from `iterator` once the limit is reached.
    for _, value in zip(edit_iter, iterator):
        yield value
|
||||||
|
|
||||||
|
def iterate_safe(iterator: Iterator, max_entries: int = None, abort_check_interval: int = ABORT_CHECK_INTERVAL, halt_page: pywikibot.Page = HALT_PAGE):
    """Wrap an iterator of pages with both the halt check and the entry limit.

    The iterator is first passed through ``iterate_with_abort_check`` (using
    ``abort_check_interval`` and ``halt_page``), and the result is then
    limited by ``iterate_entries`` to at most ``max_entries`` items.
    """
    guarded = iterate_with_abort_check(iterator, abort_check_interval, halt_page)
    limited = iterate_entries(guarded, max_entries)
    return limited
|
||||||
|
|
||||||
|
def iterate_tracking(tracking_page: str) -> Generator[pywikibot.Page, None, None]:
    """
    Iterate over main-namespace pages that transclude a tracking template.

    `tracking_page` is the part after `Template:tracking/`: to iterate over
    `Template:tracking/ja-pron/yomi`, pass `ja-pron/yomi`.
    """
    tracker = pywikibot.Page(WIKTIONARY, f"tracking/{tracking_page}", ns=TEMPLATE_NAMESPACE)
    references = tracker.getReferences(only_template_inclusion=True, namespaces=[MAIN_NAMESPACE])
    return references
|
||||||
|
|
||||||
|
def iterate_category(category_name: str) -> Generator[pywikibot.Page, None, None]:
    """Iterate main-namespace pages belonging to a Wiktionary category.

    Pass `category_name` without the Category: prefix, e.g.
    `category_name="Bulgarian lemmas"`.
    """
    category = pywikibot.Category(WIKTIONARY, category_name)
    members = category.articles(namespaces=[MAIN_NAMESPACE])
    return members
|
||||||
|
|
||||||
|
def backup_page(old_text: str, new_page: pywikibot.Page, backup_path: str, file_name: str = None) -> None:
    """
    Copy the contents of the page to local storage for backup in case there is a problem
    with the script later; this will allow the error to be automatically corrected at that time.

    What is stored is the output of ``diff -u`` going from the new page text
    to ``old_text``.  It is written to ``backup_path/file_name``;
    ``file_name`` defaults to the page title and ``backup_path`` is created if
    it does not exist.  The scratch files ``temp1`` and ``temp2`` are written
    to the current working directory.
    """
    file_name = file_name or new_page.title()
    os.makedirs(backup_path, exist_ok=True)

    with open("temp1", mode="w", encoding="utf-8") as f:
        f.write(old_text)

    with open("temp2", mode="w", encoding="utf-8") as f:
        f.write(new_page.text)

    diff = subprocess.getoutput("diff -u temp2 temp1")  # Get differences between new revision and previous
    diff = diff + "\n"  # patch will complain if we don't end the file with a newline

    # Honour the caller-supplied name; previously the page title was always used.
    with open(os.path.join(backup_path, file_name), mode="w", encoding="utf-8") as f:
        f.write(diff)
|
||||||
|
|
||||||
|
def add_l2(parsed: mwparserfromhell.wikicode.Wikicode, l2_section: mwparserfromhell.wikicode.Wikicode) -> None:
    """Insert a level-2 section into ``parsed``, keeping L2 sections sorted by title.

    Nothing happens if a level-2 section with the same heading title already
    exists.  Otherwise the section is added, all level-2 sections are ordered
    by heading title, each is followed by a blank line, and runs of three or
    more newlines are collapsed to two.  Content before the first heading is
    kept at the top of the page.

    The first node of ``l2_section`` must be its heading.

    NOTE(review): nothing is returned, so the caller only sees the result if
    ``mwparserfromhell.parse`` hands back the same object it was given -
    confirm for the argument types actually passed in.
    """
    parsed = mwparserfromhell.parse(parsed)
    l2_section = mwparserfromhell.parse(l2_section)

    l2_title = l2_section.nodes[0].title

    # include_lead=True puts whatever precedes the first heading (possibly
    # nothing) at index 0; without it that content would be thrown away when
    # the page is rebuilt below.
    lead, *l2_sections = parsed.get_sections([2], include_lead=True)

    if l2_title in [section.nodes[0].title for section in l2_sections]:
        return

    l2_sections.append(l2_section)
    l2_sections.sort(key=lambda section: section.nodes[0].title)

    lead_text = str(lead)
    if lead_text and not lead_text.endswith("\n"):
        lead_text += "\n"  # the next heading must start on its own line

    # Assemble as plain text so blank-line clean-up does not depend on how the
    # newlines happen to be split across parser nodes.
    text = lead_text + "".join(f"{section}\n\n" for section in l2_sections)
    while "\n\n\n" in text:
        text = text.replace("\n\n\n", "\n\n")

    parsed.nodes = mwparserfromhell.parse(text).nodes
|
11
kovachevbot/setup.py
Normal file
11
kovachevbot/setup.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
from setuptools import setup
|
||||||
|
|
||||||
|
# Distribution metadata for the shared bot helper package.
PACKAGE_METADATA = {
    "name": "kovachevbot",
    "version": "0.1",
    "description": "Library of generic functions and constants used in my bot scripts",
    "url": "https://www.kovachev.xyz",
    "author": "Kiril Kovachev",
    "author_email": "kkovachev1947@gmail.com",
    "license": "MIT",
    "packages": ["kovachevbot"],
    "zip_safe": False,
}

setup(**PACKAGE_METADATA)
|
Loading…
Reference in New Issue
Block a user