Add ѝ/и distinction

This commit is contained in:
Spooghetti420 2023-08-02 14:22:44 +01:00
parent d0ab3f8d1e
commit ce72400fd8
2 changed files with 11 additions and 4 deletions

View File

@ -15,7 +15,8 @@ RE_MATCH_CATEGORIES = re.compile(fr"({RE_CAT_TEMPLATES}|{RE_CATEGORIES})")
# pywikibot handle for the English-language Wiktionary.
SITE = pywikibot.Site("en", "wiktionary")
# NOTE(review): presumably where backups are written; its use is not visible here.
BACKUP_PATH = "bg-anagrams-backup"
# Lowercase Cyrillic letters that are kept when a word is normalised.
ALPHABET = "абвгдежзийклмнопрстуфхцчшщъьюя"
# Regex character class matching any single character that is not in ALPHABET.
NON_ALPHABETIC = f"[^{ALPHABET}]"
# Decimal digits; combined with ALPHABET to form the alphanumeric class below.
NUMERIC = "0123456789"
# Regex character class matching any single character that is neither in
# ALPHABET nor in NUMERIC.
NON_ALPHANUMERIC = f"[^{ALPHABET}{NUMERIC}]"
def create_diff(old_text: str, current_page: pywikibot.Page) -> None:
"""
@ -36,7 +37,8 @@ def create_diff(old_text: str, current_page: pywikibot.Page) -> None:
f.write(diff)
def normalise(word: str) -> str:
    """
    Reduce *word* to the characters that matter when comparing anagrams.

    The word is case-folded, the grave-accented "ѝ" is folded into plain "и",
    and every character that is not in ALPHABET or NUMERIC (whitespace,
    punctuation, hyphens, ...) is removed.

    :param word: the word or phrase to normalise
    :return: the normalised string, possibly empty
    """
    # Fold ѝ into и before filtering: ѝ is not part of ALPHABET, so it would
    # otherwise be deleted outright instead of counting as an и.
    folded = word.casefold().replace("ѝ", "и")
    # Whitespace is outside the alphanumeric class, so this single pass also
    # strips spaces; no separate whitespace substitution is needed.
    return re.sub(NON_ALPHANUMERIC, "", folded)
def get_alphagram(word: str) -> str:
    """
    Return the alphagram of *word*: the characters of its normalised form
    arranged in sorted order and joined back into one string.
    """
    letters = list(normalise(word))
    letters.sort()
    return "".join(letters)
@ -54,6 +56,9 @@ for word in wordlist:
# Discard every entry whose collection holds fewer than two items, so that
# only groups with more than one member remain.
anagrams = {
    key: members
    for key, members in anagrams.items()
    if len(members) >= 2
}
def count_anagrams():
    """
    Return the combined length of all collections stored as values in the
    module-level `anagrams` mapping.
    """
    total = 0
    for group in anagrams.values():
        total += len(group)
    return total
def generate_anagrams_section(anagrams: set[str]) -> str:
    """
    Build the text of an "===Anagrams===" section for the given set of words.

    The section is a heading followed by one bullet whose content comes from
    generate_anagrams_template, wrapped in blank lines on both sides.

    :param anagrams: the words to list; must be non-empty (pop() on an empty
        set raises KeyError)
    :return: the section text
    """
    # Take one member from a copy so the caller's set is left untouched.
    # NOTE(review): presumably every member yields the same alphagram, so
    # which one is picked does not matter; confirm against the caller.
    representative = anagrams.copy().pop()
    alphagram = get_alphagram(representative)
    template = generate_anagrams_template(anagrams, alphagram)
    return "\n\n===Anagrams===\n* " + template + "\n\n"
@ -132,7 +137,9 @@ def main():
except:
LIMIT = -1
print("Preparing to iterate over", len(anagrams), "alphragrams")
print(anagrams["и"])
print("Preparing to iterate over", len(anagrams), "alphragrams", f"({count_anagrams()} anagrams)")
return
edit_count = 0 # Updated for every individual page
iterations = 0 # Updated for every set of anagrams

View File

@ -34356,7 +34356,7 @@
ей
ето
зер
и
ѝ
именно
леле
ли