Add ѝ/и distinction
This commit is contained in:
parent
d0ab3f8d1e
commit
ce72400fd8
@ -15,7 +15,8 @@ RE_MATCH_CATEGORIES = re.compile(fr"({RE_CAT_TEMPLATES}|{RE_CATEGORIES})")
|
|||||||
SITE = pywikibot.Site("en", "wiktionary")
|
SITE = pywikibot.Site("en", "wiktionary")
|
||||||
BACKUP_PATH = "bg-anagrams-backup"
|
BACKUP_PATH = "bg-anagrams-backup"
|
||||||
ALPHABET = "абвгдежзийклмнопрстуфхцчшщъьюя"
|
ALPHABET = "абвгдежзийклмнопрстуфхцчшщъьюя"
|
||||||
NON_ALPHABETIC = f"[^{ALPHABET}]"
|
NUMERIC = "0123456789"
|
||||||
|
NON_ALPHANUMERIC = f"[^{ALPHABET}{NUMERIC}]"
|
||||||
|
|
||||||
def create_diff(old_text: str, current_page: pywikibot.Page) -> None:
|
def create_diff(old_text: str, current_page: pywikibot.Page) -> None:
|
||||||
"""
|
"""
|
||||||
@ -36,7 +37,8 @@ def create_diff(old_text: str, current_page: pywikibot.Page) -> None:
|
|||||||
f.write(diff)
|
f.write(diff)
|
||||||
|
|
||||||
def normalise(word: str) -> str:
|
def normalise(word: str) -> str:
|
||||||
return re.sub(NON_ALPHABETIC, "", re.sub("\s", "", word.strip().casefold()))
|
return re.sub(NON_ALPHANUMERIC, "", re.sub("ѝ", "и", word.casefold()))
|
||||||
|
# return re.sub("[-.;:?!‒–—]", "", re.sub("\s", "", word.casefold()))
|
||||||
|
|
||||||
def get_alphagram(word: str) -> str:
|
def get_alphagram(word: str) -> str:
|
||||||
return "".join(sorted(normalise(word)))
|
return "".join(sorted(normalise(word)))
|
||||||
@ -54,6 +56,9 @@ for word in wordlist:
|
|||||||
|
|
||||||
anagrams = {letter_count: anas for letter_count, anas in anagrams.items() if len(anas) > 1} # Only keep words with multiple anagrams
|
anagrams = {letter_count: anas for letter_count, anas in anagrams.items() if len(anas) > 1} # Only keep words with multiple anagrams
|
||||||
|
|
||||||
|
def count_anagrams():
    """Return the total number of entries across all anagram groups.

    Reads the module-level ``anagrams`` mapping and adds up the length of
    each of its values.
    """
    total = 0
    for group in anagrams.values():
        total += len(group)
    return total
|
||||||
|
|
||||||
def generate_anagrams_section(anagrams: set[str]) -> str:
|
def generate_anagrams_section(anagrams: set[str]) -> str:
|
||||||
return "\n\n===Anagrams===\n* " + generate_anagrams_template(anagrams, get_alphagram(anagrams.copy().pop())) + "\n\n"
|
return "\n\n===Anagrams===\n* " + generate_anagrams_template(anagrams, get_alphagram(anagrams.copy().pop())) + "\n\n"
|
||||||
|
|
||||||
@ -132,7 +137,9 @@ def main():
|
|||||||
except:
|
except:
|
||||||
LIMIT = -1
|
LIMIT = -1
|
||||||
|
|
||||||
print("Preparing to iterate over", len(anagrams), "alphragrams")
|
print(anagrams["и"])
|
||||||
|
print("Preparing to iterate over", len(anagrams), "alphragrams", f"({count_anagrams()} anagrams)")
|
||||||
|
return
|
||||||
|
|
||||||
edit_count = 0 # Updated for every individual page
|
edit_count = 0 # Updated for every individual page
|
||||||
iterations = 0 # Updated for every set of anagrams
|
iterations = 0 # Updated for every set of anagrams
|
||||||
|
@ -34356,7 +34356,7 @@
|
|||||||
ей
|
ей
|
||||||
ето
|
ето
|
||||||
зер
|
зер
|
||||||
и
|
ѝ
|
||||||
именно
|
именно
|
||||||
леле
|
леле
|
||||||
ли
|
ли
|
||||||
|
Loading…
Reference in New Issue
Block a user