Add objective/subjective converter script

This commit is contained in:
Kiril Kovachev 2023-09-01 18:55:23 +01:00
parent 8dc7c70cd9
commit de4d24c97c
3 changed files with 3671 additions and 0 deletions

View File

@ -0,0 +1,35 @@
import kovachevbot
import pywikibot
import mwparserfromhell
import sys
# Deprecated inflection tags mapped to the tags that replace them.
_TAG_REPLACEMENTS = {
    "sbjv": "sbj",
    "objv": "obj",
}


def fix_inflection(template: "mwparserfromhell.nodes.Template") -> None:
    """Rewrite deprecated ``sbjv``/``objv`` tags in *template* in place.

    Every parameter whose value, ignoring surrounding whitespace, is exactly
    ``sbjv`` or ``objv`` is changed to ``sbj`` or ``obj`` respectively. Any
    whitespace around the tag (for example the trailing newline of a
    multi-line template layout) is kept as it was. All other parameters are
    left untouched.

    Args:
        template: The template whose ``params`` are inspected and updated.
    """
    for param in template.params:
        param: "mwparserfromhell.nodes.extras.Parameter"
        raw = str(param.value)
        tag = raw.strip()
        replacement = _TAG_REPLACEMENTS.get(tag)
        if replacement is None:
            continue
        # Swap only the tag itself so any padding around it survives.
        param.value = raw.replace(tag, replacement, 1)
def main() -> None:
    """Convert ``sbjv``/``objv`` tags on the pages listed in ``words-to-edit.txt``.

    Reads one page title per line from ``words-to-edit.txt``, and for each
    page applies ``fix_inflection`` to every ``{{inflection of|bg|...}}``
    template found in its Bulgarian section. Pages with no Bulgarian section
    are reported on stderr and skipped; pages whose text is unchanged are not
    saved.
    """
    # Explicit UTF-8: the titles are Cyrillic and must not depend on the
    # platform's default encoding.
    with open("words-to-edit.txt", encoding="utf-8") as f:
        words_to_fix = f.read().splitlines()

    # `matches` is applied as a regular expression to each node's wikitext,
    # so the literal braces and pipes have to be escaped; an unescaped "|"
    # is alternation and would make the pattern match every template.
    # Anchoring at the start keeps outer templates that merely contain an
    # inflection template in one of their arguments from matching.
    inflection_pattern = r"^\{\{\s*inflection of\s*\|\s*bg\s*\|"

    pages = (kovachevbot.wikt_page(word) for word in words_to_fix)
    # NOTE(review): max_entries=1 looks like a trial-run limit; its exact
    # meaning depends on kovachevbot.iterate_safe -- confirm before a full run.
    for page in kovachevbot.iterate_safe(pages, max_entries=1):
        page: pywikibot.Page
        title = page.title()
        original_text = page.text
        parsed = mwparserfromhell.parse(original_text)

        try:
            bulgarian_section: mwparserfromhell.wikicode.Wikicode = parsed.get_sections([2], "Bulgarian")[0]
        except IndexError:
            print(f"Error: page {title} has no Bulgarian content", file=sys.stderr)
            # Without this, the loop below would run on an unbound name or
            # on the previous page's section.
            continue

        for inflection_template in bulgarian_section.filter(
            forcetype=mwparserfromhell.wikicode.Template,
            matches=inflection_pattern,
        ):
            inflection_template: mwparserfromhell.wikicode.Template
            fix_inflection(inflection_template)

        updated_text = str(parsed)
        if updated_text == original_text:
            # Nothing to convert on this page; avoid an empty edit.
            continue

        page.text = updated_text
        page.save("Convert sbjv/objv into sbj/obj in Bulgarian inflections")
# Run the conversion only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()

View File

@ -0,0 +1,13 @@
import json


def collect_words(lines):
    """Return the set of words that have an objective or subjective sense.

    Args:
        lines: An iterable of strings, each holding one JSON object with a
            ``"word"`` key and, optionally, a ``"senses"`` list whose items
            may carry a ``"tags"`` list. Blank lines are ignored.

    Returns:
        A set of the ``"word"`` values for which at least one sense has
        ``"objective"`` or ``"subjective"`` among its tags.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``"word"`` key.
    """
    found = set()
    for line in lines:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        entry = json.loads(line)
        word = entry["word"]
        for sense in entry.get("senses", ()):
            # Not every sense is guaranteed to carry a "tags" list.
            tags = sense.get("tags", ())
            if "objective" in tags or "subjective" in tags:
                found.add(word)
                break
    return found


def write_word_list():
    """Build ``words-to-edit.txt`` from the Bulgarian inflected-forms dump.

    Reads ``kaikki_dot_org-dictionary-Bulgarian-inflected.json`` line by line
    and writes the matching words, one per line and sorted, to
    ``words-to-edit.txt``.
    """
    # Explicit UTF-8 on both ends: the words are Cyrillic and must not depend
    # on the platform's default encoding.
    with open("kaikki_dot_org-dictionary-Bulgarian-inflected.json", encoding="utf-8") as f:
        out_words = collect_words(f)
    with open("words-to-edit.txt", mode="w", encoding="utf-8") as f:
        # Sorted so repeated runs produce an identical file.
        f.write("\n".join(sorted(out_words)))


if __name__ == "__main__":
    write_word_list()

File diff suppressed because it is too large Load Diff