Add objective/subjective converter script
This commit is contained in:
parent
8dc7c70cd9
commit
de4d24c97c
35
bulgarian-subject-object/bg-subject-object.py
Normal file
35
bulgarian-subject-object/bg-subject-object.py
Normal file
@ -0,0 +1,35 @@
|
||||
import kovachevbot
|
||||
import pywikibot
|
||||
import mwparserfromhell
|
||||
import sys
|
||||
|
||||
# Legacy inflection tags mapped to the shorter spelling that replaces them.
_TAG_REPLACEMENTS = {
    "sbjv": "sbj",
    "objv": "obj",
}


def fix_inflection(template: mwparserfromhell.wikicode.Template):
    """Rewrite ``sbjv``/``objv`` parameter values of *template* in place.

    Every parameter whose value, ignoring surrounding whitespace, is exactly
    ``sbjv`` or ``objv`` is changed to ``sbj`` or ``obj`` respectively.  All
    other parameters are left untouched.

    Args:
        template: The template whose parameters are inspected and mutated.

    Returns:
        None; the template is modified in place.
    """
    for param in template.params:
        param: mwparserfromhell.nodes.extras.Parameter
        text = str(param.value)
        tag = text.strip()
        replacement = _TAG_REPLACEMENTS.get(tag)
        if replacement is None:
            continue
        # Swap only the tag itself so that any whitespace or newline around
        # it (multi-line template layout) survives the edit unchanged.
        param.value = text.replace(tag, replacement, 1)
|
||||
|
||||
def _is_bulgarian_inflection(template: "mwparserfromhell.wikicode.Template") -> bool:
    """Return True when *template* is ``{{inflection of|bg|...}}``."""
    if not template.name.matches("inflection of"):
        return False
    return template.has(1) and template.get(1).value.strip() == "bg"


def main() -> None:
    """Convert ``sbjv``/``objv`` tags in Bulgarian inflection templates.

    Reads page titles from ``words-to-edit.txt`` (one per line), parses each
    page, runs ``fix_inflection`` on every ``{{inflection of|bg|...}}``
    template inside the level-2 "Bulgarian" section and saves the page when
    its text actually changed.  Pages without a Bulgarian section are
    reported on stderr and skipped.
    """
    # The word list contains Cyrillic titles; do not depend on the locale's
    # default encoding.
    with open("words-to-edit.txt", encoding="utf-8") as f:
        words_to_fix = f.read().splitlines()

    pages = (kovachevbot.wikt_page(word) for word in words_to_fix)
    # NOTE(review): max_entries=1 looks like a trial-run limit — confirm
    # against kovachevbot.iterate_safe before running on the full list.
    for page in kovachevbot.iterate_safe(pages, max_entries=1):
        page: pywikibot.Page
        title = page.title()
        original_text = page.text
        parsed = mwparserfromhell.parse(original_text)

        try:
            bulgarian_section: mwparserfromhell.wikicode.Wikicode = parsed.get_sections([2], "Bulgarian")[0]
        except IndexError:
            print(f"Error: page {title} has no Bulgarian content", file=sys.stderr)
            # Without this the loop would fall through with an unbound (or
            # stale, from the previous page) section and still save the page.
            continue

        # A callable matcher is used instead of a regex: in the pattern
        # "{{inflection of|bg|.*?}}" the unescaped "|" is alternation, so the
        # ".*?}}" alternative matched every template in the section.
        for inflection_template in bulgarian_section.filter(
            forcetype=mwparserfromhell.wikicode.Template,
            matches=_is_bulgarian_inflection,
        ):
            inflection_template: mwparserfromhell.wikicode.Template
            fix_inflection(inflection_template)

        new_text = str(parsed)
        if new_text == original_text:
            # Nothing was converted; avoid a pointless save request.
            continue

        page.text = new_text
        page.save("Convert sbjv/objv into sbj/obj in Bulgarian inflections")
|
||||
|
||||
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
13
bulgarian-subject-object/determine-targets-to-edit.py
Normal file
13
bulgarian-subject-object/determine-targets-to-edit.py
Normal file
@ -0,0 +1,13 @@
|
||||
import json
|
||||
|
||||
# Sense tags that mark an entry as needing the sbjv/objv conversion.
CASE_TAGS = frozenset({"objective", "subjective"})


def collect_words(lines):
    """Return the set of words having a sense tagged objective/subjective.

    Args:
        lines: Iterable of strings, each holding one JSON object with a
            ``"word"`` key and a ``"senses"`` list.  Blank lines are ignored.

    Returns:
        A set of the ``"word"`` values of every entry in which at least one
        sense lists ``"objective"`` or ``"subjective"`` among its ``"tags"``.
    """
    words = set()
    for line in lines:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        entry = json.loads(line)
        word = entry["word"]
        for sense in entry["senses"]:
            # Not every sense carries a "tags" list; treat a missing one as empty.
            if CASE_TAGS.intersection(sense.get("tags", ())):
                words.add(word)
                break
    return words


def main() -> None:
    """Write the words found in the Bulgarian dump to ``words-to-edit.txt``."""
    with open("kaikki_dot_org-dictionary-Bulgarian-inflected.json", encoding="utf-8") as f:
        out_words = collect_words(f)

    # Sorted so that repeated runs over the same dump produce identical files.
    with open("words-to-edit.txt", mode="w", encoding="utf-8") as f:
        f.write("\n".join(sorted(out_words)))


if __name__ == "__main__":
    main()
|
3623
bulgarian-subject-object/words-to-edit.txt
Normal file
3623
bulgarian-subject-object/words-to-edit.txt
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user