commit 835b5bf1237beffbaa7e1a4dda2e26e8d2dea30f Author: Kiril Kovachev Date: Wed Oct 9 23:44:40 2024 +0100 Initial commit (WIP) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fc197e0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +aozorabunko-dedupe-clean.jsonl +.venv +*.pyc +__pycache__/ +dist/ +build/ +*.egg-info/ +*.db +.vscode +instance \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2d0865d --- /dev/null +++ b/README.md @@ -0,0 +1,13 @@ +# KankenOnline +This project intends to provide a website that can generate practice questions for Kanji Kentei level 1. +I draw from both definition data and numerous texts from Aozora Bunko to create each style of question from 1 to 9. + +## Running +- `wget https://huggingface.co/datasets/globis-university/aozorabunko-clean/resolve/main/aozorabunko-dedupe-clean.jsonl.gz` to get the Aozora data +- `gunzip aozorabunko-dedupe-clean.jsonl` to extract the data to a single file + +## Sources +- [Aozora Bunko cleaned corpus on GitHub](https://github.com/globis-org/aozorabunko-extractor?tab=readme-ov-file) + - [Hugging Face download](https://huggingface.co/datasets/globis-university/aozorabunko-clean) + +# This early build is based on the Flask tutorial \ No newline at end of file diff --git a/kanken_online/__init__.py b/kanken_online/__init__.py new file mode 100644 index 0000000..9812c28 --- /dev/null +++ b/kanken_online/__init__.py @@ -0,0 +1,43 @@ +import os +from flask import Flask, render_template +from pathlib import Path +from .auth import login_required + + +DATABASE_NAME = "kanken_online.sqlite" +def create_app(test_config=None): + app = Flask(__name__, instance_relative_config=True) + app.config.from_mapping( + SECRET_KEY="dev", + DATABASE=str(Path(app.instance_path) / DATABASE_NAME) + ) + + if test_config is None: + app.config.from_pyfile("config.py", silent=True) + else: + app.config.from_mapping(test_config) + + # Ensure instance path exists + 
os.makedirs(app.instance_path, exist_ok=True) + + @app.route("/hello") + def hello(): + return "Hello, World!" + + @app.route("/") + def index(): + return render_template("index.html") + + @app.route("/options") + @login_required + def options(): + return "options" + + from . import database + database.initialize_app(app) + + from . import auth, api + app.register_blueprint(auth.blueprint) + app.register_blueprint(api.blueprint) + + return app \ No newline at end of file diff --git a/kanken_online/api.py b/kanken_online/api.py new file mode 100644 index 0000000..dd21e50 --- /dev/null +++ b/kanken_online/api.py @@ -0,0 +1,148 @@ +import functools +import json +from flask import Blueprint, jsonify +import jsonpickle +from sqlalchemy import create_engine, select +from sqlalchemy.orm import Session +from .database import get_database + +blueprint = Blueprint("api", __name__, url_prefix="/api") + +@blueprint.route("/") +def logout(): + # db = get_database() + return { + "endpoints": ["id", "kanji", "kotoba (not implemented)"] + } + +import random +import sqlalchemy +from typing import List, Optional, Iterable +from sqlalchemy import URL, ForeignKey, String, Boolean, Text, Integer +from sqlalchemy.types import CHAR +from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, relationship + +class Base(DeclarativeBase): + pass + +# class Reading(Base): +# __tablename__ = "reading" +# id: Mapped[int] = mapped_column(primary_key=True) +# reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall +# kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id")) +# kanji: Mapped["Kanji"] = relationship(back_populates="readings") + # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓 + +class Goon(Base): + __tablename__ = "goon" + id: Mapped[int] = mapped_column(primary_key=True) + reading: Mapped[str] = mapped_column(String(length=10)) # Assume no 
reading can be over 10 characters long, a sound assumption overall + kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id")) + kanji: Mapped["Kanji"] = relationship(back_populates="goon") + # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓 + +class Kanon(Base): + __tablename__ = "kanon" + id: Mapped[int] = mapped_column(primary_key=True) + reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall + kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id")) + kanji: Mapped["Kanji"] = relationship(back_populates="kanon") + # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓 + +class Kanyoon(Base): + __tablename__ = "kanyoon" + id: Mapped[int] = mapped_column(primary_key=True) + reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall + kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id")) + kanji: Mapped["Kanji"] = relationship(back_populates="kanyoon") + # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓 + +class Soon(Base): + __tablename__ = "soon" + id: Mapped[int] = mapped_column(primary_key=True) + reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall + kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id")) + kanji: Mapped["Kanji"] = relationship(back_populates="soon") + # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓 + +class Toon(Base): + __tablename__ = "toon" + id: Mapped[int] = mapped_column(primary_key=True) + reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall + kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id")) + kanji: Mapped["Kanji"] = 
relationship(back_populates="toon") + # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓 + +class Kun(Base): + __tablename__ = "kun" + id: Mapped[int] = mapped_column(primary_key=True) + reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall + kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id")) + kanji: Mapped["Kanji"] = relationship(back_populates="kun") + # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓 + +class UnclassifiedOn(Base): + __tablename__ = "unclassified_on" + id: Mapped[int] = mapped_column(primary_key=True) + reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall + kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id")) + kanji: Mapped["Kanji"] = relationship(back_populates="unclassified_on") + # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓 + +class Kanji(Base): + __tablename__ = "kanji" + id: Mapped[int] = mapped_column(primary_key=True) + character: Mapped[str] = mapped_column(CHAR(length=1), unique=True) + level: Mapped[str] = mapped_column(String(length=2)) # Either 1, 2, etc. or 準2 etc. 
+ is_kokuji: Mapped[bool] = mapped_column(Boolean()) + meanings: Mapped[str] = mapped_column(Text()) # FIXME: make this a list + # readings: Mapped[List[Reading]] = relationship(back_populates="kanji") + goon: Mapped[List[Goon]] = relationship(back_populates="kanji") + kanon: Mapped[List[Kanon]] = relationship(back_populates="kanji") + kanyoon: Mapped[List[Kanyoon]] = relationship(back_populates="kanji") + toon: Mapped[List[Toon]] = relationship(back_populates="kanji") + soon: Mapped[List[Soon]] = relationship(back_populates="kanji") + kun: Mapped[List[Kun]] = relationship(back_populates="kanji") + unclassified_on: Mapped[List[UnclassifiedOn]] = relationship(back_populates="kanji") + radical: Mapped[str] = mapped_column(CHAR(length=1)) # FIXME: normalize? + stroke_count: Mapped[int] = mapped_column(Integer()) + radical_added_stroke_count: Mapped[int] = mapped_column(Integer()) # FIXME: normalize? this may theoretically be calculated based on the radical stroke count, but I need to validate that this always works + glyph_origin: Mapped[str] = mapped_column(Text()) # FIXME: make this a list of possible explanations, possibly, but unsure + # diagram: Mapped[str] = ... 
could be calculated from the kanji name + + def to_json(self): + out = {} + for attr, value in self.__dict__.items(): + if not isinstance(value, (bool, int, str)): + continue + else: + out[attr] = value + + for attr in ("goon", "kanon", "kanyoon", "toon", "soon", "kun", "unclassified_on"): + out[attr] = [reading_obj.reading for reading_obj in getattr(self, attr)] + + return out + +@blueprint.route("/id/<int:kanji_id>") +def kanji_by_id(kanji_id: int): + engine = create_engine("sqlite:///kanken_online/kanken.db") + with Session(engine) as session: + query = select(Kanji).where(Kanji.id == kanji_id) + item = session.execute(query).first() + if item is None: + return "Invalid ID", 404 + + kanji = item[0] + return kanji.to_json() + +@blueprint.route("/kanji/<kanji>") +def kanji_by_character(kanji: str): + engine = create_engine("sqlite:///kanken_online/kanken.db") + with Session(engine) as session: + query = select(Kanji).where(Kanji.character == kanji) + item = session.execute(query).first() + if item is None: + return "Invalid kanji", 404 + + kanji_obj = item[0] + return kanji_obj.to_json() diff --git a/kanken_online/auth.py b/kanken_online/auth.py new file mode 100644 index 0000000..6d8cc73 --- /dev/null +++ b/kanken_online/auth.py @@ -0,0 +1,86 @@ +import functools +from flask import Blueprint, flash, g, redirect, render_template, request, session, url_for +from werkzeug.security import check_password_hash, generate_password_hash +from .database import get_database + +blueprint = Blueprint("auth", __name__, url_prefix="/auth") + +@blueprint.route("/register", methods=("GET", "POST")) +def register(): + if request.method == "POST": + username = request.form["username"] + password = request.form["password"] + db = get_database() + error = None + + if not username: + error = "Username is required." + elif not password: + error = "Password is required." 
+ + if error is None: + try: + db.execute( + "INSERT INTO user (username, password) VALUES (?, ?)", + (username, generate_password_hash(password)), + ) + db.commit() + except db.IntegrityError: + error = f"User {username} is already registered." + else: + return redirect(url_for("auth.login")) + + flash(error) + + return render_template("auth/register.html") + +@blueprint.route("/login", methods=("GET", "POST")) +def login(): + if request.method == "POST": + username = request.form["username"] + password = request.form["password"] + db = get_database() + error = None + user = db.execute( + "SELECT * FROM user WHERE username = ?", (username,) + ).fetchone() + + if user is None: + error = "Incorrect username." + elif not check_password_hash(user["password"], password): + error = "Incorrect password." + + if error is None: + session.clear() + session["user_id"] = user["id"] + return redirect(url_for("index")) + + flash(error) + + return render_template("auth/login.html") + +@blueprint.before_app_request +def load_logged_in_user(): + user_id = session.get("user_id") + + if user_id is None: + g.user = None + else: + g.user = get_database().execute( + "SELECT * FROM user WHERE id = ?", (user_id,) + ).fetchone() + +@blueprint.route("/logout") +def logout(): + session.clear() + return redirect(url_for("index")) + +def login_required(view): + @functools.wraps(view) + def wrapped_view(**kwargs): + if g.user is None: + return redirect(url_for("auth.login")) + + return view(**kwargs) + + return wrapped_view \ No newline at end of file diff --git a/kanken_online/database.py b/kanken_online/database.py new file mode 100644 index 0000000..ebbfb89 --- /dev/null +++ b/kanken_online/database.py @@ -0,0 +1,40 @@ +import sqlite3 +from typing import IO +import click +from flask import Flask, current_app, g + +def get_database(): + if "db" not in g: + g.db = sqlite3.connect( + current_app.config["DATABASE"], + detect_types=sqlite3.PARSE_DECLTYPES + ) + g.db.row_factory = sqlite3.Row + + 
return g.db + +def initialize_database(): + db = get_database() + + with current_app.open_resource("schema.sql") as f: + f: IO[bytes] + db.executescript(f.read().decode()) + +def close_database(e=None): + db = g.pop("db", None) + + if db is not None: + db.close() + +@click.command("init-db") +def init_db_command(): + """Wipe the existing database and create new tables.""" + if input("Are you sure you wish to overwrite any existing database? (y/n) ") == "y": + initialize_database() + click.echo("Initialized the database.") + else: + click.echo("Aborted.") + +def initialize_app(app: Flask): + app.teardown_appcontext(close_database) + app.cli.add_command(init_db_command) \ No newline at end of file diff --git a/kanken_online/schema.sql b/kanken_online/schema.sql new file mode 100644 index 0000000..90a6d19 --- /dev/null +++ b/kanken_online/schema.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS user; +-- DROP TABLE IF EXISTS post; + +CREATE TABLE user ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + username TEXT UNIQUE NOT NULL, + password TEXT NOT NULL +); + +-- CREATE TABLE post ( +-- id INTEGER PRIMARY KEY AUTOINCREMENT, +-- author_id INTEGER NOT NULL, +-- created TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, +-- title TEXT NOT NULL, +-- body TEXT NOT NULL, +-- FOREIGN KEY (author_id) REFERENCES user (id) +-- ); + diff --git a/kanken_online/static/style.css b/kanken_online/static/style.css new file mode 100644 index 0000000..c9a7349 --- /dev/null +++ b/kanken_online/static/style.css @@ -0,0 +1,27 @@ +/* From the Flask tutorial */ +html { font-family: sans-serif; background: #eee; padding: 1rem; } +body { max-width: 960px; margin: 0 auto; background: white; } +h1 { font-family: serif; color: #377ba8; margin: 1rem 0; } +a { color: #377ba8; } +hr { border: none; border-top: 1px solid lightgray; } +nav { background: lightgray; display: flex; align-items: center; padding: 0 0.5rem; } +nav h1 { flex: auto; margin: 0; } +nav h1 a { text-decoration: none; padding: 0.25rem 0.5rem; } 
+nav ul { display: flex; list-style: none; margin: 0; padding: 0; } +nav ul li a, nav ul li span, header .action { display: block; padding: 0.5rem; } +.content { padding: 0 1rem 1rem; } +.content > header { border-bottom: 1px solid lightgray; display: flex; align-items: flex-end; } +.content > header h1 { flex: auto; margin: 1rem 0 0.25rem 0; } +.flash { margin: 1em 0; padding: 1em; background: #cae6f6; border: 1px solid #377ba8; } +.post > header { display: flex; align-items: flex-end; font-size: 0.85em; } +.post > header > div:first-of-type { flex: auto; } +.post > header h1 { font-size: 1.5em; margin-bottom: 0; } +.post .about { color: slategray; font-style: italic; } +.post .body { white-space: pre-line; } +.content:last-child { margin-bottom: 0; } +.content form { margin: 1em 0; display: flex; flex-direction: column; } +.content label { font-weight: bold; margin-bottom: 0.5em; } +.content input, .content textarea { margin-bottom: 1em; } +.content textarea { min-height: 12em; resize: vertical; } +input.danger { color: #cc2f2e; } +input[type=submit] { align-self: start; min-width: 10em; } \ No newline at end of file diff --git a/kanken_online/templates/auth/login.html b/kanken_online/templates/auth/login.html new file mode 100644 index 0000000..bdf4547 --- /dev/null +++ b/kanken_online/templates/auth/login.html @@ -0,0 +1,15 @@ +{% extends 'base.html' %} + +{% block header %} +

{% block title %}Log In{% endblock %}

+{% endblock %} + +{% block content %} +
+ + + + + +
+{% endblock %} \ No newline at end of file diff --git a/kanken_online/templates/auth/register.html b/kanken_online/templates/auth/register.html new file mode 100644 index 0000000..5a8bf01 --- /dev/null +++ b/kanken_online/templates/auth/register.html @@ -0,0 +1,15 @@ +{% extends 'base.html' %} + +{% block header %} +

{% block title %}Register{% endblock %}

+{% endblock %} + +{% block content %} +
+ + + + + +
+{% endblock %} \ No newline at end of file diff --git a/kanken_online/templates/base.html b/kanken_online/templates/base.html new file mode 100644 index 0000000..b3ac547 --- /dev/null +++ b/kanken_online/templates/base.html @@ -0,0 +1,32 @@ + + + + + {% block title %}{% endblock %} - KankenOnline + + + + +
+
+ {% block header %}{% endblock %} +
+ {% for message in get_flashed_messages() %} +
{{ message }}
+ {% endfor %} + {% block content %}{% endblock %} +
+ + \ No newline at end of file diff --git a/kanken_online/templates/index.html b/kanken_online/templates/index.html new file mode 100644 index 0000000..9230368 --- /dev/null +++ b/kanken_online/templates/index.html @@ -0,0 +1,9 @@ +{% extends 'base.html' %} + +{% block header %} +

{% block title %}Main Page{% endblock %}

+{% endblock %} + +{% block content %} + Blahdy blah +{% endblock %} \ No newline at end of file diff --git a/kanken_online/templates/options.html b/kanken_online/templates/options.html new file mode 100644 index 0000000..d8c665a --- /dev/null +++ b/kanken_online/templates/options.html @@ -0,0 +1,9 @@ +{% extends 'base.html' %} + +{% block header %} +

{% block title %}Options{% endblock %}

+{% endblock %} + +{% block content %} + Blahdy blah +{% endblock %} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..68fccb1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "kanken_online" +version = "0.1.0" +description = "Online Kanken practice and information portal." +dependencies = [ + "flask", +] + +[build-system] +requires = ["flit_core<4"] +build-backend = "flit_core.buildapi" \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..4616b78 --- /dev/null +++ b/setup.py @@ -0,0 +1,11 @@ +from setuptools import setup + +setup( + name="kanken_online", + version="0.1.0", + long_description=__doc__ or "", + packages=["kanken_online"], + include_package_data=True, + zip_safe=False, + install_requires=["Flask"] +)