Initial commit (WIP)

This commit is contained in:
Kiril Kovachev 2024-10-09 23:44:40 +01:00
commit 835b5bf123
15 changed files with 487 additions and 0 deletions

10
.gitignore vendored Normal file
View File

@ -0,0 +1,10 @@
aozorabunko-dedupe-clean.jsonl
.venv
*.pyc
__pycache__/
dist/
build/
*.egg-info/
*.db
.vscode
instance

13
README.md Normal file
View File

@ -0,0 +1,13 @@
# KankenOnline
This project intends to provide a website that can generate practice questions for Kanji Kentei level 1.
I draw from both definition data and numerous texts from Aozora Bunko to create each style of question from 1 to 9.
## Running
- `wget https://huggingface.co/datasets/globis-university/aozorabunko-clean/resolve/main/aozorabunko-dedupe-clean.jsonl.gz` to get the Aozora data
- `gunzip aozorabunko-dedupe-clean.jsonl.gz` to extract the data to a single file (`aozorabunko-dedupe-clean.jsonl`)
## Sources
- [Aozora Bunko cleaned corpus on GitHub](https://github.com/globis-org/aozorabunko-extractor?tab=readme-ov-file)
- [Hugging Face download](https://huggingface.co/datasets/globis-university/aozorabunko-clean)
# This early build is based on the Flask tutorial

43
kanken_online/__init__.py Normal file
View File

@ -0,0 +1,43 @@
import os
from flask import Flask, render_template
from pathlib import Path
from .auth import login_required
DATABASE_NAME = "kanken_online.sqlite"
def create_app(test_config=None):
    """Application factory: build and configure the Flask application.

    Args:
        test_config: Optional mapping of configuration values. When given,
            it is applied instead of loading ``config.py`` from the
            instance folder.

    Returns:
        The configured ``Flask`` app, with the database hooks installed and
        the ``auth`` and ``api`` blueprints registered.
    """
    app = Flask(__name__, instance_relative_config=True)

    # Defaults first; either branch below may override them.
    database_path = Path(app.instance_path) / DATABASE_NAME
    app.config.from_mapping(SECRET_KEY="dev", DATABASE=str(database_path))

    if test_config is not None:
        app.config.from_mapping(test_config)
    else:
        # silent=True: a missing instance config file is not an error.
        app.config.from_pyfile("config.py", silent=True)

    # Ensure instance path exists
    os.makedirs(app.instance_path, exist_ok=True)

    @app.route("/hello")
    def hello():
        return "Hello, World!"

    @app.route("/")
    def index():
        return render_template("index.html")

    @app.route("/options")
    @login_required
    def options():
        return "options"

    # Imported inside the factory, as in the original layout.
    from . import database
    database.initialize_app(app)

    from . import auth, api
    for module in (auth, api):
        app.register_blueprint(module.blueprint)

    return app

148
kanken_online/api.py Normal file
View File

@ -0,0 +1,148 @@
import functools
import json
from flask import Blueprint, jsonify
import jsonpickle
from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session
from .database import get_database
blueprint = Blueprint("api", __name__, url_prefix="/api")
@blueprint.route("/")
def logout():
    """Describe the API by listing the names of its endpoints.

    Returns a dict, which Flask serialises to a JSON response.

    NOTE(review): the function name ``logout`` does not match what the view
    does. It is kept because Flask derives the endpoint name
    (``api.logout``) from it.
    """
    endpoints = ["id", "kanji", "kotoba (not implemented)"]
    return {"endpoints": endpoints}
import random
import sqlalchemy
from typing import List, Optional, Iterable
from sqlalchemy import URL, ForeignKey, String, Boolean, Text, Integer
from sqlalchemy.types import CHAR
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, relationship
class Base(DeclarativeBase):
    """Declarative base class shared by every ORM model in this module."""
# class Reading(Base):
# __tablename__ = "reading"
# id: Mapped[int] = mapped_column(primary_key=True)
# reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall
# kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
# kanji: Mapped["Kanji"] = relationship(back_populates="readings")
# reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓
class Goon(Base):
    """One reading string belonging to a kanji, stored in table ``goon``.

    Linked to ``Kanji`` through ``kanji_id``; the other side of the
    relationship is ``Kanji.goon``.
    NOTE(review): presumably holds 呉音 (go-on) readings -- confirm.
    """

    __tablename__ = "goon"
    id: Mapped[int] = mapped_column(primary_key=True)
    reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    kanji: Mapped["Kanji"] = relationship(back_populates="goon")
    # Disabled column kept for reference:
    # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓
class Kanon(Base):
    """One reading string belonging to a kanji, stored in table ``kanon``.

    Linked to ``Kanji`` through ``kanji_id``; the other side of the
    relationship is ``Kanji.kanon``.
    NOTE(review): presumably holds 漢音 (kan-on) readings -- confirm.
    """

    __tablename__ = "kanon"
    id: Mapped[int] = mapped_column(primary_key=True)
    reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    kanji: Mapped["Kanji"] = relationship(back_populates="kanon")
    # Disabled column kept for reference:
    # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓
class Kanyoon(Base):
    """One reading string belonging to a kanji, stored in table ``kanyoon``.

    Linked to ``Kanji`` through ``kanji_id``; the other side of the
    relationship is ``Kanji.kanyoon``.
    NOTE(review): presumably holds 慣用音 (kan'yō-on) readings -- confirm.
    """

    __tablename__ = "kanyoon"
    id: Mapped[int] = mapped_column(primary_key=True)
    reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    kanji: Mapped["Kanji"] = relationship(back_populates="kanyoon")
    # Disabled column kept for reference:
    # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓
class Soon(Base):
    """One reading string belonging to a kanji, stored in table ``soon``.

    Linked to ``Kanji`` through ``kanji_id``; the other side of the
    relationship is ``Kanji.soon``.
    NOTE(review): presumably holds 宋音 (sō-on) readings -- confirm.
    """

    __tablename__ = "soon"
    id: Mapped[int] = mapped_column(primary_key=True)
    reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    kanji: Mapped["Kanji"] = relationship(back_populates="soon")
    # Disabled column kept for reference:
    # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓
class Toon(Base):
    """One reading string belonging to a kanji, stored in table ``toon``.

    Linked to ``Kanji`` through ``kanji_id``; the other side of the
    relationship is ``Kanji.toon``.
    NOTE(review): presumably holds 唐音 (tō-on) readings -- confirm.
    """

    __tablename__ = "toon"
    id: Mapped[int] = mapped_column(primary_key=True)
    reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    kanji: Mapped["Kanji"] = relationship(back_populates="toon")
    # Disabled column kept for reference:
    # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓
class Kun(Base):
    """One reading string belonging to a kanji, stored in table ``kun``.

    Linked to ``Kanji`` through ``kanji_id``; the other side of the
    relationship is ``Kanji.kun``.
    NOTE(review): presumably holds 訓 (kun) readings -- confirm.
    """

    __tablename__ = "kun"
    id: Mapped[int] = mapped_column(primary_key=True)
    reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    kanji: Mapped["Kanji"] = relationship(back_populates="kun")
    # Disabled column kept for reference:
    # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓
class UnclassifiedOn(Base):
    """One reading string belonging to a kanji, stored in ``unclassified_on``.

    Linked to ``Kanji`` through ``kanji_id``; the other side of the
    relationship is ``Kanji.unclassified_on``.
    NOTE(review): presumably on-readings (音) whose sub-type is not known
    -- confirm.
    """

    __tablename__ = "unclassified_on"
    id: Mapped[int] = mapped_column(primary_key=True)
    reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    kanji: Mapped["Kanji"] = relationship(back_populates="unclassified_on")
    # Disabled column kept for reference:
    # reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓
class Kanji(Base):
    """ORM model for one kanji (table ``kanji``) and its reading lists.

    Scalar facts are plain columns. Each kind of reading lives in its own
    table and is exposed here as a list relationship.
    """

    __tablename__ = "kanji"

    id: Mapped[int] = mapped_column(primary_key=True)
    character: Mapped[str] = mapped_column(CHAR(length=1), unique=True)
    level: Mapped[str] = mapped_column(String(length=2))  # Either 1, 2, etc. or 準2 etc.
    is_kokuji: Mapped[bool] = mapped_column(Boolean())
    meanings: Mapped[str] = mapped_column(Text())  # FIXME: make this a list

    # One relationship per reading table; each is the reverse side of that
    # model's ``kanji`` attribute.
    goon: Mapped[List[Goon]] = relationship(back_populates="kanji")
    kanon: Mapped[List[Kanon]] = relationship(back_populates="kanji")
    kanyoon: Mapped[List[Kanyoon]] = relationship(back_populates="kanji")
    toon: Mapped[List[Toon]] = relationship(back_populates="kanji")
    soon: Mapped[List[Soon]] = relationship(back_populates="kanji")
    kun: Mapped[List[Kun]] = relationship(back_populates="kanji")
    unclassified_on: Mapped[List[UnclassifiedOn]] = relationship(back_populates="kanji")

    radical: Mapped[str] = mapped_column(CHAR(length=1))  # FIXME: normalize?
    stroke_count: Mapped[int] = mapped_column(Integer())
    # FIXME: normalize? This may theoretically be calculated from the radical
    # stroke count, but that needs validating first.
    radical_added_stroke_count: Mapped[int] = mapped_column(Integer())
    # FIXME: possibly make this a list of candidate explanations (unsure).
    glyph_origin: Mapped[str] = mapped_column(Text())
    # diagram: could be calculated from the kanji name

    def to_json(self):
        """Return a plain dict describing this kanji.

        The dict holds every entry of the instance ``__dict__`` whose value
        is a ``bool``, ``int`` or ``str``, plus one list of reading strings
        per reading relationship.
        """
        scalar_types = (bool, int, str)
        payload = {
            name: value
            for name, value in self.__dict__.items()
            if isinstance(value, scalar_types)
        }
        reading_kinds = ("goon", "kanon", "kanyoon", "toon", "soon", "kun", "unclassified_on")
        for kind in reading_kinds:
            payload[kind] = [entry.reading for entry in getattr(self, kind)]
        return payload
@functools.lru_cache(maxsize=None)
def _get_engine():
    """Return the shared SQLAlchemy engine for the kanji database.

    The engine is created on first use and cached, so all requests share
    one engine (and its connection pool) instead of building a new one on
    every request.
    """
    return create_engine("sqlite:///kanken_online/kanken.db")


def _kanji_response(condition, not_found_message):
    """Look up a single kanji and build the view's return value.

    Args:
        condition: SQLAlchemy WHERE criterion selecting the kanji.
        not_found_message: Body of the 404 response when nothing matches.

    Returns:
        The kanji's ``to_json()`` dict, or ``(not_found_message, 404)``.
    """
    with Session(_get_engine()) as session:
        row = session.execute(select(Kanji).where(condition)).first()
        if row is None:
            return not_found_message, 404
        # Serialise while the session is still open: to_json() reads the
        # reading relationships, which are not loaded by the SELECT above.
        return row[0].to_json()


@blueprint.route("/id/<int:kanji_id>")
def kanji_by_id(kanji_id: int):
    """Return the kanji with primary key ``kanji_id``, or a 404 response."""
    return _kanji_response(Kanji.id == kanji_id, "Invalid ID")


@blueprint.route("/kanji/<kanji>")
def kanji_by_character(kanji: str):
    """Return the kanji whose character equals ``kanji``, or a 404 response."""
    return _kanji_response(Kanji.character == kanji, "Invalid kanji")

86
kanken_online/auth.py Normal file
View File

@ -0,0 +1,86 @@
import functools
from flask import Blueprint, flash, g, redirect, render_template, request, session, url_for
from werkzeug.security import check_password_hash, generate_password_hash
from .database import get_database
blueprint = Blueprint("auth", __name__, url_prefix="/auth")
@blueprint.route("/register", methods=("GET", "POST"))
def register():
    """Show the registration form (GET) or create a new user (POST).

    On a successful POST the user is inserted with a hashed password and the
    client is redirected to the login page. On a validation error or a
    duplicate username the error is flashed and the form is shown again.
    """
    if request.method != "POST":
        return render_template("auth/register.html")

    form = request.form
    username = form["username"]
    password = form["password"]
    db = get_database()

    if not username:
        error = "Username is required."
    elif not password:
        error = "Password is required."
    else:
        try:
            db.execute(
                "INSERT INTO user (username, password) VALUES (?, ?)",
                (username, generate_password_hash(password)),
            )
            db.commit()
        except db.IntegrityError:
            # Raised when the INSERT violates a database constraint.
            error = f"User {username} is already registered."
        else:
            return redirect(url_for("auth.login"))

    flash(error)
    return render_template("auth/register.html")
@blueprint.route("/login", methods=("GET", "POST"))
def login():
    """Show the login form (GET) or authenticate a user (POST).

    On success the session is reset, the user's id is stored under
    ``"user_id"`` and the client is redirected to the index page. Otherwise
    the error is flashed and the form is shown again.
    """
    if request.method != "POST":
        return render_template("auth/login.html")

    username = request.form["username"]
    password = request.form["password"]
    db = get_database()
    lookup = db.execute("SELECT * FROM user WHERE username = ?", (username,))
    user = lookup.fetchone()

    if user is None:
        error = "Incorrect username."
    elif not check_password_hash(user["password"], password):
        error = "Incorrect password."
    else:
        # Start from an empty session before recording who is logged in.
        session.clear()
        session["user_id"] = user["id"]
        return redirect(url_for("index"))

    flash(error)
    return render_template("auth/login.html")
@blueprint.before_app_request
def load_logged_in_user():
    """Set ``g.user`` before each request.

    ``g.user`` becomes the ``user`` row for the id stored in the session, or
    ``None`` when the session holds no ``"user_id"``.
    """
    user_id = session.get("user_id")
    if user_id is not None:
        cursor = get_database().execute(
            "SELECT * FROM user WHERE id = ?", (user_id,)
        )
        g.user = cursor.fetchone()
    else:
        g.user = None
@blueprint.route("/logout")
def logout():
    """Log the user out by emptying the session, then go to the index page."""
    session.clear()
    target = url_for("index")
    return redirect(target)
def login_required(view):
    """Decorate ``view`` so that anonymous users are sent to the login page.

    Args:
        view: The view function to protect.

    Returns:
        A wrapper that redirects to ``auth.login`` when ``g.user`` is
        ``None`` and otherwise calls ``view`` with the arguments it received.
    """
    @functools.wraps(view)
    def wrapped_view(*args, **kwargs):
        if g.user is None:
            return redirect(url_for("auth.login"))
        # Forward positional arguments too, so the decorator also works on
        # callables that are not invoked purely with keyword arguments.
        return view(*args, **kwargs)
    return wrapped_view

40
kanken_online/database.py Normal file
View File

@ -0,0 +1,40 @@
import sqlite3
from typing import IO
import click
from flask import Flask, current_app, g
def get_database():
    """Return the SQLite connection stored on ``g``, opening it if needed.

    The connection is opened against ``current_app.config["DATABASE"]`` and
    stored as ``g.db`` so later calls reuse it. Rows are returned as
    ``sqlite3.Row`` objects.
    """
    if "db" in g:
        return g.db

    g.db = sqlite3.connect(
        current_app.config["DATABASE"],
        detect_types=sqlite3.PARSE_DECLTYPES,
    )
    g.db.row_factory = sqlite3.Row
    return g.db
def initialize_database():
    """Run the packaged ``schema.sql`` script against the database."""
    connection = get_database()
    with current_app.open_resource("schema.sql") as schema_file:
        schema_file: IO[bytes]
        script = schema_file.read().decode()
    connection.executescript(script)
def close_database(e=None):
    """Close and forget the connection stored on ``g``, if there is one.

    Args:
        e: Unused. It is accepted because this function is registered as an
            app-context teardown callback.
    """
    connection = g.pop("db", None)
    if connection is None:
        return
    connection.close()
@click.command("init-db")
def init_db_command():
    """Wipe the existing database and create new tables."""
    # The docstring above doubles as the command's --help text.
    # click.confirm accepts y/yes/n/no in any letter case, re-prompts on
    # anything else and defaults to "no" on an empty answer. The previous
    # raw input() compared against exactly "y", so "Y" or "yes" was treated
    # as a refusal.
    if click.confirm("Are you sure you wish to overwrite any existing database?"):
        initialize_database()
        click.echo("Initialized the database.")
    else:
        click.echo("Aborted.")
def initialize_app(app: Flask):
    """Hook the database helpers into ``app``.

    Adds the ``init-db`` CLI command and registers ``close_database`` to run
    when the application context is torn down.
    """
    app.cli.add_command(init_db_command)
    app.teardown_appcontext(close_database)

18
kanken_online/schema.sql Normal file
View File

@ -0,0 +1,18 @@
-- Database schema. This script is executed as a whole by
-- initialize_database() in database.py.
-- WARNING: it drops the existing `user` table first, so all stored
-- accounts are lost whenever it is run.
DROP TABLE IF EXISTS user;
-- DROP TABLE IF EXISTS post;

-- Registered accounts. auth.py stores the output of
-- generate_password_hash() in `password`, never the plain text.
CREATE TABLE user (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  username TEXT UNIQUE NOT NULL,
  password TEXT NOT NULL
);

-- Disabled `post` table, currently unused.
-- CREATE TABLE post (
-- id INTEGER PRIMARY KEY AUTOINCREMENT,
-- author_id INTEGER NOT NULL,
-- created TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- title TEXT NOT NULL,
-- body TEXT NOT NULL,
-- FOREIGN KEY (author_id) REFERENCES user (id)
-- );

View File

@ -0,0 +1,27 @@
/* From the Flask tutorial */
html { font-family: sans-serif; background: #eee; padding: 1rem; }
body { max-width: 960px; margin: 0 auto; background: white; }
h1 { font-family: serif; color: #377ba8; margin: 1rem 0; }
a { color: #377ba8; }
hr { border: none; border-top: 1px solid lightgray; }
nav { background: lightgray; display: flex; align-items: center; padding: 0 0.5rem; }
nav h1 { flex: auto; margin: 0; }
nav h1 a { text-decoration: none; padding: 0.25rem 0.5rem; }
nav ul { display: flex; list-style: none; margin: 0; padding: 0; }
nav ul li a, nav ul li span, header .action { display: block; padding: 0.5rem; }
.content { padding: 0 1rem 1rem; }
.content > header { border-bottom: 1px solid lightgray; display: flex; align-items: flex-end; }
.content > header h1 { flex: auto; margin: 1rem 0 0.25rem 0; }
.flash { margin: 1em 0; padding: 1em; background: #cae6f6; border: 1px solid #377ba8; }
.post > header { display: flex; align-items: flex-end; font-size: 0.85em; }
.post > header > div:first-of-type { flex: auto; }
.post > header h1 { font-size: 1.5em; margin-bottom: 0; }
.post .about { color: slategray; font-style: italic; }
.post .body { white-space: pre-line; }
.content:last-child { margin-bottom: 0; }
.content form { margin: 1em 0; display: flex; flex-direction: column; }
.content label { font-weight: bold; margin-bottom: 0.5em; }
.content input, .content textarea { margin-bottom: 1em; }
.content textarea { min-height: 12em; resize: vertical; }
input.danger { color: #cc2f2e; }
input[type=submit] { align-self: start; min-width: 10em; }

View File

@ -0,0 +1,15 @@
{% extends 'base.html' %}
{% block header %}
<h1>{% block title %}Log In{% endblock %}</h1>
{% endblock %}
{% block content %}
<form method="post">
<label for="username">Username</label>
<input name="username" id="username" required autofocus>
<label for="password">Password</label>
<input type="password" name="password" id="password" required>
<input type="submit" value="Log In">
</form>
{% endblock %}

View File

@ -0,0 +1,15 @@
{% extends 'base.html' %}
{% block header %}
<h1>{% block title %}Register{% endblock %}</h1>
{% endblock %}
{% block content %}
<form method="post">
<label for="username">Username</label>
<input name="username" id="username" required autofocus>
<label for="password">Password</label>
<input type="password" name="password" id="password" required>
<input type="submit" value="Register">
</form>
{% endblock %}

View File

@ -0,0 +1,32 @@
<!DOCTYPE html>
<html>
<head>
<title>{% block title %}{% endblock %} - KankenOnline</title>
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
<nav>
<h1>KankenOnline</h1>
<ul>
{% if g.user %}
<li><span>{{ g.user['username'] }}</span>
<li><a href="{{ url_for('options') }}">Options</a>
<li><a href="{{ url_for('auth.logout') }}">Log Out</a>
{% else %}
<li><a href="{{ url_for('auth.register') }}">Register</a>
<li><a href="{{ url_for('auth.login') }}">Log In</a>
{% endif %}
</ul>
</nav>
<section class="content">
<header>
{% block header %}{% endblock %}
</header>
{% for message in get_flashed_messages() %}
<div class="flash">{{ message }}</div>
{% endfor %}
{% block content %}{% endblock %}
</section>
</body>
</html>

View File

@ -0,0 +1,9 @@
{% extends 'base.html' %}
{% block header %}
<h1>{% block title %}Main Page{% endblock %}</h1>
{% endblock %}
{% block content %}
Blahdy blah
{% endblock %}

View File

@ -0,0 +1,9 @@
{% extends 'base.html' %}
{% block header %}
<h1>{% block title %}Options{% endblock %}</h1>
{% endblock %}
{% block content %}
Blahdy blah
{% endblock %}

11
pyproject.toml Normal file
View File

@ -0,0 +1,11 @@
[project]
name = "kanken_online"
version = "0.1.0"
description = "Online Kanken practice and information portal."
# Must cover every third-party import in the package: kanken_online/api.py
# imports jsonpickle and the SQLAlchemy 2.0 declarative API
# (DeclarativeBase, Mapped, mapped_column) in addition to Flask.
dependencies = [
    "flask",
    "jsonpickle",
    "sqlalchemy>=2.0",
]

[build-system]
requires = ["flit_core<4"]
build-backend = "flit_core.buildapi"

11
setup.py Normal file
View File

@ -0,0 +1,11 @@
from setuptools import setup

# install_requires must cover every third-party import in the package:
# kanken_online/api.py imports jsonpickle and the SQLAlchemy 2.0 declarative
# API (DeclarativeBase, Mapped, mapped_column) in addition to Flask.
setup(
    name="kanken_online",
    version="0.1.0",
    long_description=__doc__ or "",
    packages=["kanken_online"],
    include_package_data=True,
    zip_safe=False,
    install_requires=["Flask", "jsonpickle", "SQLAlchemy>=2.0"],
)