Initial commit (WIP)
This commit is contained in:
commit
835b5bf123
10
.gitignore
vendored
Normal file
10
.gitignore
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
aozorabunko-dedupe-clean.jsonl
|
||||
.venv
|
||||
*.pyc
|
||||
__pycache__/
|
||||
dist/
|
||||
build/
|
||||
*.egg-info/
|
||||
*.db
|
||||
.vscode
|
||||
instance
|
13
README.md
Normal file
13
README.md
Normal file
@ -0,0 +1,13 @@
|
||||
# KankenOnline
|
||||
This project intends to provide a website that can generate practice questions for Kanji Kentei level 1.
|
||||
I draw from both definition data and numerous texts from Aozora Bunko to create each style of question from 1 to 9.
|
||||
|
||||
## Running
|
||||
- `wget https://huggingface.co/datasets/globis-university/aozorabunko-clean/resolve/main/aozorabunko-dedupe-clean.jsonl.gz` to get the Aozora data
|
||||
- `gunzip aozorabunko-dedupe-clean.jsonl.gz` to extract the data to a single file (`aozorabunko-dedupe-clean.jsonl`)
|
||||
|
||||
## Sources
|
||||
- [Aozora Bunko cleaned corpus on GitHub](https://github.com/globis-org/aozorabunko-extractor?tab=readme-ov-file)
|
||||
- [Hugging Face download](https://huggingface.co/datasets/globis-university/aozorabunko-clean)
|
||||
|
||||
# This early build is based on the Flask tutorial
|
43
kanken_online/__init__.py
Normal file
43
kanken_online/__init__.py
Normal file
@ -0,0 +1,43 @@
|
||||
import os
|
||||
from flask import Flask, render_template
|
||||
from pathlib import Path
|
||||
from .auth import login_required
|
||||
|
||||
|
||||
DATABASE_NAME = "kanken_online.sqlite"
|
||||
def create_app(test_config=None):
    """Application factory: build and configure the KankenOnline Flask app.

    Args:
        test_config: Optional mapping of configuration values. When given, it
            is applied instead of the instance folder's ``config.py``.

    Returns:
        The configured ``Flask`` application, with the database hooks and the
        ``auth`` and ``api`` blueprints registered.
    """
    app = Flask(__name__, instance_relative_config=True)

    # Defaults; config.py or ``test_config`` (applied below) may override them.
    defaults = {
        "SECRET_KEY": "dev",
        "DATABASE": str(Path(app.instance_path) / DATABASE_NAME),
    }
    app.config.from_mapping(defaults)

    if test_config is not None:
        app.config.from_mapping(test_config)
    else:
        # silent=True: a missing config.py is not an error.
        app.config.from_pyfile("config.py", silent=True)

    # The default DATABASE path points into the instance folder, so make sure
    # that folder exists.
    os.makedirs(app.instance_path, exist_ok=True)

    @app.route("/hello")
    def hello():
        return "Hello, World!"

    @app.route("/")
    def index():
        return render_template("index.html")

    @app.route("/options")
    @login_required
    def options():
        return "options"

    # Imported inside the factory, as in the original layout.
    from . import database, auth, api

    database.initialize_app(app)
    for module in (auth, api):
        app.register_blueprint(module.blueprint)

    return app
|
148
kanken_online/api.py
Normal file
148
kanken_online/api.py
Normal file
@ -0,0 +1,148 @@
|
||||
import functools
|
||||
import json
|
||||
from flask import Blueprint, jsonify
|
||||
import jsonpickle
|
||||
from sqlalchemy import create_engine, select
|
||||
from sqlalchemy.orm import Session
|
||||
from .database import get_database
|
||||
|
||||
blueprint = Blueprint("api", __name__, url_prefix="/api")
|
||||
|
||||
@blueprint.route("/")
def logout():
    """Describe the API by returning the names of its endpoints.

    NOTE(review): this view (and therefore the Flask endpoint ``api.logout``)
    is called ``logout`` although it serves the API index. This is presumably
    a copy/paste leftover; renaming it would change the endpoint name.
    """
    available_endpoints = ["id", "kanji", "kotoba (not implemented)"]
    return {"endpoints": available_endpoints}
|
||||
|
||||
import random
|
||||
import sqlalchemy
|
||||
from typing import List, Optional, Iterable
|
||||
from sqlalchemy import URL, ForeignKey, String, Boolean, Text, Integer
|
||||
from sqlalchemy.types import CHAR
|
||||
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column, relationship
|
||||
|
||||
class Base(DeclarativeBase):
    """Declarative base class for the ORM models defined in this module."""
|
||||
|
||||
# class Reading(Base):
|
||||
# __tablename__ = "reading"
|
||||
# id: Mapped[int] = mapped_column(primary_key=True)
|
||||
# reading: Mapped[str] = mapped_column(String(length=10)) # Assume no reading can be over 10 characters long, a sound assumption overall
|
||||
# kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
|
||||
# kanji: Mapped["Kanji"] = relationship(back_populates="readings")
|
||||
# reading_type: Mapped[str] = mapped_column(CHAR(1), primary_key=True) # One of: 音漢呉慣唐宋訓
|
||||
|
||||
class Goon(Base):
    """One reading row in the ``goon`` table, attached to a single ``Kanji``.

    NOTE(review): presumably these are the go-on (呉) readings; confirm
    against whatever populates the table.
    """

    __tablename__ = "goon"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Assumes no reading is longer than 10 characters.
    reading: Mapped[str] = mapped_column(String(10))
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    # Counterpart of ``Kanji.goon``.
    kanji: Mapped["Kanji"] = relationship(back_populates="goon")
|
||||
|
||||
class Kanon(Base):
    """One reading row in the ``kanon`` table, attached to a single ``Kanji``.

    NOTE(review): presumably these are the kan-on (漢) readings; confirm
    against whatever populates the table.
    """

    __tablename__ = "kanon"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Assumes no reading is longer than 10 characters.
    reading: Mapped[str] = mapped_column(String(10))
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    # Counterpart of ``Kanji.kanon``.
    kanji: Mapped["Kanji"] = relationship(back_populates="kanon")
|
||||
|
||||
class Kanyoon(Base):
    """One reading row in the ``kanyoon`` table, attached to a single ``Kanji``.

    NOTE(review): presumably these are the kan'yō-on (慣) readings; confirm
    against whatever populates the table.
    """

    __tablename__ = "kanyoon"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Assumes no reading is longer than 10 characters.
    reading: Mapped[str] = mapped_column(String(10))
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    # Counterpart of ``Kanji.kanyoon``.
    kanji: Mapped["Kanji"] = relationship(back_populates="kanyoon")
|
||||
|
||||
class Soon(Base):
    """One reading row in the ``soon`` table, attached to a single ``Kanji``.

    NOTE(review): presumably these are the sō-on (宋) readings; confirm
    against whatever populates the table.
    """

    __tablename__ = "soon"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Assumes no reading is longer than 10 characters.
    reading: Mapped[str] = mapped_column(String(10))
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    # Counterpart of ``Kanji.soon``.
    kanji: Mapped["Kanji"] = relationship(back_populates="soon")
|
||||
|
||||
class Toon(Base):
    """One reading row in the ``toon`` table, attached to a single ``Kanji``.

    NOTE(review): presumably these are the tō-on (唐) readings; confirm
    against whatever populates the table.
    """

    __tablename__ = "toon"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Assumes no reading is longer than 10 characters.
    reading: Mapped[str] = mapped_column(String(10))
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    # Counterpart of ``Kanji.toon``.
    kanji: Mapped["Kanji"] = relationship(back_populates="toon")
|
||||
|
||||
class Kun(Base):
    """One reading row in the ``kun`` table, attached to a single ``Kanji``.

    NOTE(review): presumably these are the kun (訓) readings; confirm
    against whatever populates the table.
    """

    __tablename__ = "kun"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Assumes no reading is longer than 10 characters.
    reading: Mapped[str] = mapped_column(String(10))
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    # Counterpart of ``Kanji.kun``.
    kanji: Mapped["Kanji"] = relationship(back_populates="kun")
|
||||
|
||||
class UnclassifiedOn(Base):
    """One reading row in ``unclassified_on``, attached to a single ``Kanji``.

    NOTE(review): presumably on (音) readings with no finer classification;
    confirm against whatever populates the table.
    """

    __tablename__ = "unclassified_on"

    id: Mapped[int] = mapped_column(primary_key=True)
    # Assumes no reading is longer than 10 characters.
    reading: Mapped[str] = mapped_column(String(10))
    kanji_id: Mapped[int] = mapped_column(ForeignKey("kanji.id"))
    # Counterpart of ``Kanji.unclassified_on``.
    kanji: Mapped["Kanji"] = relationship(back_populates="unclassified_on")
|
||||
|
||||
class Kanji(Base):
    """ORM model for one row of the ``kanji`` table plus its reading lists.

    Each reading category lives in its own table and is exposed here as a
    list relationship (``goon``, ``kanon``, ``kanyoon``, ``toon``, ``soon``,
    ``kun``, ``unclassified_on``).
    """

    __tablename__ = "kanji"

    id: Mapped[int] = mapped_column(primary_key=True)
    character: Mapped[str] = mapped_column(CHAR(length=1), unique=True)
    level: Mapped[str] = mapped_column(String(length=2))  # Either 1, 2, etc. or 準2 etc.
    is_kokuji: Mapped[bool] = mapped_column(Boolean())
    meanings: Mapped[str] = mapped_column(Text())  # FIXME: make this a list

    goon: Mapped[List[Goon]] = relationship(back_populates="kanji")
    kanon: Mapped[List[Kanon]] = relationship(back_populates="kanji")
    kanyoon: Mapped[List[Kanyoon]] = relationship(back_populates="kanji")
    toon: Mapped[List[Toon]] = relationship(back_populates="kanji")
    soon: Mapped[List[Soon]] = relationship(back_populates="kanji")
    kun: Mapped[List[Kun]] = relationship(back_populates="kanji")
    unclassified_on: Mapped[List[UnclassifiedOn]] = relationship(back_populates="kanji")

    radical: Mapped[str] = mapped_column(CHAR(length=1))  # FIXME: normalize?
    stroke_count: Mapped[int] = mapped_column(Integer())
    # FIXME: normalize? This may theoretically be calculated from the radical
    # stroke count, but that needs validating first.
    radical_added_stroke_count: Mapped[int] = mapped_column(Integer())
    # FIXME: possibly make this a list of explanations, but unsure.
    glyph_origin: Mapped[str] = mapped_column(Text())
    # diagram: Mapped[str] = ... could be calculated from the kanji name

    def to_json(self):
        """Return a JSON-serializable ``dict`` describing this kanji.

        The result maps every column attribute holding a ``bool``, ``int`` or
        ``str`` to its value, and every reading relationship name to a list of
        reading strings.

        Returns:
            dict: column values plus one list of readings per category.
        """
        out = {}
        # Walk the mapped columns rather than ``self.__dict__``: the instance
        # dict only holds attributes that are currently loaded (plus
        # SQLAlchemy's internal state), so an expired instance would otherwise
        # be serialized without its columns.
        for prop in self.__mapper__.column_attrs:
            value = getattr(self, prop.key)
            if isinstance(value, (bool, int, str)):
                out[prop.key] = value

        for attr in ("goon", "kanon", "kanyoon", "toon", "soon", "kun", "unclassified_on"):
            out[attr] = [reading_obj.reading for reading_obj in getattr(self, attr)]

        return out
|
||||
|
||||
# NOTE(review): relative SQLite URL, resolved against the process's working
# directory; kept exactly as before.
_KANJI_DATABASE_URL = "sqlite:///kanken_online/kanken.db"


@functools.lru_cache(maxsize=None)
def _get_engine():
    """Create the SQLAlchemy engine on first use and reuse it afterwards.

    An Engine is meant to be created once and shared; building one per
    request throws away its connection pool every time.
    """
    return create_engine(_KANJI_DATABASE_URL)


def _find_kanji(condition, not_found_message):
    """Return the JSON form of the first ``Kanji`` matching ``condition``.

    Args:
        condition: SQL expression used in the ``WHERE`` clause.
        not_found_message: response body used when nothing matches.

    Returns:
        The dict produced by ``Kanji.to_json()``, or the tuple
        ``(not_found_message, 404)`` when no row matches.
    """
    with Session(_get_engine()) as session:
        kanji = session.scalars(select(Kanji).where(condition)).first()
        if kanji is None:
            return not_found_message, 404

        # Serialize while the session is still open: to_json() reads the
        # reading relationships, which are loaded on access.
        return kanji.to_json()


@blueprint.route("/id/<int:kanji_id>")
def kanji_by_id(kanji_id: int):
    """Look a kanji up by primary key; responds 404 "Invalid ID" if unknown."""
    return _find_kanji(Kanji.id == kanji_id, "Invalid ID")


@blueprint.route("/kanji/<kanji>")
def kanji_by_character(kanji: str):
    """Look a kanji up by its character; responds 404 "Invalid kanji" if unknown."""
    return _find_kanji(Kanji.character == kanji, "Invalid kanji")
|
86
kanken_online/auth.py
Normal file
86
kanken_online/auth.py
Normal file
@ -0,0 +1,86 @@
|
||||
import functools
|
||||
from flask import Blueprint, flash, g, redirect, render_template, request, session, url_for
|
||||
from werkzeug.security import check_password_hash, generate_password_hash
|
||||
from .database import get_database
|
||||
|
||||
blueprint = Blueprint("auth", __name__, url_prefix="/auth")
|
||||
|
||||
@blueprint.route("/register", methods=("GET", "POST"))
def register():
    """Show the registration form (GET) or create a new user (POST).

    On a successful POST the user row is inserted with a hashed password and
    the client is redirected to the login page. Otherwise the problem is
    flashed and the form is rendered again.
    """
    if request.method != "POST":
        return render_template("auth/register.html")

    username = request.form["username"]
    password = request.form["password"]
    db = get_database()

    if not username:
        error = "Username is required."
    elif not password:
        error = "Password is required."
    else:
        error = None

    if error is None:
        try:
            db.execute(
                "INSERT INTO user (username, password) VALUES (?, ?)",
                (username, generate_password_hash(password)),
            )
            db.commit()
        except db.IntegrityError:
            # The schema declares username UNIQUE, so a duplicate lands here.
            error = f"User {username} is already registered."
        else:
            return redirect(url_for("auth.login"))

    flash(error)
    return render_template("auth/register.html")
|
||||
|
||||
@blueprint.route("/login", methods=("GET", "POST"))
def login():
    """Show the login form (GET) or authenticate the submitted user (POST).

    On success the session is reset, the user's id is stored under
    ``"user_id"`` and the client is redirected to the index page. On failure
    the reason is flashed and the form is rendered again.
    """
    if request.method != "POST":
        return render_template("auth/login.html")

    username = request.form["username"]
    password = request.form["password"]
    account = get_database().execute(
        "SELECT * FROM user WHERE username = ?", (username,)
    ).fetchone()

    if account is None:
        failure = "Incorrect username."
    elif not check_password_hash(account["password"], password):
        failure = "Incorrect password."
    else:
        # Start from a clean session before recording who is logged in.
        session.clear()
        session["user_id"] = account["id"]
        return redirect(url_for("index"))

    flash(failure)
    return render_template("auth/login.html")
|
||||
|
||||
@blueprint.before_app_request
def load_logged_in_user():
    """Populate ``g.user`` before each request.

    ``g.user`` becomes the ``user`` row whose id is stored in the session, or
    ``None`` when the session holds no ``"user_id"``.
    """
    user_id = session.get("user_id")
    if user_id is None:
        g.user = None
        return

    lookup = get_database().execute(
        "SELECT * FROM user WHERE id = ?", (user_id,)
    )
    g.user = lookup.fetchone()
|
||||
|
||||
@blueprint.route("/logout")
def logout():
    """Clear the session and redirect to the index page."""
    session.clear()
    home_url = url_for("index")
    return redirect(home_url)
|
||||
|
||||
def login_required(view):
    """Decorate ``view`` so it redirects to the login page when ``g.user`` is ``None``.

    Args:
        view: the view function to protect; it is called with the same
            keyword arguments the wrapper receives.

    Returns:
        The wrapping view function.
    """

    @functools.wraps(view)
    def wrapped_view(**kwargs):
        if g.user is not None:
            return view(**kwargs)
        return redirect(url_for("auth.login"))

    return wrapped_view
|
40
kanken_online/database.py
Normal file
40
kanken_online/database.py
Normal file
@ -0,0 +1,40 @@
|
||||
import sqlite3
|
||||
from typing import IO
|
||||
import click
|
||||
from flask import Flask, current_app, g
|
||||
|
||||
def get_database():
    """Return the SQLite connection stored on ``g``, opening it if needed.

    The connection is opened against ``current_app.config["DATABASE"]``,
    cached as ``g.db`` and uses ``sqlite3.Row`` as its row factory.
    """
    if "db" in g:
        return g.db

    connection = sqlite3.connect(
        current_app.config["DATABASE"],
        detect_types=sqlite3.PARSE_DECLTYPES,
    )
    connection.row_factory = sqlite3.Row
    g.db = connection
    return connection
|
||||
|
||||
def initialize_database():
    """Run the packaged ``schema.sql`` script against the database."""
    db = get_database()

    # open_resource() yields a binary stream, hence the decode().
    with current_app.open_resource("schema.sql") as schema_file:
        schema_sql = schema_file.read().decode()

    db.executescript(schema_sql)
|
||||
|
||||
def close_database(e=None):
    """Close and forget the connection stored on ``g``, if there is one.

    Args:
        e: unused; accepted because this function is registered as a teardown
            callback.
    """
    connection = g.pop("db", None)
    if connection is None:
        return
    connection.close()
|
||||
|
||||
@click.command("init-db")
def init_db_command():
    """Wipe the existing database and create new tables."""
    # click.confirm() replaces a bare input(): it appends the "[y/N]" hint,
    # accepts "y"/"yes" in any case and re-prompts on unrecognised answers.
    # The old check treated everything except the exact string "y" as a refusal.
    if not click.confirm("Are you sure you wish to overwrite any existing database?"):
        click.echo("Aborted.")
        return

    initialize_database()
    click.echo("Initialized the database.")
|
||||
|
||||
def initialize_app(app: Flask):
    """Hook the database helpers into ``app``.

    Registers the ``init-db`` CLI command and makes ``close_database`` run
    when the application context is torn down.
    """
    app.cli.add_command(init_db_command)
    app.teardown_appcontext(close_database)
|
18
kanken_online/schema.sql
Normal file
18
kanken_online/schema.sql
Normal file
@ -0,0 +1,18 @@
|
||||
-- Rebuilds the user table from scratch: any existing rows are dropped.
DROP TABLE IF EXISTS user;

CREATE TABLE user (
    id       INTEGER PRIMARY KEY AUTOINCREMENT,
    username TEXT    NOT NULL UNIQUE,
    password TEXT    NOT NULL  -- stores the hash written by the register view
);
|
||||
|
27
kanken_online/static/style.css
Normal file
27
kanken_online/static/style.css
Normal file
@ -0,0 +1,27 @@
|
||||
/* From the Flask tutorial */

/* Page frame and base elements */
html { font-family: sans-serif; background: #eee; padding: 1rem; }
body { max-width: 960px; margin: 0 auto; background: white; }
h1 { font-family: serif; color: #377ba8; margin: 1rem 0; }
a { color: #377ba8; }
hr { border: none; border-top: 1px solid lightgray; }

/* Navigation bar */
nav { background: lightgray; display: flex; align-items: center; padding: 0 0.5rem; }
nav h1 { flex: auto; margin: 0; }
nav h1 a { text-decoration: none; padding: 0.25rem 0.5rem; }
nav ul { display: flex; list-style: none; margin: 0; padding: 0; }
nav ul li a, nav ul li span, header .action { display: block; padding: 0.5rem; }

/* Main content area and flashed messages */
.content { padding: 0 1rem 1rem; }
.content > header { border-bottom: 1px solid lightgray; display: flex; align-items: flex-end; }
.content > header h1 { flex: auto; margin: 1rem 0 0.25rem 0; }
.flash { margin: 1em 0; padding: 1em; background: #cae6f6; border: 1px solid #377ba8; }

/* Post styles carried over from the tutorial */
.post > header { display: flex; align-items: flex-end; font-size: 0.85em; }
.post > header > div:first-of-type { flex: auto; }
.post > header h1 { font-size: 1.5em; margin-bottom: 0; }
.post .about { color: slategray; font-style: italic; }
.post .body { white-space: pre-line; }

/* Forms */
.content:last-child { margin-bottom: 0; }
.content form { margin: 1em 0; display: flex; flex-direction: column; }
.content label { font-weight: bold; margin-bottom: 0.5em; }
.content input, .content textarea { margin-bottom: 1em; }
.content textarea { min-height: 12em; resize: vertical; }
input.danger { color: #cc2f2e; }
input[type=submit] { align-self: start; min-width: 10em; }
|
15
kanken_online/templates/auth/login.html
Normal file
15
kanken_online/templates/auth/login.html
Normal file
@ -0,0 +1,15 @@
|
||||
{% extends 'base.html' %}
{# Login form; it posts back to the URL that served it. #}

{% block header %}
  <h1>{% block title %}Log In{% endblock %}</h1>
{% endblock %}

{% block content %}
  <form method="post">
    <label for="username">Username</label>
    <input name="username" id="username" required autofocus>
    <label for="password">Password</label>
    <input type="password" name="password" id="password" required>
    <input type="submit" value="Log In">
  </form>
{% endblock %}
|
15
kanken_online/templates/auth/register.html
Normal file
15
kanken_online/templates/auth/register.html
Normal file
@ -0,0 +1,15 @@
|
||||
{% extends 'base.html' %}
{# Registration form; it posts back to the URL that served it. #}

{% block header %}
  <h1>{% block title %}Register{% endblock %}</h1>
{% endblock %}

{% block content %}
  <form method="post">
    <label for="username">Username</label>
    <input name="username" id="username" required autofocus>
    <label for="password">Password</label>
    <input type="password" name="password" id="password" required>
    <input type="submit" value="Register">
  </form>
{% endblock %}
|
32
kanken_online/templates/base.html
Normal file
32
kanken_online/templates/base.html
Normal file
@ -0,0 +1,32 @@
|
||||
<!DOCTYPE html>
<html lang="en">

<head>
  {# Base layout: child templates fill the title, header and content blocks. #}
  {# Declare the encoding explicitly; pages on this site contain kanji. #}
  <meta charset="utf-8">
  <title>{% block title %}{% endblock %} - KankenOnline</title>
  <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
  <nav>
    <h1>KankenOnline</h1>
    <ul>
      {% if g.user %}
        <li><span>{{ g.user['username'] }}</span></li>
        <li><a href="{{ url_for('options') }}">Options</a></li>
        <li><a href="{{ url_for('auth.logout') }}">Log Out</a></li>
      {% else %}
        <li><a href="{{ url_for('auth.register') }}">Register</a></li>
        <li><a href="{{ url_for('auth.login') }}">Log In</a></li>
      {% endif %}
    </ul>
  </nav>
  <section class="content">
    <header>
      {% block header %}{% endblock %}
    </header>
    {% for message in get_flashed_messages() %}
      <div class="flash">{{ message }}</div>
    {% endfor %}
    {% block content %}{% endblock %}
  </section>
</body>

</html>
|
9
kanken_online/templates/index.html
Normal file
9
kanken_online/templates/index.html
Normal file
@ -0,0 +1,9 @@
|
||||
{% extends 'base.html' %}
{# Main page; the body text is still a placeholder. #}

{% block header %}
  <h1>{% block title %}Main Page{% endblock %}</h1>
{% endblock %}

{% block content %}
  Blahdy blah
{% endblock %}
|
9
kanken_online/templates/options.html
Normal file
9
kanken_online/templates/options.html
Normal file
@ -0,0 +1,9 @@
|
||||
{% extends 'base.html' %}
{# Options page; the body text is still a placeholder. #}

{% block header %}
  <h1>{% block title %}Options{% endblock %}</h1>
{% endblock %}

{% block content %}
  Blahdy blah
{% endblock %}
|
11
pyproject.toml
Normal file
11
pyproject.toml
Normal file
@ -0,0 +1,11 @@
|
||||
[project]
name = "kanken_online"
version = "0.1.0"
description = "Online Kanken practice and information portal."
# Every third-party package imported by kanken_online must be listed here,
# otherwise a fresh install fails at import time.
dependencies = [
    "flask",
    "jsonpickle",       # imported by kanken_online/api.py
    "sqlalchemy>=2.0",  # api.py uses the 2.0 declarative API (DeclarativeBase, mapped_column)
]

[build-system]
requires = ["flit_core<4"]
build-backend = "flit_core.buildapi"
|
Loading…
Reference in New Issue
Block a user