aW1wb3J0IGN2MgppbXBvcnQgcHl0ZXNzZXJhY3QKaW1wb3J0IHJhbmRvbQppbXBvcnQgcmUKZnJvbSBqYW5vbWUudG9rZW5pemVyIGltcG9ydCBUb2tlbml6ZXIKCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQojIE9DUuOBp+eUu+WDj+OBi+OCieaWh+Wtl+aKveWHugojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KZGVmIG9jcl9pbWFnZShpbWFnZV9wYXRoKToKICAgIGltZyA9IGN2Mi5pbXJlYWQoaW1hZ2VfcGF0aCkKICAgIGdyYXkgPSBjdjIuY3Z0Q29sb3IoaW1nLCBjdjIuQ09MT1JfQkdSMkdSQVkpCiAgICB0ZXh0ID0gcHl0ZXNzZXJhY3QuaW1hZ2VfdG9fc3RyaW5nKGdyYXksIGxhbmc9J2pwbicpCiAgICByZXR1cm4gdGV4dAoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMg5paH44KS5YiG5YmyCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQpkZWYgc3BsaXRfc2VudGVuY2VzKHRleHQpOgogICAgc2VudGVuY2VzID0gcmUuc3BsaXQoJ1vjgILvvIHvvJ9cbl0nLCB0ZXh0KQogICAgc2VudGVuY2VzID0gW3Muc3RyaXAoKSBmb3IgcyBpbiBzZW50ZW5jZXMgaWYgcy5zdHJpcCgpXQogICAgcmV0dXJuIHNlbnRlbmNlcwoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMg5ryi5a2X44KS5ZCr44KA5paH44KS5oq95Ye6CiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQpkZWYgZ2V0X3NlbnRlbmNlc193aXRoX2thbmppKHNlbnRlbmNlcyk6CiAgICByZXR1cm4gW3MgZm9yIHMgaW4gc2VudGVuY2VzIGlmIHJlLnNlYXJjaChyJ1vkuIAt6b6lXScsIHMpXQoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMg44Gy44KJ44GM44Gq5aSJ5o+bCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQpkZWYgdG9faGlyYWdhbmEodGV4dCk6CiAgICByZXN1bHQgPSAmcXVvdDsmcXVvdDsKICAgIGZvciBjaGFyIGluIHRleHQ6CiAgICAgICAgIyDjgqvjgr/jgqvjg4omcmFycjvjgbLjgonjgYzjgaoKICAgICAgICBpZiAn44KhJyAmbHQ7PSBjaGFyICZsdDs9ICfjg7MnOgogICAgICAgICAgICByZXN1bHQgKz0gY2hyKG9yZChjaGFyKSAtIDB4NjApCiAgICAgICAgZWxzZToKICAgICAgICAgICAgcmVzdWx0ICs9IGNoYXIKICAgIHJldHVybiByZXN1bHQKCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQojIOa8ouWtl+aKveWHugojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KZGVmIGV4dHJhY3Rfa2FuamkodGV4dCk6CiAgICByZXR1cm4gJnF1b3Q7JnF1b3Q7LmpvaW4ocmUuZmluZGFsbChyJ1vkuIAt6b6lXScsIHRleHQpKQoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMg44Kv44Kk44K65L2c5oiQCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQpkZWYgY3JlYXRlX3F1aXooc2VudGVuY2VzLCBudW1fcXVlc3Rpb25zPTEwKToKICAgIHNlbGVjdGVkID0gcmFuZG9tLnNhbXBsZShzZW50ZW5jZXMsIG1pbihudW1fcXVlc3Rpb25zLCBsZW4oc2Vu
dGVuY2VzKSkpCgogICAgZm9yIGksIHNlbnRlbmNlIGluIGVudW1lcmF0ZShzZWxlY3RlZCwgMSk6CiAgICAgICAgaGlyYWdhbmFfc2VudGVuY2UgPSB0b19oaXJhZ2FuYShzZW50ZW5jZSkKICAgICAgICBrYW5qaV9hbnN3ZXIgPSBleHRyYWN0X2thbmppKHNlbnRlbmNlKQoKICAgICAgICBwcmludChmJnF1b3Q7XG7jgJDnrKx7aX3llY/jgJEmcXVvdDspCiAgICAgICAgcHJpbnQoJnF1b3Q744Gk44GO44GuIOOBtuOCk+OCkiDjgojjgb/jgb7jgZfjgofjgYbjgIImcXVvdDspCiAgICAgICAgcHJpbnQoaGlyYWdhbmFfc2VudGVuY2UpCiAgICAgICAgcHJpbnQoJnF1b3Q7XG7imIUg44KC44Go44GuIOOBi+OCk+OBmOOBryDjgarjgpPjgafjgZfjgZ/jgYvvvJ8mcXVvdDspCiAgICAgICAgcHJpbnQoJnF1b3Q7KOetlOOBiCkgOiZxdW90Oywga2FuamlfYW5zd2VyKQogICAgICAgIHByaW50KCZxdW90Oy0mcXVvdDsgKiA0MCkKCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQojIOODoeOCpOODswojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KaWYgX19uYW1lX18gPT0gJnF1b3Q7X19tYWluX18mcXVvdDs6CiAgICBpbWFnZV9wYXRoID0gJnF1b3Q7a3lva2FzaG8uanBnJnF1b3Q7ICAjIOaSruW9seOBl+OBn+WGmeecnwoKICAgIHByaW50KCZxdW90O+eUu+WDj+OCkuiqreOBv+i+vOOBv+S4rS4uLiZxdW90OykKICAgIHRleHQgPSBvY3JfaW1hZ2UoaW1hZ2VfcGF0aCkKCiAgICBzZW50ZW5jZXMgPSBzcGxpdF9zZW50ZW5jZXModGV4dCkKICAgIGthbmppX3NlbnRlbmNlcyA9IGdldF9zZW50ZW5jZXNfd2l0aF9rYW5qaShzZW50ZW5jZXMpCgogICAgaWYgbGVuKGthbmppX3NlbnRlbmNlcykgPT0gMDoKICAgICAgICBwcmludCgmcXVvdDvmvKLlrZfjgpLlkKvjgoDmlofjgYzopovjgaTjgYvjgorjgb7jgZvjgpPjgafjgZfjgZ/jgIImcXVvdDspCiAgICBlbHNlOgogICAgICAgIGNyZWF0ZV9xdWl6KGthbmppX3NlbnRlbmNlcywgMTAp
import cv2
import pytesseract
import random
import re
from janome.tokenizer import Tokenizer
# ----------------------------
# OCRで画像から文字抽出
# ----------------------------
def ocr_image(image_path):
    """Extract text from an image file using OCR.

    The image is loaded with OpenCV, converted to grayscale and handed to
    Tesseract with the Japanese language model (``lang='jpn'``).

    Args:
        image_path: Path of the image file to read.

    Returns:
        The value returned by ``pytesseract.image_to_string`` for the
        grayscale image (the recognised text).

    Raises:
        FileNotFoundError: If OpenCV could not load the image (for example
            the file is missing or cannot be decoded).
    """
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising; fail
    # here with a clear message instead of an opaque error inside cvtColor.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return pytesseract.image_to_string(gray, lang='jpn')
# ----------------------------
# 文を分割
# ----------------------------
def split_sentences(text):
    """Split text into sentences on sentence-ending marks and newlines.

    A sentence ends at the ideographic full stop, at an exclamation or
    question mark (ASCII or full-width form), or at a newline. The
    delimiters themselves are dropped.

    Args:
        text: The text to split.

    Returns:
        A list of the non-empty pieces, each stripped of surrounding
        whitespace, in their original order.
    """
    # The full-width '!' and '?' are spelled as \uXXXX escapes so they cannot
    # be confused with, or normalised into, their ASCII look-alikes.
    pieces = re.split(r"[。!?\uFF01\uFF1F\n]", text)
    stripped_pieces = (piece.strip() for piece in pieces)
    return [piece for piece in stripped_pieces if piece]
# ----------------------------
# 漢字を含む文を抽出
# ----------------------------
def get_sentences_with_kanji(sentences):
    """Keep only the sentences that contain at least one kanji.

    A character counts as kanji when it matches the class ``[一-龥]``.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A new list with the matching sentences in their original order.
    """
    kanji_pattern = re.compile(r'[一-龥]')
    matching = []
    for sentence in sentences:
        if kanji_pattern.search(sentence) is not None:
            matching.append(sentence)
    return matching
# ----------------------------
# ひらがな変換
# ----------------------------
# Katakana U+30A1 (small a) .. U+30F4 (vu) sit exactly 0x60 above their
# hiragana counterparts U+3041 .. U+3094, so the mapping is a fixed offset.
_KATAKANA_TO_HIRAGANA = {
    code_point: code_point - 0x60 for code_point in range(0x30A1, 0x30F5)
}


def to_hiragana(text):
    """Convert katakana characters in ``text`` to hiragana.

    Only katakana letters are mapped; every other character (kanji,
    hiragana, the prolonged sound mark, ASCII, ...) is returned unchanged.

    Args:
        text: The string to convert.

    Returns:
        A new string with each katakana letter in U+30A1..U+30F4 replaced by
        the hiragana letter 0x60 code points below it.
    """
    # str.translate performs the whole substitution in one pass instead of
    # growing a string character by character.
    return text.translate(_KATAKANA_TO_HIRAGANA)
# ----------------------------
# 漢字抽出
# ----------------------------
def extract_kanji(text):
    """Return every kanji found in ``text``, concatenated in order.

    A character counts as kanji when it matches the class ``[一-龥]``.
    All other characters are dropped; an empty string is returned when
    nothing matches.
    """
    kanji_pattern = re.compile(r'[一-龥]')
    kanji_chars = [match.group(0) for match in kanji_pattern.finditer(text)]
    return "".join(kanji_chars)
# ----------------------------
# クイズ作成
# ----------------------------
def create_quiz(sentences, num_questions=10):
    """Print a reading quiz built from randomly chosen sentences.

    Up to ``num_questions`` sentences are drawn without replacement. For
    each one the output shows a numbered header, the sentence after it has
    been passed through ``to_hiragana``, and the kanji that
    ``extract_kanji`` pulls from the original sentence as the answer.

    Args:
        sentences: Sequence of sentences to draw questions from.
        num_questions: Maximum number of questions to print.

    Returns:
        None. All output goes to standard output.
    """
    # Never ask for more samples than there are sentences.
    question_count = min(num_questions, len(sentences))
    chosen = random.sample(sentences, question_count)

    separator = "-" * 40
    for number, original in enumerate(chosen, start=1):
        answer = extract_kanji(original)
        reading = to_hiragana(original)

        print(f"\n【第{number}問】")
        print("つぎの ぶんを よみましょう。")
        print(reading)
        print("\n★ もとの かんじは なんでしたか?")
        print("(答え) :", answer)
        print(separator)
# ----------------------------
# メイン
# ----------------------------
if __name__ == "__main__":
    # Script entry point: OCR a fixed image file and print a quiz from it.
    image_path = "kyokasho.jpg"  # the photo that was taken

    print("画像を読み込み中...")
    text = ocr_image(image_path)

    # Split the OCR output into sentences and keep those containing kanji.
    sentences = split_sentences(text)
    kanji_sentences = get_sentences_with_kanji(sentences)

    if kanji_sentences:
        create_quiz(kanji_sentences, 10)
    else:
        print("漢字を含む文が見つかりませんでした。")