Базовое определение текста через easyocr

2026-03-06 03:36:24 +03:00 · 2023-03-11 01:47:30 +03:00
parent 344b712729
commit e8ca3e3c4d
2 changed files with 51 additions and 115 deletions
--- a/main.py
+++ b/main.py
@@ -1,17 +1,15 @@
 import time
 import math
 import logging
 import numpy as np
 import pandas as pd
 import pygetwindow as gw
 import pyautogui
 from pytesseract import pytesseract
 from pytesseract import Output
 import cv2
 import easyocr
 win_name = "EVE - Nostrom Stone"
 pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
 logging.basicConfig(
    level=logging.INFO,
@@ -22,8 +20,6 @@ logging.basicConfig(
 save_result = True
 pd.options.mode.use_inf_as_na = True
 def save_highlighted_screenshot(screenshot, boxes, filename):
    new_image = screenshot.copy()
@@ -90,108 +86,33 @@ def get_screenshot():
    return cv2.cvtColor(np.array(screenshot), cv2.COLOR_BGR2RGB)
 def get_boxes(screenshot):
    """Recognise words on a screenshot with Tesseract and keep the usable ones.
    The image is converted to grayscale and inverted before recognition.
    A row of the Tesseract output is kept only when its confidence is above
    30 and its text is non-null and purely alphanumeric.
    Args:
        screenshot: image array accepted by ``cv2.cvtColor`` with the
            ``COLOR_BGR2GRAY`` conversion.
    Returns:
        DataFrame holding the filtered rows of the Tesseract output.
    Side effects:
        When the module-level ``save_result`` flag is truthy, the raw and
        the filtered tables are written under ``xlsx/`` and a highlighted
        screenshot is saved through ``save_highlighted_screenshot``.
    """
    grayscale = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
    inverted_screenshot = cv2.bitwise_not(grayscale)
    ocr_options = {
        "lang": "eng",
        "output_type": Output.DATAFRAME,
        "config": "--psm 3",
    }
    raw_boxes = pytesseract.image_to_data(inverted_screenshot, **ocr_options)
    if save_result:
        raw_boxes.to_excel("xlsx/0_raw_boxes.xlsx", index=False)
    # Each mask is named so the filter reads as a sentence.
    confident_enough = raw_boxes["conf"] > 30
    has_text = raw_boxes["text"].notnull()
    is_alphanumeric = raw_boxes["text"].str.isalnum()
    base_boxes = raw_boxes.loc[confident_enough & has_text & is_alphanumeric]
    if save_result:
        save_highlighted_screenshot(
            screenshot, base_boxes, "1_base_highlighted_screenshot"
        )
        base_boxes.to_excel("xlsx/1_base_boxes.xlsx", index=False)
    logging.info("Боксы получены")
    return base_boxes
 def union_boxes(base_boxes):
    """Merge word-level OCR boxes into one bounding box per text line.
    Words are grouped by ``(block_num, par_num, line_num)``. Each resulting
    row describes one line: the leftmost ``left``, the topmost ``top``, the
    largest word ``height``, a ``width`` measured from the leftmost word edge
    to the rightmost word edge of that line, and the words joined by single
    spaces as ``text``.
    Args:
        base_boxes: DataFrame of word boxes with the columns ``left``,
            ``top``, ``width``, ``height``, ``text``, ``block_num``,
            ``par_num`` and ``line_num``.
    Returns:
        DataFrame with one row per line and the columns ``left``, ``top``,
        ``width``, ``height``, ``text``, ``block_num``, ``line_num`` and
        ``par_num``, ordered by block, paragraph and line number.
    Side effects:
        When the module-level ``save_result`` flag is truthy, the result is
        written to ``xlsx/2_unioned_boxes.xlsx``.
    """
    result_columns = [
        "left",
        "top",
        "width",
        "height",
        "text",
        "block_num",
        "line_num",
        "par_num",
    ]
    if base_boxes.empty:
        # Nothing to merge: still return the documented columns.
        result_phrases = pd.DataFrame(columns=result_columns)
    else:
        # Right edge of every word; a line ends at the largest right edge.
        words = base_boxes.assign(
            right=base_boxes["left"] + base_boxes["width"]
        )
        # Grouping by all three keys keeps every paragraph of every block,
        # not just the last paragraph visited inside a block.
        result_phrases = words.groupby(
            ["block_num", "par_num", "line_num"], as_index=False
        ).agg(
            left=("left", "min"),
            top=("top", "min"),
            right=("right", "max"),
            height=("height", "max"),
            text=("text", " ".join),
        )
        # Summing word widths would ignore the gaps between words, and a
        # paragraph-wide extent would overshoot short lines, so the width
        # is taken between the outer edges of each individual line.
        result_phrases["width"] = (
            result_phrases["right"] - result_phrases["left"]
        )
        result_phrases = result_phrases[result_columns]
    if save_result:
        result_phrases.to_excel("xlsx/2_unioned_boxes.xlsx", index=False)
    logging.info("Боксы объединены")
    return result_phrases
 # Image to analyse; get_screenshot() returns the array produced by cv2.cvtColor.
 screenshot = get_screenshot()
 # Word-level boxes recognised by Tesseract and filtered in get_boxes().
 base_boxes = get_boxes(screenshot)
 # Word boxes combined into line-level phrases by union_boxes().
 unioned_boxes = union_boxes(base_boxes)
-save_highlighted_screenshot(
+reader = easyocr.Reader(["en"], gpu=True)
-    screenshot, unioned_boxes, "2_unioned_highlighted_screenshot"
+result = reader.readtext(screenshot)
-)
+
 # Draw every EasyOCR detection (corner points, text, confidence) on the image.
 for bbox, text, prob in result:
    # Corner coordinates are truncated to ints before being used as pixels.
    tl, tr, br, bl = [(int(corner[0]), int(corner[1])) for corner in bbox]
    # Keep only ASCII characters in the label
    # (presumably because cv2.putText cannot render others - TODO confirm).
    text = "".join(ch for ch in text if ord(ch) < 128).strip()
    cv2.rectangle(screenshot, tl, br, (0, 255, 0), 2)
    # The label is placed 10 px above the top-left corner of the box.
    label_origin = (tl[0], tl[1] - 10)
    cv2.putText(
        screenshot,
        text,
        label_origin,
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )
 print(result)
 # Show the annotated image and block until a key is pressed.
 cv2.imshow("screenshot", screenshot)
 cv2.waitKey(0)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,19 +1,34 @@
-et-xmlfile==1.1.0
+certifi==2022.12.7
 charset-normalizer==3.1.0
 easyocr==1.6.2
 idna==3.4
 imageio==2.26.0
 lazy_loader==0.1
 MouseInfo==0.1.3
 networkx==3.0
 ninja==1.11.1
 numpy==1.24.2
 opencv-python==4.7.0.72
 openpyxl==3.1.1
 packaging==23.0
 pandas==1.5.3
 Pillow==9.4.0
 PyAutoGUI==0.9.53
 pyclipper==1.3.0.post4
 PyGetWindow==0.0.9
 PyMsgBox==1.0.9
 pyperclip==1.8.2
 PyRect==0.2.0
 PyScreeze==0.1.28
-pytesseract==0.3.10
+python-bidi==0.4.2
 python-dateutil==2.8.2
 pytweening==1.0.4
-pytz==2022.7.1
+PyWavelets==1.4.1
 PyYAML==6.0
 requests==2.28.2
 scikit-image==0.20.0
 scipy==1.10.1
 shapely==2.0.1
 six==1.16.0
 tifffile==2023.2.28
 torch==1.13.1+cu116
 torchvision==0.14.1+cu116
 typing_extensions==4.5.0
 urllib3==1.26.14