From e8ca3e3c4da3803d5f3aec373e6322d7b32d75f2 Mon Sep 17 00:00:00 2001 From: Llloooggg Date: Sat, 11 Mar 2023 01:47:30 +0300 Subject: [PATCH] =?UTF-8?q?=D0=91=D0=B0=D0=B7=D0=BE=D0=B2=D0=BE=D0=B5=20?= =?UTF-8?q?=D0=BE=D0=BF=D1=80=D0=B5=D0=B4=D0=B5=D0=BB=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D0=B5=20=D1=82=D0=B5=D0=BA=D1=81=D1=82=D0=B0=20=D1=87=D0=B5?= =?UTF-8?q?=D1=80=D0=B5=D0=B7=20easyocr?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 139 ++++++++++------------------------------------- requirements.txt | 27 +++++++-- 2 files changed, 51 insertions(+), 115 deletions(-) diff --git a/main.py b/main.py index f422a46..5cdf044 100644 --- a/main.py +++ b/main.py @@ -1,17 +1,15 @@ import time import math import logging + import numpy as np -import pandas as pd import pygetwindow as gw import pyautogui -from pytesseract import pytesseract -from pytesseract import Output import cv2 +import easyocr win_name = "EVE - Nostrom Stone" -pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe" logging.basicConfig( level=logging.INFO, @@ -22,8 +20,6 @@ logging.basicConfig( save_result = True -pd.options.mode.use_inf_as_na = True - def save_highlighted_screenshot(screenshot, boxes, filename): new_image = screenshot.copy() @@ -90,108 +86,33 @@ def get_screenshot(): return cv2.cvtColor(np.array(screenshot), cv2.COLOR_BGR2RGB) -def get_boxes(screenshot): - inverted_screenshot = cv2.bitwise_not( - cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY) - ) - - raw_boxes = pytesseract.image_to_data( - inverted_screenshot, - lang="eng", - output_type=Output.DATAFRAME, - config="--psm 3", - ) - - if save_result: - raw_boxes.to_excel("xlsx/0_raw_boxes.xlsx", index=False) - - base_boxes = raw_boxes.loc[ - (raw_boxes["conf"] > 30) - & (raw_boxes["text"].notnull()) - & (raw_boxes["text"].str.isalnum()) - ] - - if save_result: - save_highlighted_screenshot( - screenshot, base_boxes, "1_base_highlighted_screenshot" - ) - base_boxes.to_excel("xlsx/1_base_boxes.xlsx", index=False) - - logging.info("Боксы получены") - - return base_boxes - - -def union_boxes(base_boxes): - result_phrases = pd.DataFrame( - columns=[ - "left", - "top", - "width", - "height", - "text", - "block_num", - "line_num", - "par_num", - ] - ) - for box in base_boxes["block_num"].unique(): - paragraphs_in_box = base_boxes.loc[base_boxes["block_num"] == box][ - "par_num" - ].unique() - for paragraph in paragraphs_in_box: - words_in_paragraph = base_boxes.loc[ - (base_boxes["block_num"] == box) - & (base_boxes["par_num"] == paragraph), - ] - - grouped_words = words_in_paragraph.groupby( - "line_num", as_index=False - ) - - box_phrases = grouped_words["width"].sum() - box_phrases = box_phrases.merge( - grouped_words["height"].max(), on="line_num", how="left" - ) - box_phrases = box_phrases.merge( - grouped_words["left"].min(), on="line_num", how="left" - ) - box_phrases = box_phrases.merge( - grouped_words["top"].min(), on="line_num", how="left" - ) - box_phrases = box_phrases.merge( - grouped_words["text"].apply(" ".join), - on="line_num", - how="left", - ) - box_phrases["block_num"] = box - - rightest_box = words_in_paragraph.loc[ - words_in_paragraph["left"] == words_in_paragraph["left"].max() - ] - leftest_box = words_in_paragraph.loc[ - words_in_paragraph["left"] == words_in_paragraph["left"].min() - ] - box_phrases["width"] = ( - rightest_box.iloc[0].left - + rightest_box.iloc[0].width - - leftest_box.iloc[0].left - ) - - result_phrases = pd.concat([result_phrases, box_phrases]) - - if save_result: - result_phrases.to_excel("xlsx/2_unioned_boxes.xlsx", index=False) - - logging.info("Боксы объединены") - - return result_phrases - - screenshot = get_screenshot() -base_boxes = get_boxes(screenshot) -unioned_boxes = union_boxes(base_boxes) -save_highlighted_screenshot( - screenshot, unioned_boxes, "2_unioned_highlighted_screenshot" -) +reader = easyocr.Reader(["en"], gpu=True) +result = reader.readtext(screenshot) + + +for bbox, text, prob in result: + (tl, tr, br, bl) = bbox + tl = (int(tl[0]), int(tl[1])) + tr = (int(tr[0]), int(tr[1])) + br = (int(br[0]), int(br[1])) + bl = (int(bl[0]), int(bl[1])) + + text = "".join([c if ord(c) < 128 else "" for c in text]).strip() + cv2.rectangle(screenshot, tl, br, (0, 255, 0), 2) + cv2.putText( + screenshot, + text, + (tl[0], tl[1] - 10), + cv2.FONT_HERSHEY_SIMPLEX, + 0.8, + (0, 255, 0), + 2, + ) + + +print(result) + +cv2.imshow("screenshot", screenshot) +cv2.waitKey(0) diff --git a/requirements.txt b/requirements.txt index 10888dd..a4f101c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,34 @@ -et-xmlfile==1.1.0 +certifi==2022.12.7 +charset-normalizer==3.1.0 +easyocr==1.6.2 +idna==3.4 +imageio==2.26.0 +lazy_loader==0.1 MouseInfo==0.1.3 +networkx==3.0 +ninja==1.11.1 numpy==1.24.2 opencv-python==4.7.0.72 -openpyxl==3.1.1 packaging==23.0 -pandas==1.5.3 Pillow==9.4.0 PyAutoGUI==0.9.53 +pyclipper==1.3.0.post4 PyGetWindow==0.0.9 PyMsgBox==1.0.9 pyperclip==1.8.2 PyRect==0.2.0 PyScreeze==0.1.28 -pytesseract==0.3.10 -python-dateutil==2.8.2 +python-bidi==0.4.2 pytweening==1.0.4 -pytz==2022.7.1 +PyWavelets==1.4.1 +PyYAML==6.0 +requests==2.28.2 +scikit-image==0.20.0 +scipy==1.10.1 +shapely==2.0.1 six==1.16.0 +tifffile==2023.2.28 +torch==1.13.1+cu116 +torchvision==0.14.1+cu116 +typing_extensions==4.5.0 +urllib3==1.26.14