diff --git a/main.py b/main.py index f422a46..5cdf044 100644 --- a/main.py +++ b/main.py @@ -1,17 +1,15 @@ import time import math import logging + import numpy as np -import pandas as pd import pygetwindow as gw import pyautogui -from pytesseract import pytesseract -from pytesseract import Output import cv2 +import easyocr win_name = "EVE - Nostrom Stone" -pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe" logging.basicConfig( level=logging.INFO, @@ -22,8 +20,6 @@ logging.basicConfig( save_result = True -pd.options.mode.use_inf_as_na = True - def save_highlighted_screenshot(screenshot, boxes, filename): new_image = screenshot.copy() @@ -90,108 +86,33 @@ def get_screenshot(): return cv2.cvtColor(np.array(screenshot), cv2.COLOR_BGR2RGB) -def get_boxes(screenshot): - inverted_screenshot = cv2.bitwise_not( - cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY) - ) - - raw_boxes = pytesseract.image_to_data( - inverted_screenshot, - lang="eng", - output_type=Output.DATAFRAME, - config="--psm 3", - ) - - if save_result: - raw_boxes.to_excel("xlsx/0_raw_boxes.xlsx", index=False) - - base_boxes = raw_boxes.loc[ - (raw_boxes["conf"] > 30) - & (raw_boxes["text"].notnull()) - & (raw_boxes["text"].str.isalnum()) - ] - - if save_result: - save_highlighted_screenshot( - screenshot, base_boxes, "1_base_highlighted_screenshot" - ) - base_boxes.to_excel("xlsx/1_base_boxes.xlsx", index=False) - - logging.info("Боксы получены") - - return base_boxes - - -def union_boxes(base_boxes): - result_phrases = pd.DataFrame( - columns=[ - "left", - "top", - "width", - "height", - "text", - "block_num", - "line_num", - "par_num", - ] - ) - for box in base_boxes["block_num"].unique(): - paragraphs_in_box = base_boxes.loc[base_boxes["block_num"] == box][ - "par_num" - ].unique() - for paragraph in paragraphs_in_box: - words_in_paragraph = base_boxes.loc[ - (base_boxes["block_num"] == box) - & (base_boxes["par_num"] == paragraph), - ] - - grouped_words = words_in_paragraph.groupby( - "line_num", as_index=False - ) - - box_phrases = grouped_words["width"].sum() - box_phrases = box_phrases.merge( - grouped_words["height"].max(), on="line_num", how="left" - ) - box_phrases = box_phrases.merge( - grouped_words["left"].min(), on="line_num", how="left" - ) - box_phrases = box_phrases.merge( - grouped_words["top"].min(), on="line_num", how="left" - ) - box_phrases = box_phrases.merge( - grouped_words["text"].apply(" ".join), - on="line_num", - how="left", - ) - box_phrases["block_num"] = box - - rightest_box = words_in_paragraph.loc[ - words_in_paragraph["left"] == words_in_paragraph["left"].max() - ] - leftest_box = words_in_paragraph.loc[ - words_in_paragraph["left"] == words_in_paragraph["left"].min() - ] - box_phrases["width"] = ( - rightest_box.iloc[0].left - + rightest_box.iloc[0].width - - leftest_box.iloc[0].left - ) - - result_phrases = pd.concat([result_phrases, box_phrases]) - - if save_result: - result_phrases.to_excel("xlsx/2_unioned_boxes.xlsx", index=False) - - logging.info("Боксы объединены") - - return result_phrases - - screenshot = get_screenshot() -base_boxes = get_boxes(screenshot) -unioned_boxes = union_boxes(base_boxes) -save_highlighted_screenshot( - screenshot, unioned_boxes, "2_unioned_highlighted_screenshot" -) +reader = easyocr.Reader(["en"], gpu=True) +result = reader.readtext(screenshot) + + +for bbox, text, prob in result: + (tl, tr, br, bl) = bbox + tl = (int(tl[0]), int(tl[1])) + tr = (int(tr[0]), int(tr[1])) + br = (int(br[0]), int(br[1])) + bl = (int(bl[0]), int(bl[1])) + + text = "".join([c if ord(c) < 128 else "" for c in text]).strip() + cv2.rectangle(screenshot, tl, br, (0, 255, 0), 2) + cv2.putText( + screenshot, + text, + (tl[0], tl[1] - 10), + cv2.FONT_HERSHEY_SIMPLEX, + 0.8, + (0, 255, 0), + 2, + ) + + +print(result) + +cv2.imshow("screenshot", screenshot) +cv2.waitKey(0) diff --git a/requirements.txt b/requirements.txt index 10888dd..a4f101c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,34 @@ -et-xmlfile==1.1.0 +certifi==2022.12.7 +charset-normalizer==3.1.0 +easyocr==1.6.2 +idna==3.4 +imageio==2.26.0 +lazy_loader==0.1 MouseInfo==0.1.3 +networkx==3.0 +ninja==1.11.1 numpy==1.24.2 opencv-python==4.7.0.72 -openpyxl==3.1.1 packaging==23.0 -pandas==1.5.3 Pillow==9.4.0 PyAutoGUI==0.9.53 +pyclipper==1.3.0.post4 PyGetWindow==0.0.9 PyMsgBox==1.0.9 pyperclip==1.8.2 PyRect==0.2.0 PyScreeze==0.1.28 -pytesseract==0.3.10 -python-dateutil==2.8.2 +python-bidi==0.4.2 pytweening==1.0.4 -pytz==2022.7.1 +PyWavelets==1.4.1 +PyYAML==6.0 +requests==2.28.2 +scikit-image==0.20.0 +scipy==1.10.1 +shapely==2.0.1 six==1.16.0 +tifffile==2023.2.28 +torch==1.13.1+cu116 +torchvision==0.14.1+cu116 +typing_extensions==4.5.0 +urllib3==1.26.14