You've already forked MineEVECraft
mirror of
https://github.com/Llloooggg/MineEVECraft.git
synced 2026-03-06 03:36:24 +03:00
Базовое определение текста через easyocr
This commit is contained in:
139
main.py
139
main.py
@@ -1,17 +1,15 @@
|
|||||||
import time
|
import time
|
||||||
import math
|
import math
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
|
||||||
import pygetwindow as gw
|
import pygetwindow as gw
|
||||||
import pyautogui
|
import pyautogui
|
||||||
from pytesseract import pytesseract
|
|
||||||
from pytesseract import Output
|
|
||||||
import cv2
|
import cv2
|
||||||
|
import easyocr
|
||||||
|
|
||||||
|
|
||||||
win_name = "EVE - Nostrom Stone"
|
win_name = "EVE - Nostrom Stone"
|
||||||
pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
|
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO,
|
level=logging.INFO,
|
||||||
@@ -22,8 +20,6 @@ logging.basicConfig(
|
|||||||
|
|
||||||
save_result = True
|
save_result = True
|
||||||
|
|
||||||
pd.options.mode.use_inf_as_na = True
|
|
||||||
|
|
||||||
|
|
||||||
def save_highlighted_screenshot(screenshot, boxes, filename):
|
def save_highlighted_screenshot(screenshot, boxes, filename):
|
||||||
new_image = screenshot.copy()
|
new_image = screenshot.copy()
|
||||||
@@ -90,108 +86,33 @@ def get_screenshot():
|
|||||||
return cv2.cvtColor(np.array(screenshot), cv2.COLOR_BGR2RGB)
|
return cv2.cvtColor(np.array(screenshot), cv2.COLOR_BGR2RGB)
|
||||||
|
|
||||||
|
|
||||||
def get_boxes(screenshot):
|
|
||||||
inverted_screenshot = cv2.bitwise_not(
|
|
||||||
cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
|
|
||||||
)
|
|
||||||
|
|
||||||
raw_boxes = pytesseract.image_to_data(
|
|
||||||
inverted_screenshot,
|
|
||||||
lang="eng",
|
|
||||||
output_type=Output.DATAFRAME,
|
|
||||||
config="--psm 3",
|
|
||||||
)
|
|
||||||
|
|
||||||
if save_result:
|
|
||||||
raw_boxes.to_excel("xlsx/0_raw_boxes.xlsx", index=False)
|
|
||||||
|
|
||||||
base_boxes = raw_boxes.loc[
|
|
||||||
(raw_boxes["conf"] > 30)
|
|
||||||
& (raw_boxes["text"].notnull())
|
|
||||||
& (raw_boxes["text"].str.isalnum())
|
|
||||||
]
|
|
||||||
|
|
||||||
if save_result:
|
|
||||||
save_highlighted_screenshot(
|
|
||||||
screenshot, base_boxes, "1_base_highlighted_screenshot"
|
|
||||||
)
|
|
||||||
base_boxes.to_excel("xlsx/1_base_boxes.xlsx", index=False)
|
|
||||||
|
|
||||||
logging.info("Боксы получены")
|
|
||||||
|
|
||||||
return base_boxes
|
|
||||||
|
|
||||||
|
|
||||||
def union_boxes(base_boxes):
|
|
||||||
result_phrases = pd.DataFrame(
|
|
||||||
columns=[
|
|
||||||
"left",
|
|
||||||
"top",
|
|
||||||
"width",
|
|
||||||
"height",
|
|
||||||
"text",
|
|
||||||
"block_num",
|
|
||||||
"line_num",
|
|
||||||
"par_num",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
for box in base_boxes["block_num"].unique():
|
|
||||||
paragraphs_in_box = base_boxes.loc[base_boxes["block_num"] == box][
|
|
||||||
"par_num"
|
|
||||||
].unique()
|
|
||||||
for paragraph in paragraphs_in_box:
|
|
||||||
words_in_paragraph = base_boxes.loc[
|
|
||||||
(base_boxes["block_num"] == box)
|
|
||||||
& (base_boxes["par_num"] == paragraph),
|
|
||||||
]
|
|
||||||
|
|
||||||
grouped_words = words_in_paragraph.groupby(
|
|
||||||
"line_num", as_index=False
|
|
||||||
)
|
|
||||||
|
|
||||||
box_phrases = grouped_words["width"].sum()
|
|
||||||
box_phrases = box_phrases.merge(
|
|
||||||
grouped_words["height"].max(), on="line_num", how="left"
|
|
||||||
)
|
|
||||||
box_phrases = box_phrases.merge(
|
|
||||||
grouped_words["left"].min(), on="line_num", how="left"
|
|
||||||
)
|
|
||||||
box_phrases = box_phrases.merge(
|
|
||||||
grouped_words["top"].min(), on="line_num", how="left"
|
|
||||||
)
|
|
||||||
box_phrases = box_phrases.merge(
|
|
||||||
grouped_words["text"].apply(" ".join),
|
|
||||||
on="line_num",
|
|
||||||
how="left",
|
|
||||||
)
|
|
||||||
box_phrases["block_num"] = box
|
|
||||||
|
|
||||||
rightest_box = words_in_paragraph.loc[
|
|
||||||
words_in_paragraph["left"] == words_in_paragraph["left"].max()
|
|
||||||
]
|
|
||||||
leftest_box = words_in_paragraph.loc[
|
|
||||||
words_in_paragraph["left"] == words_in_paragraph["left"].min()
|
|
||||||
]
|
|
||||||
box_phrases["width"] = (
|
|
||||||
rightest_box.iloc[0].left
|
|
||||||
+ rightest_box.iloc[0].width
|
|
||||||
- leftest_box.iloc[0].left
|
|
||||||
)
|
|
||||||
|
|
||||||
result_phrases = pd.concat([result_phrases, box_phrases])
|
|
||||||
|
|
||||||
if save_result:
|
|
||||||
result_phrases.to_excel("xlsx/2_unioned_boxes.xlsx", index=False)
|
|
||||||
|
|
||||||
logging.info("Боксы объединены")
|
|
||||||
|
|
||||||
return result_phrases
|
|
||||||
|
|
||||||
|
|
||||||
screenshot = get_screenshot()
|
screenshot = get_screenshot()
|
||||||
base_boxes = get_boxes(screenshot)
|
|
||||||
unioned_boxes = union_boxes(base_boxes)
|
|
||||||
|
|
||||||
save_highlighted_screenshot(
|
reader = easyocr.Reader(["en"], gpu=True)
|
||||||
screenshot, unioned_boxes, "2_unioned_highlighted_screenshot"
|
result = reader.readtext(screenshot)
|
||||||
)
|
|
||||||
|
|
||||||
|
for bbox, text, prob in result:
|
||||||
|
(tl, tr, br, bl) = bbox
|
||||||
|
tl = (int(tl[0]), int(tl[1]))
|
||||||
|
tr = (int(tr[0]), int(tr[1]))
|
||||||
|
br = (int(br[0]), int(br[1]))
|
||||||
|
bl = (int(bl[0]), int(bl[1]))
|
||||||
|
|
||||||
|
text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
|
||||||
|
cv2.rectangle(screenshot, tl, br, (0, 255, 0), 2)
|
||||||
|
cv2.putText(
|
||||||
|
screenshot,
|
||||||
|
text,
|
||||||
|
(tl[0], tl[1] - 10),
|
||||||
|
cv2.FONT_HERSHEY_SIMPLEX,
|
||||||
|
0.8,
|
||||||
|
(0, 255, 0),
|
||||||
|
2,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
cv2.imshow("screenshot", screenshot)
|
||||||
|
cv2.waitKey(0)
|
||||||
|
|||||||
@@ -1,19 +1,34 @@
|
|||||||
et-xmlfile==1.1.0
|
certifi==2022.12.7
|
||||||
|
charset-normalizer==3.1.0
|
||||||
|
easyocr==1.6.2
|
||||||
|
idna==3.4
|
||||||
|
imageio==2.26.0
|
||||||
|
lazy_loader==0.1
|
||||||
MouseInfo==0.1.3
|
MouseInfo==0.1.3
|
||||||
|
networkx==3.0
|
||||||
|
ninja==1.11.1
|
||||||
numpy==1.24.2
|
numpy==1.24.2
|
||||||
opencv-python==4.7.0.72
|
opencv-python==4.7.0.72
|
||||||
openpyxl==3.1.1
|
|
||||||
packaging==23.0
|
packaging==23.0
|
||||||
pandas==1.5.3
|
|
||||||
Pillow==9.4.0
|
Pillow==9.4.0
|
||||||
PyAutoGUI==0.9.53
|
PyAutoGUI==0.9.53
|
||||||
|
pyclipper==1.3.0.post4
|
||||||
PyGetWindow==0.0.9
|
PyGetWindow==0.0.9
|
||||||
PyMsgBox==1.0.9
|
PyMsgBox==1.0.9
|
||||||
pyperclip==1.8.2
|
pyperclip==1.8.2
|
||||||
PyRect==0.2.0
|
PyRect==0.2.0
|
||||||
PyScreeze==0.1.28
|
PyScreeze==0.1.28
|
||||||
pytesseract==0.3.10
|
python-bidi==0.4.2
|
||||||
python-dateutil==2.8.2
|
|
||||||
pytweening==1.0.4
|
pytweening==1.0.4
|
||||||
pytz==2022.7.1
|
PyWavelets==1.4.1
|
||||||
|
PyYAML==6.0
|
||||||
|
requests==2.28.2
|
||||||
|
scikit-image==0.20.0
|
||||||
|
scipy==1.10.1
|
||||||
|
shapely==2.0.1
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
|
tifffile==2023.2.28
|
||||||
|
torch==1.13.1+cu116
|
||||||
|
torchvision==0.14.1+cu116
|
||||||
|
typing_extensions==4.5.0
|
||||||
|
urllib3==1.26.14
|
||||||
|
|||||||
Reference in New Issue
Block a user