You've already forked MineEVECraft
mirror of
https://github.com/Llloooggg/MineEVECraft.git
synced 2026-03-06 03:36:24 +03:00
Базовое определение текста через easyocr
This commit is contained in:
139
main.py
139
main.py
@@ -1,17 +1,15 @@
|
||||
import time
|
||||
import math
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pygetwindow as gw
|
||||
import pyautogui
|
||||
from pytesseract import pytesseract
|
||||
from pytesseract import Output
|
||||
import cv2
|
||||
import easyocr
|
||||
|
||||
|
||||
win_name = "EVE - Nostrom Stone"
|
||||
pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
@@ -22,8 +20,6 @@ logging.basicConfig(
|
||||
|
||||
save_result = True
|
||||
|
||||
pd.options.mode.use_inf_as_na = True
|
||||
|
||||
|
||||
def save_highlighted_screenshot(screenshot, boxes, filename):
|
||||
new_image = screenshot.copy()
|
||||
@@ -90,108 +86,33 @@ def get_screenshot():
|
||||
return cv2.cvtColor(np.array(screenshot), cv2.COLOR_BGR2RGB)
|
||||
|
||||
|
||||
def get_boxes(screenshot):
|
||||
inverted_screenshot = cv2.bitwise_not(
|
||||
cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
|
||||
)
|
||||
|
||||
raw_boxes = pytesseract.image_to_data(
|
||||
inverted_screenshot,
|
||||
lang="eng",
|
||||
output_type=Output.DATAFRAME,
|
||||
config="--psm 3",
|
||||
)
|
||||
|
||||
if save_result:
|
||||
raw_boxes.to_excel("xlsx/0_raw_boxes.xlsx", index=False)
|
||||
|
||||
base_boxes = raw_boxes.loc[
|
||||
(raw_boxes["conf"] > 30)
|
||||
& (raw_boxes["text"].notnull())
|
||||
& (raw_boxes["text"].str.isalnum())
|
||||
]
|
||||
|
||||
if save_result:
|
||||
save_highlighted_screenshot(
|
||||
screenshot, base_boxes, "1_base_highlighted_screenshot"
|
||||
)
|
||||
base_boxes.to_excel("xlsx/1_base_boxes.xlsx", index=False)
|
||||
|
||||
logging.info("Боксы получены")
|
||||
|
||||
return base_boxes
|
||||
|
||||
|
||||
def union_boxes(base_boxes):
|
||||
result_phrases = pd.DataFrame(
|
||||
columns=[
|
||||
"left",
|
||||
"top",
|
||||
"width",
|
||||
"height",
|
||||
"text",
|
||||
"block_num",
|
||||
"line_num",
|
||||
"par_num",
|
||||
]
|
||||
)
|
||||
for box in base_boxes["block_num"].unique():
|
||||
paragraphs_in_box = base_boxes.loc[base_boxes["block_num"] == box][
|
||||
"par_num"
|
||||
].unique()
|
||||
for paragraph in paragraphs_in_box:
|
||||
words_in_paragraph = base_boxes.loc[
|
||||
(base_boxes["block_num"] == box)
|
||||
& (base_boxes["par_num"] == paragraph),
|
||||
]
|
||||
|
||||
grouped_words = words_in_paragraph.groupby(
|
||||
"line_num", as_index=False
|
||||
)
|
||||
|
||||
box_phrases = grouped_words["width"].sum()
|
||||
box_phrases = box_phrases.merge(
|
||||
grouped_words["height"].max(), on="line_num", how="left"
|
||||
)
|
||||
box_phrases = box_phrases.merge(
|
||||
grouped_words["left"].min(), on="line_num", how="left"
|
||||
)
|
||||
box_phrases = box_phrases.merge(
|
||||
grouped_words["top"].min(), on="line_num", how="left"
|
||||
)
|
||||
box_phrases = box_phrases.merge(
|
||||
grouped_words["text"].apply(" ".join),
|
||||
on="line_num",
|
||||
how="left",
|
||||
)
|
||||
box_phrases["block_num"] = box
|
||||
|
||||
rightest_box = words_in_paragraph.loc[
|
||||
words_in_paragraph["left"] == words_in_paragraph["left"].max()
|
||||
]
|
||||
leftest_box = words_in_paragraph.loc[
|
||||
words_in_paragraph["left"] == words_in_paragraph["left"].min()
|
||||
]
|
||||
box_phrases["width"] = (
|
||||
rightest_box.iloc[0].left
|
||||
+ rightest_box.iloc[0].width
|
||||
- leftest_box.iloc[0].left
|
||||
)
|
||||
|
||||
result_phrases = pd.concat([result_phrases, box_phrases])
|
||||
|
||||
if save_result:
|
||||
result_phrases.to_excel("xlsx/2_unioned_boxes.xlsx", index=False)
|
||||
|
||||
logging.info("Боксы объединены")
|
||||
|
||||
return result_phrases
|
||||
|
||||
|
||||
screenshot = get_screenshot()
|
||||
base_boxes = get_boxes(screenshot)
|
||||
unioned_boxes = union_boxes(base_boxes)
|
||||
|
||||
save_highlighted_screenshot(
|
||||
screenshot, unioned_boxes, "2_unioned_highlighted_screenshot"
|
||||
)
|
||||
reader = easyocr.Reader(["en"], gpu=True)
|
||||
result = reader.readtext(screenshot)
|
||||
|
||||
|
||||
for bbox, text, prob in result:
|
||||
(tl, tr, br, bl) = bbox
|
||||
tl = (int(tl[0]), int(tl[1]))
|
||||
tr = (int(tr[0]), int(tr[1]))
|
||||
br = (int(br[0]), int(br[1]))
|
||||
bl = (int(bl[0]), int(bl[1]))
|
||||
|
||||
text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
|
||||
cv2.rectangle(screenshot, tl, br, (0, 255, 0), 2)
|
||||
cv2.putText(
|
||||
screenshot,
|
||||
text,
|
||||
(tl[0], tl[1] - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.8,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
|
||||
print(result)
|
||||
|
||||
cv2.imshow("screenshot", screenshot)
|
||||
cv2.waitKey(0)
|
||||
|
||||
@@ -1,19 +1,34 @@
|
||||
et-xmlfile==1.1.0
|
||||
certifi==2022.12.7
|
||||
charset-normalizer==3.1.0
|
||||
easyocr==1.6.2
|
||||
idna==3.4
|
||||
imageio==2.26.0
|
||||
lazy_loader==0.1
|
||||
MouseInfo==0.1.3
|
||||
networkx==3.0
|
||||
ninja==1.11.1
|
||||
numpy==1.24.2
|
||||
opencv-python==4.7.0.72
|
||||
openpyxl==3.1.1
|
||||
packaging==23.0
|
||||
pandas==1.5.3
|
||||
Pillow==9.4.0
|
||||
PyAutoGUI==0.9.53
|
||||
pyclipper==1.3.0.post4
|
||||
PyGetWindow==0.0.9
|
||||
PyMsgBox==1.0.9
|
||||
pyperclip==1.8.2
|
||||
PyRect==0.2.0
|
||||
PyScreeze==0.1.28
|
||||
pytesseract==0.3.10
|
||||
python-dateutil==2.8.2
|
||||
python-bidi==0.4.2
|
||||
pytweening==1.0.4
|
||||
pytz==2022.7.1
|
||||
PyWavelets==1.4.1
|
||||
PyYAML==6.0
|
||||
requests==2.28.2
|
||||
scikit-image==0.20.0
|
||||
scipy==1.10.1
|
||||
shapely==2.0.1
|
||||
six==1.16.0
|
||||
tifffile==2023.2.28
|
||||
torch==1.13.1+cu116
|
||||
torchvision==0.14.1+cu116
|
||||
typing_extensions==4.5.0
|
||||
urllib3==1.26.14
|
||||
|
||||
Reference in New Issue
Block a user