You've already forked MineEVECraft
mirror of
https://github.com/Llloooggg/MineEVECraft.git
synced 2026-03-06 03:36:24 +03:00
Базовое определение текста через easyocr
This commit is contained in:
139
main.py
139
main.py
@@ -1,17 +1,15 @@
|
||||
import time
|
||||
import math
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pygetwindow as gw
|
||||
import pyautogui
|
||||
from pytesseract import pytesseract
|
||||
from pytesseract import Output
|
||||
import cv2
|
||||
import easyocr
|
||||
|
||||
|
||||
# Window title string; presumably the game window to capture — its use is
# outside this view, confirm against get_screenshot().
win_name = "EVE - Nostrom Stone"
|
||||
# Point pytesseract at the Tesseract executable (hard-coded Windows path).
pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
@@ -22,8 +20,6 @@ logging.basicConfig(
|
||||
|
||||
# Debug switch: when true, get_boxes() and union_boxes() dump their
# intermediate tables to xlsx files (and get_boxes() also saves a highlighted
# screenshot).
save_result = True
|
||||
|
||||
# Make pandas treat +/-inf as missing values in its null checks.
# NOTE(review): this option is deprecated in recent pandas releases — confirm
# the pinned pandas version still supports it.
pd.options.mode.use_inf_as_na = True
|
||||
|
||||
|
||||
def save_highlighted_screenshot(screenshot, boxes, filename):
|
||||
new_image = screenshot.copy()
|
||||
@@ -90,108 +86,33 @@ def get_screenshot():
|
||||
return cv2.cvtColor(np.array(screenshot), cv2.COLOR_BGR2RGB)
|
||||
|
||||
|
||||
def get_boxes(screenshot):
    """Run Tesseract OCR on a screenshot and return the confident word boxes.

    The image is converted to grayscale and inverted before recognition.
    Rows of the Tesseract output are kept only when their confidence is
    above 30 and their text is a non-null, purely alphanumeric token.

    When the module-level ``save_result`` flag is true, the raw and the
    filtered tables are written to ``xlsx/`` and a highlighted screenshot
    of the filtered boxes is saved.

    Args:
        screenshot: Colour image array accepted by
            ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns:
        pandas.DataFrame: An independent copy of the filtered rows of
        ``pytesseract.image_to_data`` (same columns as the raw output).
    """
    inverted_screenshot = cv2.bitwise_not(
        cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
    )

    raw_boxes = pytesseract.image_to_data(
        inverted_screenshot,
        lang="eng",
        output_type=Output.DATAFRAME,
        config="--psm 3",
    )

    if save_result:
        raw_boxes.to_excel("xlsx/0_raw_boxes.xlsx", index=False)

    # pandas may infer a non-string dtype for "text" (for example when every
    # recognised token is numeric or empty); the .str accessor would then
    # raise AttributeError, so the column is cast to str before the check.
    # Null rows become the string "nan" and are excluded by notnull().
    text = raw_boxes["text"]
    is_word = text.notnull() & text.astype(str).str.isalnum()

    # .copy() detaches the result from raw_boxes so callers can modify it
    # without chained-assignment warnings.
    base_boxes = raw_boxes.loc[(raw_boxes["conf"] > 30) & is_word].copy()

    if save_result:
        save_highlighted_screenshot(
            screenshot, base_boxes, "1_base_highlighted_screenshot"
        )
        base_boxes.to_excel("xlsx/1_base_boxes.xlsx", index=False)

    logging.info("Боксы получены")

    return base_boxes
|
||||
|
||||
|
||||
def union_boxes(base_boxes):
    """Merge word boxes into one phrase box per text line.

    Words are grouped by ``(block_num, par_num, line_num)``. Each group
    yields one row whose geometry spans the words of that line:

    * ``left``   - smallest ``left`` of the line's words;
    * ``top``    - smallest ``top`` of the line's words;
    * ``width``  - distance from ``left`` to the largest ``left + width``
      of the line's words, so the box ends at the line's real right edge;
    * ``height`` - largest word ``height`` in the line;
    * ``text``   - the line's words joined with single spaces, in input
      order.

    When the module-level ``save_result`` flag is true, the result is also
    written to ``xlsx/2_unioned_boxes.xlsx``.

    Args:
        base_boxes: DataFrame with at least the columns ``left``, ``top``,
            ``width``, ``height``, ``text``, ``block_num``, ``par_num`` and
            ``line_num``.

    Returns:
        pandas.DataFrame: One row per line with the columns ``left``,
        ``top``, ``width``, ``height``, ``text``, ``block_num``,
        ``line_num``, ``par_num``. Rows follow the order in which lines
        first appear in ``base_boxes``. An empty input gives an empty frame
        with the same columns.
    """
    columns = [
        "left",
        "top",
        "width",
        "height",
        "text",
        "block_num",
        "line_num",
        "par_num",
    ]

    if base_boxes.empty:
        result_phrases = pd.DataFrame(columns=columns)
    else:
        # Right edge of every word; the per-line maximum is the right edge
        # of the merged phrase box.
        words = base_boxes.assign(
            right=base_boxes["left"] + base_boxes["width"]
        )

        # A single grouped aggregation replaces repeated merges and
        # concatenation inside nested loops.
        result_phrases = words.groupby(
            ["block_num", "par_num", "line_num"], as_index=False, sort=False
        ).agg(
            left=("left", "min"),
            top=("top", "min"),
            right=("right", "max"),
            height=("height", "max"),
            # map(str, ...) keeps the join working for non-string tokens.
            text=("text", lambda tokens: " ".join(map(str, tokens))),
        )
        result_phrases["width"] = (
            result_phrases["right"] - result_phrases["left"]
        )
        result_phrases = result_phrases[columns]

    if save_result:
        result_phrases.to_excel("xlsx/2_unioned_boxes.xlsx", index=False)

    logging.info("Боксы объединены")

    return result_phrases
|
||||
|
||||
|
||||
# --- Tesseract pipeline: capture, detect word boxes, merge them per line ---
screenshot = get_screenshot()
base_boxes = get_boxes(screenshot)
unioned_boxes = union_boxes(base_boxes)

save_highlighted_screenshot(
    screenshot, unioned_boxes, "2_unioned_highlighted_screenshot"
)

# --- EasyOCR pass over the same screenshot ---
reader = easyocr.Reader(["en"], gpu=True)
result = reader.readtext(screenshot)

# Draw every detection (green frame plus its recognised text) directly onto
# the screenshot.
for bbox, text, prob in result:
    # Each bbox is unpacked as four corner points; cv2 needs integer
    # pixel coordinates.
    tl, tr, br, bl = [(int(corner[0]), int(corner[1])) for corner in bbox]

    # Drop every non-ASCII character from the label, then trim whitespace.
    text = text.encode("ascii", "ignore").decode("ascii").strip()

    cv2.rectangle(screenshot, tl, br, (0, 255, 0), 2)
    cv2.putText(
        screenshot,
        text,
        (tl[0], tl[1] - 10),  # label sits 10 px above the top-left corner
        cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=0.8,
        color=(0, 255, 0),
        thickness=2,
    )

print(result)

# Show the annotated image and block until a key is pressed.
cv2.imshow("screenshot", screenshot)
cv2.waitKey(0)
|
||||
|
||||
Reference in New Issue
Block a user