Исправление записи в БД

This commit is contained in:
2025-05-15 19:54:08 +03:00
parent 269269c66e
commit 7f64e1ce25

View File

@@ -34,8 +34,8 @@ skipped_count = 0
skipped_size_count = 0 skipped_size_count = 0
error_count = 0 error_count = 0
total_saved_bytes = 0 total_saved_bytes = 0
total_original_size = 0 total_images_original_size = 0
total_new_size = 0 total_images_new_size = 0
db_lock = threading.Lock() db_lock = threading.Lock()
processed_hashes = set() processed_hashes = set()
@@ -53,6 +53,23 @@ def get_tool_path(name: str) -> Path:
return Path("tools") / name return Path("tools") / name
def get_folder_size(path: Path) -> int:
total_size = 0
for dirpath, dirnames, filenames in os.walk(path):
for filename in filenames:
file_path = os.path.abspath(os.path.join(dirpath, filename))
if file_path in [
"image_compressor.exe",
"image_compressor.py",
"image_compressor.db",
"image_compressor.db-journal",
"image_compressor.log",
]:
continue
total_size += os.path.getsize(file_path)
return total_size
def file_hash(path: Path) -> str: def file_hash(path: Path) -> str:
hasher = hashlib.sha256() hasher = hashlib.sha256()
with path.open("rb") as f: with path.open("rb") as f:
@@ -206,73 +223,65 @@ def compress_with_pillow(path: Path) -> Tuple[bool, Path]:
def compress_image(path: Path, use_fallback: bool = False): def compress_image(path: Path, use_fallback: bool = False):
global processed_count, skipped_count, skipped_size_count, error_count, total_saved_bytes, total_original_size, total_new_size global processed_count, skipped_count, skipped_size_count, error_count, total_saved_bytes, total_images_original_size, total_images_new_size, processed_hashes
try: try:
if not path.exists(): original_size = path.stat().st_size
skipped_size_count += 1 total_images_original_size += original_size
logging.info(
f"Пропущено (не найден): {path} ({path.stat().st_size // 1024}KB)"
)
return
h = file_hash(path) h = file_hash(path)
if path.stat().st_size < MIN_SIZE: if path.stat().st_size < MIN_SIZE:
skipped_size_count += 1
logging.info( logging.info(
f"Пропущено (малый размер): {path} ({path.stat().st_size // 1024} KB)" f"Пропущено (малый размер): {path} ({path.stat().st_size // 1024} KB)"
) )
processed_hashes.add(h) processed_hashes.add(h)
skipped_size_count += 1
total_images_new_size += original_size
return return
original_size = path.stat().st_size
total_original_size += original_size
with db_lock: with db_lock:
cursor.execute( cursor.execute(
"SELECT filename FROM processed_images WHERE hash = ?", (h,) "SELECT filename FROM processed_images WHERE hash = ?", (h,)
) )
row = cursor.fetchone() row = cursor.fetchone()
if row:
hash_files = row[0].split("|")
file_path = str(path) file_path = str(path)
if file_path in hash_files: if row:
processed_hashes.add(h)
skipped_count += 1 skipped_count += 1
total_images_new_size += original_size
hash_files = row[0].split("|")
if file_path in hash_files:
logging.info( logging.info(
f"Пропущено (уже обработано): {path} ({original_size // 1024} KB)" f"Пропущено (уже обработано): {path} ({original_size // 1024} KB)"
) )
processed_hashes.add(h)
return
else: else:
hash_files.append(str(path)) hash_files.append(file_path)
cursor.execute( cursor.execute(
"UPDATE processed_images SET filename = ? WHERE hash = ?", "UPDATE processed_images SET filename = ? WHERE hash = ?",
("|".join(hash_files), h), ("|".join(hash_files), h),
) )
conn.commit() conn.commit()
skipped_count += 1
logging.info( logging.info(
f"Пропущено (дубликат хэша, другой путь): {path} ({original_size // 1024} KB)" f"Пропущено (дубликат хэша, другой путь): {path} ({original_size // 1024} KB)"
) )
processed_hashes.add(h)
return return
ext = path.suffix.lower() ext = path.suffix.lower()
result, final_path = compress_with_external(path, ext) result, final_path = compress_with_external(path, ext)
if result is None and use_fallback: if not result and use_fallback:
result, final_path = compress_with_pillow(path) result, final_path = compress_with_pillow(path)
if not final_path.exists():
error_count += 1
logging.error(f"Файл не найден после сжатия: {final_path}")
return
new_size = final_path.stat().st_size new_size = final_path.stat().st_size
total_new_size += new_size total_images_new_size += new_size
if result: if result:
h = file_hash(final_path)
processed_count += 1
saved = original_size - new_size saved = original_size - new_size
total_saved_bytes += saved total_saved_bytes += saved
percent = (1 - new_size / original_size) * 100 percent = (1 - new_size / original_size) * 100
@@ -280,18 +289,17 @@ def compress_image(path: Path, use_fallback: bool = False):
f"Сжато: {path} ({original_size//1024} KB -> {new_size//1024} KB, {percent:.2f}%)" f"Сжато: {path} ({original_size//1024} KB -> {new_size//1024} KB, {percent:.2f}%)"
) )
h = file_hash(final_path)
with db_lock: with db_lock:
cursor.execute( cursor.execute(
"SELECT filename FROM processed_images WHERE hash = ?", "SELECT filename FROM processed_images WHERE hash = ?",
(h,), (h,),
) )
row = cursor.fetchone() row = cursor.fetchone()
file_path = str(final_path)
if row: if row:
hash_files = row[0].split("|") hash_files = row[0].split("|")
file_path = str(path)
if file_path not in hash_files: if file_path not in hash_files:
hash_files.append(str(path)) hash_files.append(file_path)
cursor.execute( cursor.execute(
"UPDATE processed_images SET filename = ? WHERE hash = ?", "UPDATE processed_images SET filename = ? WHERE hash = ?",
("|".join(hash_files), h), ("|".join(hash_files), h),
@@ -300,17 +308,17 @@ def compress_image(path: Path, use_fallback: bool = False):
else: else:
cursor.execute( cursor.execute(
"INSERT INTO processed_images(hash, filename) VALUES(?, ?)", "INSERT INTO processed_images(hash, filename) VALUES(?, ?)",
(h, str(final_path)), (h, file_path),
) )
conn.commit() conn.commit()
processed_hashes.add(h)
processed_count += 1
else: else:
error_count += 1 error_count += 1
processed_hashes.add(h)
except Exception as e: except Exception as e:
error_count += 1 error_count += 1
total_images_new_size += original_size
logging.error(f"Ошибка при обработке {path}: {e}") logging.error(f"Ошибка при обработке {path}: {e}")
@@ -373,11 +381,13 @@ def main():
return return
use_fallback = True use_fallback = True
total_original_size = get_folder_size(input_dir)
files = prepare_and_copy_files(input_dir, output_dir) files = prepare_and_copy_files(input_dir, output_dir)
total_files = len(files) total_files = len(files)
print(f"Найдено {total_files} изображений.") print(f"Найдено {total_files} изображений.")
logging.info( logging.info(
f"Найдено {total_files} изображений. Fallback = {use_fallback}" f"Начато. Найдено {total_files} изображений. Fallback = {use_fallback}"
) )
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
@@ -387,6 +397,8 @@ def main():
for i, _ in enumerate(as_completed(futures), 1): for i, _ in enumerate(as_completed(futures), 1):
print(f"\rОбработка: {i}/{len(files)}", end="") print(f"\rОбработка: {i}/{len(files)}", end="")
total_new_size = get_folder_size(input_dir)
with db_lock: with db_lock:
cursor.execute("SELECT hash, filename FROM processed_images") cursor.execute("SELECT hash, filename FROM processed_images")
all_records = cursor.fetchall() all_records = cursor.fetchall()
@@ -398,12 +410,23 @@ def main():
db_file_list = [ db_file_list = [
f.strip() for f in filenames.split("|") if f.strip() f.strip() for f in filenames.split("|") if f.strip()
] ]
real_file_list = [f for f in db_file_list if Path(f).exists()] real_file_list = [
if not real_file_list or h in stale_hashes: f
for f in db_file_list
if Path(f).exists() and file_hash(Path(f)) == h
]
reasone = None
if h in stale_hashes:
reasone = "не встречался хэш"
if not real_file_list:
reasone = "файлы не сущестувуют"
if reasone:
cursor.execute( cursor.execute(
"DELETE FROM processed_images WHERE hash = ?", (h,) "DELETE FROM processed_images WHERE hash = ?", (h,)
) )
logging.info(f"Удалена запись в БД: {h} {db_file_list}") logging.info(
f'Удалена запись в БД по причине "{reasone}": {h} {db_file_list}'
)
deleted_count += 1 deleted_count += 1
else: else:
cursor.execute( cursor.execute(
@@ -412,34 +435,42 @@ def main():
) )
conn.commit() conn.commit()
print(f"\nУдалено устаревших записей из БД: {deleted_count}") print(f"\n\nУдалено записей в БД: {deleted_count}")
logging.info(f"Удалено устаревших записей из БД: {deleted_count}") logging.info(f"Удалено записей в БД: {deleted_count}")
print("\n\nГотово.") print("\nГотово.")
print(f"Обработано успешно: {processed_count}") print(f"Обработано успешно: {processed_count}")
print(f"Уже обработано: {skipped_count}") print(f"Пропущено уже обработанных: {skipped_count}")
print(f"Пропущено из-за размера: {skipped_size_count}") print(f"Пропущено малых: {skipped_size_count}")
print(f"Ошибки: {error_count}") print(f"Ошибки: {error_count}")
if total_original_size > 0: if total_original_size > 0:
total_percent_saved = (1 - total_new_size / total_original_size) * 100 total_percent_saved = (1 - total_new_size / total_original_size) * 100
print( else:
f"Общий размер до сжатия: {total_original_size / 1024 / 1024:.2f} MB" total_percent_saved = 0
) if total_images_original_size > 0:
print(f"Сэкономлено в процентах: {total_percent_saved:.2f}%") total_images_percent_saved = (
1 - total_images_new_size / total_images_original_size
) * 100
else:
total_images_percent_saved = 0
msg_total = f"Сжато всего: {total_original_size / 1024 / 1024:.2f} MB -> {total_new_size / 1024 / 1024:.2f} MB, {total_percent_saved:.2f}%)"
msg_total_images = f"Сжато изображений: {total_images_original_size / 1024 / 1024:.2f} MB -> {total_images_new_size / 1024 / 1024:.2f} MB, {total_images_percent_saved:.2f}%)"
print(msg_total)
print(msg_total_images)
logging.info( logging.info(
f"Завершено. Обработано успешно: {processed_count}, Уже обработано: {skipped_count}, Пропущено из-за размера: {skipped_size_count}, Ошибки: {error_count}" f"Завершено. Обработано успешно: {processed_count}, Уже обработано: {skipped_count}, Пропущено: {skipped_size_count}, Ошибки: {error_count}"
)
if total_original_size > 0:
total_percent_saved = (1 - total_new_size / total_original_size) * 100
logging.info(
f"Общий размер до сжатия: {total_original_size / 1024 / 1024:.2f} MB, Сэкономлено в процентах: {total_percent_saved:.2f}%"
) )
logging.info(msg_total)
logging.info(msg_total_images)
if __name__ == "__main__": if __name__ == "__main__":
try: try:
main() main()
except Exception: except Exception:
print("\nОшибка в main()")
logging.exception("Ошибка в main()") logging.exception("Ошибка в main()")
input("Нажмите Enter для выхода...") input("\nНажмите Enter для выхода...")