From 5498b2b8c83b84a103f88c50e4c56647ae15b4c8 Mon Sep 17 00:00:00 2001 From: Llloooggg Date: Wed, 14 May 2025 00:39:11 +0300 Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=D0=B0=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA?= =?UTF-8?q?=D0=B0=20=D1=81=D1=82=D0=BE=D1=80=D0=BE=D0=BD=D0=BD=D0=B8=D0=BC?= =?UTF-8?q?=D0=B8=20=D1=83=D1=82=D0=B8=D0=BB=D0=B8=D1=82=D0=B0=D0=BC=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- compile.bat | 1 + image_compressor.py | 313 ++++++++++++++++++++++++++++++++++++++++++++ main.py | 241 ---------------------------------- 3 files changed, 314 insertions(+), 241 deletions(-) create mode 100644 compile.bat create mode 100644 image_compressor.py delete mode 100644 main.py diff --git a/compile.bat b/compile.bat new file mode 100644 index 0000000..48b0192 --- /dev/null +++ b/compile.bat @@ -0,0 +1 @@ +pyinstaller --onefile --console --add-binary "tools/*.exe:tools" ./image_compressor.py \ No newline at end of file diff --git a/image_compressor.py b/image_compressor.py new file mode 100644 index 0000000..98d8691 --- /dev/null +++ b/image_compressor.py @@ -0,0 +1,313 @@ +import os +import sys +import hashlib +import sqlite3 +import subprocess +import logging +import argparse +import threading +import multiprocessing +import warnings +from PIL import Image, ImageFile +from pathlib import Path +from concurrent.futures import ThreadPoolExecutor, as_completed + +ImageFile.LOAD_TRUNCATED_IMAGES = True + +warnings.filterwarnings("ignore", category=UserWarning, module="PIL") + +logging.basicConfig( + filename="image_compressor.log", + level=logging.INFO, + format="%(asctime)s - %(message)s", +) + +MIN_SIZE = 1.5 * 1024 * 1024 +TARGET_SIZE_MB = 1.5 * 1024 * 1024 +MAX_WORKERS = min(32, (multiprocessing.cpu_count() or 1) * 5) + +DB_PATH = "image_compressor.db" +conn = sqlite3.connect(DB_PATH, check_same_thread=False) +cursor = conn.cursor() +cursor.execute("CREATE TABLE IF NOT EXISTS processed (hash TEXT PRIMARY KEY)") +conn.commit() + +processed_count = 0 +skipped_count = 0 +total_saved_bytes = 0 +db_lock = threading.Lock() + + +def get_tool_path(tool_name): + if hasattr(sys, "_MEIPASS"): + return Path(sys._MEIPASS) / "tools" / tool_name + return Path("tools") / tool_name + + +def file_hash(path): + hasher = hashlib.sha256() + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(65536), b""): + hasher.update(chunk) + return hasher.hexdigest() + + +def extract_exif(path): + try: + with Image.open(path) as img: + return img.info.get("exif") + except: + return None + + +def inject_exif(jpeg_path, exif_bytes): + try: + with Image.open(jpeg_path) as img: + rgb = img.convert("RGB") + rgb.save(jpeg_path, "JPEG", exif=exif_bytes) + except Exception as e: + logging.error(f"Ошибка при вставке EXIF в {jpeg_path}: {e}") + + +def compress_with_external(path: str, ext: str) -> bool: + path = Path(path) + original_size = path.stat().st_size + tmp_path = path.with_name(path.name + ".compressed") + target_size = TARGET_SIZE_MB + + try: + if ext == ".png": + tool = get_tool_path("oxipng.exe") + for compression_level in range(1, 8): + subprocess.run( + [ + tool, + "--strip", + "safe", + f"-o{compression_level}", + "--out", + str(tmp_path), + str(path), + ], + check=True, + ) + if os.path.getsize(tmp_path) <= target_size: + break + elif ext in [".jpg", ".jpeg"]: + exif_data = extract_exif(path) + + tool = get_tool_path("cjpeg-static.exe") + quality = 85 + while True: + subprocess.run( + [ + tool, + f"-quality", + str(quality), + f"-outfile", + str(tmp_path), + str(path), + ], + check=True, + ) + if os.path.getsize(tmp_path) <= target_size or quality < 50: + break + quality -= 5 + + if tmp_path.exists() and exif_data: + inject_exif(tmp_path, exif_data) + + elif ext == ".webp": + tool = get_tool_path("cwebp.exe") + quality = 80 + while True: + subprocess.run( + [ + tool, + str(path), + "-o", + str(tmp_path), + "-m", + "6", + "-q", + str(quality), + "-metadata", + "all", + ], + check=True, + ) + if os.path.getsize(tmp_path) <= target_size or quality < 50: + break + quality -= 5 + else: + return False + except FileNotFoundError: + return None + + if tmp_path.exists(): + new_size = os.path.getsize(tmp_path) + if new_size < original_size: + tmp_path.replace(path) + return True + else: + tmp_path.unlink() + return False + + +def compress_with_pillow(path: str) -> bool: + ext = Path(path).suffix.lower() + original_size = os.path.getsize(path) + target_size = TARGET_SIZE_MB + temp_path = Path(path).with_suffix(".pillowtmp") + + try: + with Image.open(path) as img: + img_format = img.format + exif = img.info.get("exif", None) + quality = 85 + + while quality >= 50: + img.save( + temp_path, + format=img_format, + optimize=True, + quality=quality, + exif=exif, + ) + if temp_path.stat().st_size <= target_size: + break + quality -= 5 + + if temp_path.exists() and temp_path.stat().st_size < original_size: + temp_path.replace(path) + return True + elif temp_path.exists(): + temp_path.unlink() + return False + + except Exception as e: + logging.error(f"Ошибка Pillow для {path}: {e}") + return False + + +def compress_image(path: str, fallback_to_pillow: bool = False): + global processed_count, skipped_count, total_saved_bytes + + try: + original_size = os.path.getsize(path) + if original_size < MIN_SIZE: + skipped_count += 1 + return + + h = file_hash(path) + with db_lock: + cursor.execute("SELECT 1 FROM processed WHERE hash = ?", (h,)) + if cursor.fetchone(): + skipped_count += 1 + return + + ext = Path(path).suffix.lower() + result = compress_with_external(path, ext) + + if result is None and fallback_to_pillow: + result = compress_with_pillow(path) + + new_size = os.path.getsize(path) + if result and new_size < original_size: + saved_bytes = original_size - new_size + total_saved_bytes += saved_bytes + + saved_percent = (1 - new_size / original_size) * 100 + msg = ( + f"Сжато: {path} " + f"({original_size / 1024:.1f} KB -> {new_size / 1024:.1f} KB, " + f"сохранено {saved_percent:.2f}%)" + ) + logging.info(msg) + else: + msg = f"Пропущено (не меньше): {path} ({original_size / 1024:.1f} KB)" + logging.info(msg) + + with db_lock: + cursor.execute("INSERT INTO processed(hash) VALUES(?)", (h,)) + conn.commit() + processed_count += 1 + + except Exception as e: + logging.error(f"Ошибка обработки {path}: {e}") + + +def find_images(root: str): + exts = {".png", ".jpg", ".jpeg", ".webp"} + for dirpath, _, filenames in os.walk(root): + for f in filenames: + if Path(f).suffix.lower() in exts: + yield Path(dirpath) / f + + +def main(): + parser = argparse.ArgumentParser(description="Компрессор изображенеий") + parser.add_argument( + "--input", + help="Путь для сканирования. По умолчанию текущая директория.", + default=None, + ) + parser.add_argument( + "--dry-run", + help="Показать, что будет сделано, без изменения файлов.", + action="store_true", + ) + args = parser.parse_args() + + if args.input: + input_dir = Path(args.input) + else: + print( + "Не указан путь. Обрабатывать текущую папку и все подпапки? [y/n]" + ) + choice = input().strip().lower() + if choice != "y": + print("Отменено.") + return + input_dir = Path(os.getcwd()) + + print("Проверка утилит...") + required_tools = ["cjpeg-static.exe", "cwebp.exe", "oxipng.exe"] + missing = [] + for tool in required_tools: + if not os.path.exists(get_tool_path(tool)): + missing.append(tool) + + fallback = False + if missing: + print("Не найдены внешние утилиты:", ", ".join(missing)) + print("Использовать Pillow вместо них, где возможно? [y/n]") + if input().strip().lower() == "y": + fallback = True + else: + print("Без утилит работа невозможна.") + return + + files = list(find_images(input_dir)) + print(f"Найдено {len(files)} изображений.") + + with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: + futures = [executor.submit(compress_image, f, fallback) for f in files] + for i, _ in enumerate(as_completed(futures), 1): + print(f"\rОбработка изображений: {i}/{len(files)}", end="") + + print("\nОбработка завершена.") + print(f"Всего обработано: {processed_count}") + print(f"Пропущено (маленькие/повтор): {skipped_count}") + print(f"Сэкономлено в среднем: {total_saved_bytes / 1024 / 1024:.2f} MB") + logging.info( + f"Завершено. Обработано: {processed_count}, Пропущено: {skipped_count}, Сэкономлено: {total_saved_bytes / 1024 / 1024:.2f} MB" + ) + + +if __name__ == "__main__": + try: + main() + except Exception as e: + logging.exception("Ошибка в main()") + input() diff --git a/main.py b/main.py deleted file mode 100644 index 32eaf33..0000000 --- a/main.py +++ /dev/null @@ -1,241 +0,0 @@ -import os -import hashlib -import sqlite3 -import argparse -import logging -from PIL import Image, ImageFile -from io import BytesIO -from concurrent.futures import ThreadPoolExecutor, as_completed -from tqdm import tqdm -import threading - -ImageFile.LOAD_TRUNCATED_IMAGES = True - -DB_NAME = "compressed_images.db" -LOG_FILE = "compression.log" -TARGET_SIZE_MB = 1.5 -MIN_SIZE_MB = 1.0 -THREADS = os.cpu_count() or 4 -LOCK = threading.Lock() - -stats = { - "total": 0, - "skipped": 0, - "already_done": 0, - "compressed": 0, - "original_bytes": 0, - "compressed_bytes": 0, -} - - -def init_logging(): - logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(message)s", - handlers=[ - logging.FileHandler(LOG_FILE, mode="w", encoding="utf-8"), - logging.StreamHandler(), - ], - ) - - -def init_db(): - conn = sqlite3.connect(DB_NAME) - with conn: - conn.execute( - """ - CREATE TABLE IF NOT EXISTS compressed_images ( - hash TEXT PRIMARY KEY - ) - """ - ) - conn.close() - - -def calculate_hash(file_path): - hasher = hashlib.sha256() - with open(file_path, "rb") as f: - for chunk in iter(lambda: f.read(65536), b""): - hasher.update(chunk) - return hasher.hexdigest() - - -def is_already_compressed(file_hash): - with LOCK: - conn = sqlite3.connect(DB_NAME) - cur = conn.cursor() - cur.execute( - "SELECT 1 FROM compressed_images WHERE hash = ?", (file_hash,) - ) - result = cur.fetchone() - conn.close() - return result is not None - - -def mark_as_compressed(file_hash): - with LOCK: - conn = sqlite3.connect(DB_NAME) - conn.execute( - "INSERT OR IGNORE INTO compressed_images (hash) VALUES (?)", - (file_hash,), - ) - conn.commit() - conn.close() - - -def compress_image(file_path): - try: - stats["total"] += 1 - original_size = os.path.getsize(file_path) - size_mb = original_size / (1024 * 1024) - - if size_mb < MIN_SIZE_MB: - stats["skipped"] += 1 - return f"Пропущено (<1MB): {file_path}" - - file_hash = calculate_hash(file_path) - if is_already_compressed(file_hash): - stats["already_done"] += 1 - return f"Пропущено (уже обработано): {file_path}" - - img = Image.open(file_path) - img_format = img.format - img_exif = img.info.get("exif", None) - - # Поддержка только RGB/RGBA для webp - if img.mode not in ["RGB", "RGBA"]: - img = img.convert("RGB") - - quality = 95 - step = 5 - buffer = BytesIO() - - while quality > 10: - buffer.seek(0) - buffer.truncate() - try: - img.save( - buffer, - format=img_format, - quality=quality, - optimize=True, - exif=img_exif, - ) - except Exception: - img.save( - buffer, format=img_format, quality=quality, optimize=True - ) - size = buffer.tell() / (1024 * 1024) - if size <= TARGET_SIZE_MB: - break - quality -= step - - compressed_size = buffer.tell() - - if compressed_size < original_size: - with open(file_path, "wb") as f: - f.write(buffer.getvalue()) - - stats["compressed"] += 1 - stats["original_bytes"] += original_size - stats["compressed_bytes"] += compressed_size - mark_as_compressed(file_hash) - - saved = original_size - compressed_size - return f"Сжато: {file_path} (-{saved / 1024 / 1024:.2f} MB)" - else: - stats["skipped"] += 1 - return f"Без изменений (не удалось уменьшить): {file_path}" - except Exception as e: - return f"Ошибка: {file_path} — {e}" - - -def walk_images(root): - supported = {".jpg", ".jpeg", ".png", ".webp"} - for dirpath, _, filenames in os.walk(root): - for name in filenames: - if os.path.splitext(name)[1].lower() in supported: - yield os.path.join(dirpath, name) - - -def get_user_confirmation(default_path): - print( - f"Будет выполнен рекурсивный обход всех изображений по пути: {default_path}" - ) - answer = input("Продолжить? [y/n]: ").strip().lower() - return answer == "y" - - -def print_summary(): - original = stats["original_bytes"] - compressed = stats["compressed_bytes"] - saved = original - compressed - - if original > 0: - saved_pct = saved / original * 100 - else: - saved_pct = 0.0 - - summary = ( - f"\n==== СТАТИСТИКА ====\n" - f"Всего файлов: {stats['total']}\n" - f"Сжато: {stats['compressed']}\n" - f"Пропущено: {stats['skipped']} (мелкие/не уменьшено)\n" - f"Уже обработано ранее: {stats['already_done']}\n" - f"Экономия: {saved / 1024 / 1024:.2f} MB ({saved_pct:.2f}%)\n" - f"====================\n" - ) - logging.info(summary) - - -def main(): - init_logging() - - parser = argparse.ArgumentParser( - description="Сжатие изображений с сохранением качества, EXIF и учётом уже обработанных." - ) - parser.add_argument( - "--input", - "-i", - type=str, - help="Путь к папке с изображениями", - default=".", - ) - parser.add_argument( - "--output", - "-o", - type=str, - help="Путь для вывода (не используется пока)", - default=None, - ) - args = parser.parse_args() - - input_path = os.path.abspath(args.input) - - if input_path == os.path.abspath(".") and not get_user_confirmation( - input_path - ): - print("Операция отменена.") - return - - init_db() - images = list(walk_images(input_path)) - - logging.info(f"Найдено {len(images)} изображений для обработки...") - - with ThreadPoolExecutor(max_workers=THREADS) as executor: - futures = {executor.submit(compress_image, img): img for img in images} - for future in tqdm( - as_completed(futures), - total=len(futures), - desc="Обработка изображений", - ): - result = future.result() - if result: - logging.info(result) - - print_summary() - - -if __name__ == "__main__": - main()