# -*- coding: utf-8 -*-
"""
🤖 本地模型管理模块 - 使用 llama-cpp-python 驱动本地大语言模型

支持功能：
- 模型下载（支持断点续传）
- 模型加载和卸载
- 文本生成和分类
- 与现有分类逻辑集成
"""

import os
import sys
import json
import threading
import time
from pathlib import Path
from typing import Optional, Callable, List, Dict, Any
from logger import logger

# Directory that holds downloaded model files; created on import if missing.
MODEL_DIR = Path.home().joinpath(".fileneatai", "models")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# JSON file used to persist the parameters of the most recently loaded model.
CONFIG_FILE = Path.home().joinpath(".fileneatai", "local_model_config.json")


# ═══════════════════════════════════════════════════════════
# GPU / CUDA 自动检测
# ═══════════════════════════════════════════════════════════

def _run_hidden(cmd: List[str], timeout: float = 5):
    """Run *cmd* and capture its text output without opening a console window.

    ``creationflags`` is only passed on Windows: on every other platform
    ``subprocess`` raises ``ValueError`` for a non-zero value, which would make
    each probe fail before the program is even started.
    """
    import subprocess

    extra = {}
    if sys.platform == "win32":
        extra["creationflags"] = 0x08000000  # CREATE_NO_WINDOW
    return subprocess.run(cmd, capture_output=True, text=True, timeout=timeout, **extra)


def _find_cuda_runtime_dll() -> Optional[str]:
    """Return the name of the first CUDA DLL found in well-known Windows locations, else None."""
    dll_names = ["cudart64_12.dll", "cudart64_11.dll", "nvcuda.dll"]
    toolkit_root = Path(r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA")
    search_dirs = [Path(r"C:\Windows\System32"), toolkit_root]

    # The toolkit keeps its DLLs in <root>\vX.Y\bin rather than in the root
    # itself, so the versioned bin directories have to be searched as well.
    try:
        search_dirs.extend(sorted(toolkit_root.glob("v*/bin")))
    except OSError:
        pass  # best-effort probe; an unreadable directory just means "not found"

    cuda_path = os.environ.get("CUDA_PATH")
    if cuda_path:
        search_dirs.append(Path(cuda_path) / "bin")

    for dll_name in dll_names:
        for search_dir in search_dirs:
            if (search_dir / dll_name).exists():
                return dll_name
    return None


def detect_nvidia_gpu() -> dict:
    """
    Detect whether an NVIDIA GPU and a usable CUDA environment are present.

    The detection is best-effort: every probe failure is logged at debug level
    and treated as "not available", so this function does not raise for a
    missing tool or library.

    Returns:
        dict: {
            "has_nvidia_gpu": bool,        # an NVIDIA GPU was reported by nvidia-smi
            "gpu_name": str,               # name of the first GPU reported
            "has_cuda_runtime": bool,      # nvcc or a CUDA DLL was found
            "cuda_version": str,           # CUDA release reported by nvcc ("" if unknown)
            "recommended_gpu_layers": int, # -1 = offload everything to GPU, 0 = CPU only
            "message": str                 # human-readable status text
        }
    """
    result = {
        "has_nvidia_gpu": False,
        "gpu_name": "",
        "has_cuda_runtime": False,
        "cuda_version": "",
        "recommended_gpu_layers": 0,
        "message": ""
    }

    # 1. Look for an NVIDIA GPU through nvidia-smi.
    try:
        proc = _run_hidden(
            ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"]
        )
        if proc.returncode == 0 and proc.stdout.strip():
            first_row = proc.stdout.strip().split('\n')[0]
            gpu_name = first_row.split(',')[0].strip() or "NVIDIA GPU"
            result["has_nvidia_gpu"] = True
            result["gpu_name"] = gpu_name
            logger.info(f"🎮 检测到 NVIDIA GPU: {gpu_name}")
    except Exception as e:
        logger.debug(f"nvidia-smi 不可用: {e}")

    # 2. Look for a CUDA runtime (only meaningful when a GPU exists).
    if result["has_nvidia_gpu"]:
        # nvcc is the CUDA Toolkit compiler; its banner carries the release number.
        try:
            import re

            proc = _run_hidden(["nvcc", "--version"])
            if proc.returncode == 0:
                match = re.search(r'release (\d+\.\d+)', proc.stdout)
                if match:
                    result["cuda_version"] = match.group(1)
                    result["has_cuda_runtime"] = True
                    logger.info(f"🔧 CUDA Toolkit 版本: {result['cuda_version']}")
        except Exception as e:
            logger.debug(f"nvcc 不可用: {e}")

        # Without nvcc the CUDA DLLs may still be present (e.g. shipped with a
        # packaged build), so fall back to looking for the files themselves.
        if not result["has_cuda_runtime"]:
            dll_name = _find_cuda_runtime_dll()
            if dll_name:
                result["has_cuda_runtime"] = True
                logger.info(f"🔧 检测到 CUDA DLL: {dll_name}")

    # 3. Check whether the installed llama-cpp-python ships CUDA libraries.
    llama_has_cuda = False
    try:
        # Import the submodule explicitly: a bare ``import importlib`` does not
        # guarantee that ``importlib.util`` is available as an attribute.
        import importlib.util

        spec = importlib.util.find_spec("llama_cpp")
        if spec and spec.origin:
            lib_dir = os.path.join(os.path.dirname(spec.origin), "lib")
            if os.path.isdir(lib_dir):
                for lib_name in os.listdir(lib_dir):
                    lowered = lib_name.lower()
                    if "cuda" in lowered or "cublas" in lowered:
                        llama_has_cuda = True
                        logger.info(f"🔧 llama-cpp-python 包含 CUDA 支持: {lib_name}")
                        break
    except Exception as e:
        logger.debug(f"llama-cpp-python CUDA 检测失败: {e}")

    # 4. Derive the recommendation and the user-facing message.
    has_gpu = result["has_nvidia_gpu"]
    has_runtime = result["has_cuda_runtime"]
    if has_gpu and llama_has_cuda:
        result["recommended_gpu_layers"] = -1  # offload all layers to the GPU
        result["message"] = f"✅ 检测到 {result['gpu_name']}，已启用 GPU 加速"
    elif has_gpu and has_runtime:
        result["recommended_gpu_layers"] = 0
        result["message"] = (
            f"⚠ 检测到 {result['gpu_name']}，但 llama-cpp-python 未编译 CUDA 支持。\n"
            f"如需 GPU 加速，请用以下命令重装：\n"
            f"set CMAKE_ARGS=-DGGML_CUDA=ON\n"
            f"pip install llama-cpp-python --force-reinstall --no-cache-dir --no-binary llama-cpp-python"
        )
    elif has_gpu:
        result["recommended_gpu_layers"] = 0
        result["message"] = (
            f"⚠ 检测到 {result['gpu_name']}，但未安装 CUDA Toolkit。\n"
            f"下载地址: https://developer.nvidia.com/cuda-downloads\n"
            f"安装 CUDA 后重装 llama-cpp-python 即可启用 GPU 加速"
        )
    else:
        result["recommended_gpu_layers"] = 0
        result["message"] = "未检测到 NVIDIA GPU，使用 CPU 模式运行"

    logger.info(f"🎮 GPU 检测结果: recommended_gpu_layers={result['recommended_gpu_layers']}")
    return result


# Memoised result of detect_nvidia_gpu(), kept so nvidia-smi is not run repeatedly.
_gpu_info_cache = None


def get_gpu_info() -> dict:
    """Return the GPU detection result, running the detection only on first use."""
    global _gpu_info_cache
    cached = _gpu_info_cache
    if cached is not None:
        return cached
    cached = detect_nvidia_gpu()
    _gpu_info_cache = cached
    return cached

def get_recommended_gpu_layers() -> int:
    """Return the recommended GPU layer count (-1 = all layers on GPU, 0 = CPU only)."""
    gpu_info = get_gpu_info()
    return gpu_info["recommended_gpu_layers"]


# ═══════════════════════════════════════════════════════════
# Preset model catalogue (URLs point at the hf-mirror.com mirror)
# ═══════════════════════════════════════════════════════════
def _preset_model(name, description, repo, filename, size_mb, memory_required_mb,
                  recommended=False):
    """Build one catalogue entry; the download URL is derived from *repo* and *filename*."""
    return {
        "name": name,
        "description": description,
        "url": f"https://hf-mirror.com/{repo}/resolve/main/{filename}",
        "filename": filename,
        "size_mb": size_mb,  # approximate download size
        "memory_required_mb": memory_required_mb,  # approximate RAM needed to run
        "context_length": 4096,
        "recommended": recommended,
    }


AVAILABLE_MODELS = {
    "qwen2.5-1.5b": _preset_model(
        "Qwen2.5-1.5B-Instruct (推荐)",
        "通义千问2.5 1.5B参数版，体积小速度快，适合文件分类",
        "Qwen/Qwen2.5-1.5B-Instruct-GGUF",
        "qwen2.5-1.5b-instruct-q4_k_m.gguf",
        1100,  # ~1.1 GB
        2048,  # ~2 GB
        recommended=True,
    ),
    "qwen2.5-3b": _preset_model(
        "Qwen2.5-3B-Instruct",
        "通义千问2.5 3B参数版，效果更好，需要更多内存",
        "Qwen/Qwen2.5-3B-Instruct-GGUF",
        "qwen2.5-3b-instruct-q4_k_m.gguf",
        2100,  # ~2.1 GB
        4096,  # ~4 GB
    ),
    "qwen2.5-7b": _preset_model(
        "Qwen2.5-7B-Instruct",
        "通义千问2.5 7B参数版，效果优秀，需要较大内存",
        "Qwen/Qwen2.5-7B-Instruct-GGUF",
        "qwen2.5-7b-instruct-q4_k_m.gguf",
        4700,  # ~4.7 GB
        8192,  # ~8 GB
    ),
    "llama3.2-1b": _preset_model(
        "Llama3.2-1B-Instruct",
        "Meta Llama3.2 1B参数版，体积最小，英文效果好",
        "bartowski/Llama-3.2-1B-Instruct-GGUF",
        "Llama-3.2-1B-Instruct-Q4_K_M.gguf",
        800,  # ~0.8 GB
        1536,  # ~1.5 GB
    ),
    "llama3.2-3b": _preset_model(
        "Llama3.2-3B-Instruct",
        "Meta Llama3.2 3B参数版，英文效果优秀",
        "bartowski/Llama-3.2-3B-Instruct-GGUF",
        "Llama-3.2-3B-Instruct-Q4_K_M.gguf",
        2000,  # ~2.0 GB
        3072,  # ~3 GB
    ),
}


class ModelDownloader:
    """Download one preset model on a background thread.

    The data is written to ``<filename>.downloading`` inside ``MODEL_DIR`` and
    moved to its final name only after the transfer finished, so a partially
    downloaded file is never mistaken for a usable model.  An existing
    temporary file is resumed with an HTTP ``Range`` request.
    """

    def __init__(self, model_id: str,
                 progress_callback: Optional[Callable[[int, int, float], None]] = None,
                 complete_callback: Optional[Callable[[bool, str], None]] = None):
        """
        Create a downloader for one entry of ``AVAILABLE_MODELS``.

        Args:
            model_id: Key of the model in ``AVAILABLE_MODELS``.
            progress_callback: Called as ``(downloaded_bytes, total_bytes, speed)``
                at most every 0.5 s; ``speed`` is in MiB per second.
            complete_callback: Called once as ``(success, message)``; on success
                ``message`` is the path of the model file.
        """
        self.model_id = model_id
        self.model_info = AVAILABLE_MODELS.get(model_id)
        self.progress_callback = progress_callback
        self.complete_callback = complete_callback
        self._stop_flag = False
        self._thread: Optional[threading.Thread] = None

    def start(self):
        """Start the download thread; reports failure immediately for an unknown model."""
        if not self.model_info:
            self._notify_complete(False, f"未知模型: {self.model_id}")
            return

        # A second worker would append to the same temporary file and corrupt it.
        if self._thread is not None and self._thread.is_alive():
            logger.warning("下载已在进行中，忽略重复的 start() 调用")
            return

        self._stop_flag = False
        self._thread = threading.Thread(target=self._download_thread, daemon=True)
        self._thread.start()

    def stop(self):
        """Request cancellation and wait up to 5 seconds for the worker to finish."""
        self._stop_flag = True
        worker = self._thread
        # Joining the current thread raises RuntimeError; this happens when
        # stop() is invoked from one of the callbacks, which run on the worker.
        if worker and worker.is_alive() and worker is not threading.current_thread():
            worker.join(timeout=5)

    def _notify_complete(self, success: bool, message: str):
        """Invoke the completion callback if one was supplied."""
        if self.complete_callback:
            self.complete_callback(success, message)

    @staticmethod
    def _parse_total_size(content_range: str) -> int:
        """Return the total size from a ``Content-Range`` header value, or 0 if unknown.

        Handles ``bytes 0-99/200`` and ``bytes */200``; an unknown total
        (``.../*``) or a missing/invalid header yields 0 instead of raising.
        """
        _, slash, total = content_range.rpartition("/")
        total = total.strip()
        if not slash or not total.isdigit():
            return 0
        return int(total)

    def _stream_to_file(self, response, temp_filepath, downloaded_size: int) -> bool:
        """Write the body of *response* to *temp_filepath*.

        Args:
            response: Streaming HTTP response (status 200 or 206 expected).
            temp_filepath: Temporary download file.
            downloaded_size: Bytes already present in the temporary file.

        Returns:
            True when the body was fully received, False when cancelled via stop().

        Raises:
            Exception: For an unexpected HTTP status or a truncated transfer.
        """
        size_known = True
        if response.status_code == 206:  # partial content: the server honoured Range
            total_size = self._parse_total_size(response.headers.get("Content-Range", ""))
        elif response.status_code == 200:
            total_size = int(response.headers.get("Content-Length", 0))
            downloaded_size = 0  # the server sent the whole file; start over
        else:
            raise Exception(f"下载失败: HTTP {response.status_code}")

        if total_size == 0:
            size_known = False
            total_size = self.model_info["size_mb"] * 1024 * 1024  # estimate for progress only

        mode = "ab" if downloaded_size > 0 else "wb"
        last_report_time = time.time()
        last_downloaded = downloaded_size

        with open(temp_filepath, mode) as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):  # 1 MiB chunks
                if self._stop_flag:
                    return False
                if not chunk:
                    continue

                f.write(chunk)
                downloaded_size += len(chunk)

                # Report progress and speed at most every 0.5 seconds.
                current_time = time.time()
                elapsed = current_time - last_report_time
                if elapsed >= 0.5:
                    speed = (downloaded_size - last_downloaded) / elapsed / 1024 / 1024
                    if self.progress_callback:
                        self.progress_callback(downloaded_size, total_size, speed)
                    last_report_time = current_time
                    last_downloaded = downloaded_size

        # A connection that ends early can finish the loop without an error.
        # Keep the temporary file (so the next attempt resumes) but do not
        # promote a truncated file to a "complete" model.
        if size_known and downloaded_size < total_size:
            raise Exception(
                f"下载不完整: 已接收 {downloaded_size} / {total_size} 字节，请重试以继续下载"
            )
        return True

    def _download_thread(self):
        """Worker body: resume or start the download and report the outcome."""
        import requests

        response = None
        try:
            url = self.model_info["url"]
            filename = self.model_info["filename"]
            filepath = MODEL_DIR / filename
            temp_filepath = MODEL_DIR / f"{filename}.downloading"

            # Nothing to do when the final file is already there.
            if filepath.exists():
                logger.info(f"模型已存在: {filepath}")
                self._notify_complete(True, str(filepath))
                return

            # Resume support: continue after the bytes already on disk.
            downloaded_size = 0
            if temp_filepath.exists():
                downloaded_size = temp_filepath.stat().st_size
                logger.info(f"断点续传：已下载 {downloaded_size / 1024 / 1024:.1f} MB")

            headers = {}
            if downloaded_size > 0:
                headers["Range"] = f"bytes={downloaded_size}-"

            logger.info(f"开始下载模型: {url}")
            response = requests.get(url, headers=headers, stream=True, timeout=30)

            already_complete = False
            if response.status_code == 416 and downloaded_size > 0:
                # Range not satisfiable: the temporary file is either already
                # complete or does not match the remote file any more.
                remote_total = self._parse_total_size(response.headers.get("Content-Range", ""))
                response.close()
                response = None
                if remote_total and remote_total == downloaded_size:
                    already_complete = True
                else:
                    logger.warning("服务器拒绝续传请求（HTTP 416），丢弃临时文件并重新下载")
                    temp_filepath.unlink()
                    downloaded_size = 0
                    response = requests.get(url, stream=True, timeout=30)

            if not already_complete:
                if not self._stream_to_file(response, temp_filepath, downloaded_size):
                    logger.info("下载已取消")
                    self._notify_complete(False, "下载已取消")
                    return

            # replace() also works on Windows when the target already exists.
            temp_filepath.replace(filepath)
            logger.info(f"模型下载完成: {filepath}")
            self._notify_complete(True, str(filepath))

        except Exception as e:
            logger.error(f"下载模型失败: {str(e)}")
            self._notify_complete(False, f"下载失败: {str(e)}")
        finally:
            # Always release the pooled connection of the streaming response.
            if response is not None:
                response.close()


class LocalModel:
    """Holder for one local model driven by llama-cpp-python.

    The shared instance is obtained through :meth:`get_instance`.  Inference
    calls are serialised with ``_inference_lock``.
    """

    _instance: Optional["LocalModel"] = None
    _lock = threading.Lock()

    def __init__(self):
        self.model_path: Optional[str] = None
        self.llm = None
        self._loaded = False
        self._loading = False
        self._inference_lock = threading.Lock()  # llama-cpp-python does not support concurrent inference

    @classmethod
    def get_instance(cls) -> "LocalModel":
        """Return the shared instance, creating it under the class lock on first use."""
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    def is_loaded(self) -> bool:
        """Return True when a model is loaded and ready for inference."""
        return self._loaded and self.llm is not None

    def is_loading(self) -> bool:
        """Return True while a load() call is in progress."""
        return self._loading

    def get_model_path(self) -> Optional[str]:
        """Return the path of the loaded model, or None when nothing is loaded."""
        return self.model_path if self._loaded else None

    @staticmethod
    def _get_cpu_diagnostics() -> str:
        """Collect system diagnostics as text, for troubleshooting model load failures.

        Every section is best-effort: a failing probe adds an explanatory line
        instead of raising.
        """
        import platform

        info_lines = []
        info_lines.append("=" * 60)
        info_lines.append("本地模型加载诊断信息")
        info_lines.append("=" * 60)

        is_windows = sys.platform == 'win32'

        # Basic system information.
        try:
            info_lines.append(f"操作系统: {platform.platform()}")
            info_lines.append(f"系统版本: {platform.version()}")
            info_lines.append(f"CPU: {platform.processor()}")
            info_lines.append(f"架构: {platform.machine()}")
            info_lines.append(f"Python版本: {platform.python_version()}")
            info_lines.append(f"Python编译器: {platform.python_compiler()}")
        except Exception as e:
            info_lines.append(f"基础信息获取失败: {e}")

        # Memory information (needs the optional psutil package).
        try:
            import psutil
            mem = psutil.virtual_memory()
            info_lines.append(f"总内存: {mem.total / 1024 / 1024 / 1024:.1f} GB")
            info_lines.append(f"可用内存: {mem.available / 1024 / 1024 / 1024:.1f} GB")
            info_lines.append(f"内存使用率: {mem.percent}%")
        except ImportError:
            info_lines.append("内存信息: psutil 未安装，无法获取")
        except Exception as e:
            info_lines.append(f"内存信息获取失败: {e}")

        # SSE / AVX / AVX2 instruction-set detection (Windows only).
        try:
            import ctypes
            if is_windows:
                kernel32 = ctypes.windll.kernel32
                if hasattr(kernel32, 'IsProcessorFeaturePresent'):
                    # Feature constants passed to IsProcessorFeaturePresent:
                    #   6  = PF_XMMI_INSTRUCTIONS_AVAILABLE   (SSE)
                    #   10 = PF_XMMI64_INSTRUCTIONS_AVAILABLE (SSE2)
                    #   13 = PF_SSE3_INSTRUCTIONS_AVAILABLE   (SSE3)
                    #   39 = PF_AVX_INSTRUCTIONS_AVAILABLE    (AVX, Win10+)
                    #   40 = PF_AVX2_INSTRUCTIONS_AVAILABLE   (AVX2, Win10+)
                    #   41 = PF_AVX512F_INSTRUCTIONS_AVAILABLE (AVX512, Win10+)
                    sse = kernel32.IsProcessorFeaturePresent(6)
                    sse2 = kernel32.IsProcessorFeaturePresent(10)
                    sse3 = kernel32.IsProcessorFeaturePresent(13)

                    info_lines.append(f"SSE 支持: {'是' if sse else '否'}")
                    info_lines.append(f"SSE2 支持: {'是' if sse2 else '否'}")
                    info_lines.append(f"SSE3 支持: {'是' if sse3 else '否'}")

                    # The AVX constants are only known to Windows 10 and later.
                    try:
                        avx = kernel32.IsProcessorFeaturePresent(39)
                        avx2 = kernel32.IsProcessorFeaturePresent(40)
                        info_lines.append(f"AVX 支持: {'是' if avx else '否'}")
                        info_lines.append(f"AVX2 支持: {'是' if avx2 else '否'}")

                        if not avx2:
                            info_lines.append("⚠️ 您的CPU不支持AVX2指令集！")
                            info_lines.append("   llama-cpp-python 预编译版需要 AVX2 支持")
                            info_lines.append("   Intel: 需要第4代酷睿(Haswell, 2013年)或更新")
                            info_lines.append("   AMD: 需要 Zen 架构(锐龙, 2017年)或更新")
                    except Exception:
                        info_lines.append("AVX/AVX2 检测: 当前系统版本不支持检测（可能是Win7/8）")
                        info_lines.append("⚠️ Windows 7/8 无法通过API检测AVX，但错误码可辅助判断")
                else:
                    info_lines.append("CPU特性检测: IsProcessorFeaturePresent 不可用")
            else:
                info_lines.append(f"非Windows系统，跳过指令集检测 (platform={sys.platform})")
        except Exception as e:
            info_lines.append(f"CPU指令集检测失败: {e}")

        # Native libraries shipped with llama_cpp.
        try:
            dll_search_paths = []

            # Frozen (packaged) application.
            if getattr(sys, 'frozen', False):
                base = getattr(sys, '_MEIPASS', os.path.dirname(sys.executable))
                dll_search_paths.append(os.path.join(base, 'llama_cpp', 'lib'))
                dll_search_paths.append(os.path.join(base, '_internal', 'llama_cpp', 'lib'))
                dll_search_paths.append(base)

            # Regular site-packages installation.
            try:
                import llama_cpp
                pkg_dir = os.path.dirname(llama_cpp.__file__)
                dll_search_paths.append(os.path.join(pkg_dir, 'lib'))
                info_lines.append(f"llama_cpp 包路径: {pkg_dir}")
            except ImportError:
                info_lines.append("llama_cpp 包: 未安装或无法导入")
            except Exception as import_err:
                info_lines.append(f"llama_cpp 包导入异常: {import_err}")

            # The ``.dll`` names only exist on Windows; elsewhere just list the
            # directory instead of reporting every DLL as missing.
            expected_dlls = ['ggml-base.dll', 'ggml-cpu.dll', 'ggml.dll', 'llama.dll'] if is_windows else []
            for search_path in dll_search_paths:
                if os.path.exists(search_path):
                    found_files = os.listdir(search_path)
                    info_lines.append(f"DLL目录 [{search_path}]: {', '.join(found_files)}")
                    for dll in expected_dlls:
                        dll_path = os.path.join(search_path, dll)
                        if os.path.exists(dll_path):
                            size_mb = os.path.getsize(dll_path) / 1024 / 1024
                            info_lines.append(f"  ✓ {dll} ({size_mb:.1f} MB)")
                        else:
                            info_lines.append(f"  ✗ {dll} 缺失!")
                else:
                    info_lines.append(f"DLL目录不存在: {search_path}")
        except Exception as e:
            info_lines.append(f"DLL检查失败: {e}")

        # Visual C++ runtime detection; skipped off Windows, where these DLLs
        # never exist and would always be reported as missing.
        if is_windows:
            try:
                import ctypes
                vc_dlls = ['vcruntime140.dll', 'vcruntime140_1.dll', 'msvcp140.dll']
                for dll_name in vc_dlls:
                    try:
                        ctypes.CDLL(dll_name)
                        info_lines.append(f"VC++ {dll_name}: 已安装")
                    except OSError:
                        info_lines.append(f"VC++ {dll_name}: ⚠️ 未找到!")
            except Exception as e:
                info_lines.append(f"VC++运行时检测失败: {e}")

        # Downloaded model files.
        try:
            model_dir = Path.home() / ".fileneatai" / "models"
            if model_dir.exists():
                model_files = list(model_dir.glob("*.gguf"))
                info_lines.append(f"模型目录: {model_dir}")
                if model_files:
                    for mf in model_files:
                        size_mb = mf.stat().st_size / 1024 / 1024
                        info_lines.append(f"  模型文件: {mf.name} ({size_mb:.0f} MB)")
                else:
                    info_lines.append("  ⚠️ 没有找到已下载的模型文件")
            else:
                info_lines.append(f"模型目录不存在: {model_dir}")
        except Exception as e:
            info_lines.append(f"模型文件检查失败: {e}")

        info_lines.append("=" * 60)
        return "\n".join(info_lines)

    def _log_failure_diagnostics(self, title: str):
        """Log the system diagnostics at error level under *title*; never raises."""
        try:
            diagnostics = self._get_cpu_diagnostics()
            logger.error(f"{title}:\n{diagnostics}")
        except Exception as diag_err:
            logger.error(f"获取诊断信息失败: {diag_err}")

    @staticmethod
    def _describe_os_error(error_code, error_msg: str) -> tuple:
        """Translate an OSError raised while loading into user-facing guidance.

        Args:
            error_code: ``winerror`` of the exception (may be None).  Signed and
                unsigned 32-bit forms of the same status code are both accepted.
            error_msg: ``str()`` of the exception.

        Returns:
            ``(friendly_msg, conclusions)`` where ``conclusions`` is a list of
            lines to write to the error log.
        """
        # Compare as unsigned 32-bit so that -1073741795 and 0xC000001D match alike.
        code = error_code & 0xFFFFFFFF if isinstance(error_code, int) else None
        text = error_msg.lower()

        # 0xC000001D = STATUS_ILLEGAL_INSTRUCTION; usually an old CPU without AVX/AVX2.
        if code == 0xC000001D or '0xc000001d' in text:
            friendly_msg = (
                "您的CPU不支持本地模型所需的AVX2指令集。\n\n"
                "这与CPU品牌（Intel/AMD）无关，而是CPU年代较老：\n"
                "• Intel: 需要第4代酷睿(Haswell, 2013年)或更新的CPU\n"
                "• AMD: 需要锐龙(Zen架构, 2017年)或更新的CPU\n\n"
                "解决方案：\n"
                "1. 请使用在线模型（DeepSeek/ChatGPT）替代本地模型\n"
                "2. 在较新的电脑上使用本地模型功能"
            )
            return friendly_msg, [
                "诊断结论: CPU不支持AVX2指令集，无法运行本地模型",
                "Intel需要Haswell(2013)+，AMD需要Zen(2017)+",
            ]

        # 0xC0000135 = STATUS_DLL_NOT_FOUND
        if code == 0xC0000135 or '0xc0000135' in text:
            friendly_msg = (
                "缺少必要的DLL动态链接库文件。\n\n"
                "请下载安装 Visual C++ 2015-2022 运行时库：\n"
                "https://aka.ms/vs/17/release/vc_redist.x64.exe\n\n"
                "安装后重启程序即可。"
            )
            return friendly_msg, ["诊断结论: 缺少DLL文件，需要安装VC++运行时库"]

        # 0xC000007B = STATUS_INVALID_IMAGE_FORMAT (32-bit / 64-bit mismatch)
        if code == 0xC000007B or '0xc000007b' in text:
            friendly_msg = (
                "DLL文件格式不匹配（32位/64位冲突）。\n\n"
                "请确保：\n"
                "1. 使用的是64位版本的程序\n"
                "2. 已安装64位的 Visual C++ 运行时库\n"
                "   下载地址: https://aka.ms/vs/17/release/vc_redist.x64.exe"
            )
            return friendly_msg, ["诊断结论: DLL位数不匹配(32/64位冲突)"]

        friendly_msg = (
            f"加载本地模型时发生系统错误。\n\n"
            f"错误代码: {error_code}\n"
            f"错误信息: {error_msg}\n\n"
            f"请尝试：\n"
            f"1. 安装 Visual C++ 2015-2022 运行时库\n"
            f"   https://aka.ms/vs/17/release/vc_redist.x64.exe\n"
            f"2. 重启程序后再试\n"
            f"3. 如仍然失败，请将日志文件发送给技术支持"
        )
        return friendly_msg, [f"加载本地模型失败 (OSError): {error_msg}"]

    def load(self, model_path: str, n_ctx: int = 4096, n_gpu_layers: Optional[int] = None,
             n_threads: int = 4,
             progress_callback: Optional[Callable[[str], None]] = None) -> bool:
        """
        Load a model file, replacing any model that is currently loaded.

        Args:
            model_path: Path of the model file.
            n_ctx: Context length.
            n_gpu_layers: Layers to offload to the GPU (None = auto-detect,
                0 = CPU only, -1 = all layers).
            n_threads: Number of CPU threads.
            progress_callback: Receives human-readable status messages.

        Returns:
            True on success.  False when another load is in progress or the
            load failed; failures are logged and reported through
            ``progress_callback`` instead of being raised.
        """
        # Auto-detect GPU offloading when the caller did not specify it.
        if n_gpu_layers is None:
            n_gpu_layers = get_recommended_gpu_layers()
            gpu_info = get_gpu_info()
            logger.info(f"🎮 GPU 自动检测: {gpu_info['message']}")
            if progress_callback and gpu_info["has_nvidia_gpu"]:
                progress_callback(gpu_info["message"])

        # Check and set the flag atomically so two threads cannot both start loading.
        with self._lock:
            if self._loading:
                logger.warning("模型正在加载中...")
                return False
            self._loading = True

        try:
            # Release the previous model first.
            if self._loaded:
                self.unload()

            # Record the full diagnostics before attempting the load.
            logger.info("=" * 60)
            logger.info("开始加载本地模型 - 记录诊断信息")
            logger.info("=" * 60)
            try:
                diagnostics = self._get_cpu_diagnostics()
                logger.info(f"\n{diagnostics}")
            except Exception as diag_err:
                logger.warning(f"获取诊断信息失败: {diag_err}")

            if progress_callback:
                progress_callback("正在初始化 llama-cpp-python...")

            if not os.path.exists(model_path):
                error_msg = f"模型文件不存在: {model_path}"
                logger.error(error_msg)
                if progress_callback:
                    progress_callback(f"加载失败: {error_msg}")
                return False

            model_size_mb = os.path.getsize(model_path) / 1024 / 1024
            logger.info(f"模型文件: {model_path} ({model_size_mb:.0f} MB)")

            try:
                from llama_cpp import Llama
                logger.info("llama-cpp-python 导入成功")
            except ImportError as e:
                logger.error(f"llama-cpp-python 未安装: {str(e)}")
                logger.error(f"ImportError 详情: {repr(e)}")
                if progress_callback:
                    progress_callback("错误: llama-cpp-python 未安装，请先安装依赖")
                return False
            except OSError as e:
                # A native library can already fail to load during the import.
                logger.error(f"llama-cpp-python 导入时 DLL 加载失败: {str(e)}")
                logger.error(f"OSError 详情: winerror={getattr(e, 'winerror', 'N/A')}, errno={getattr(e, 'errno', 'N/A')}")
                self._log_failure_diagnostics("DLL加载失败诊断")
                if progress_callback:
                    progress_callback(f"错误: llama-cpp-python DLL加载失败，可能是CPU不支持AVX2指令集或缺少VC++运行时库\n{str(e)}")
                return False

            if progress_callback:
                progress_callback(f"正在加载模型: {os.path.basename(model_path)}...")

            logger.info(f"开始加载本地模型: {model_path}")
            logger.info(f"配置: n_ctx={n_ctx}, n_gpu_layers={n_gpu_layers}, n_threads={n_threads}")

            self.llm = Llama(
                model_path=model_path,
                n_ctx=n_ctx,
                n_gpu_layers=n_gpu_layers,
                verbose=False,  # keep llama.cpp log output down
                n_threads=n_threads,
            )

            self.model_path = model_path
            self._loaded = True

            if progress_callback:
                progress_callback("模型加载完成！")

            logger.info(f"本地模型加载成功: {model_path}")

            # Remember the parameters of this load.
            self._save_config(model_path, n_ctx, n_gpu_layers, n_threads)

            return True

        except OSError as e:
            error_code = getattr(e, 'winerror', None)
            error_msg = str(e)

            logger.error("=" * 60)
            logger.error("本地模型加载失败 - OSError 详细诊断")
            logger.error("=" * 60)
            logger.error(f"异常类型: {type(e).__name__}")
            logger.error(f"错误信息: {error_msg}")
            logger.error(f"WinError代码: {error_code}")
            logger.error(f"errno: {getattr(e, 'errno', 'N/A')}")
            logger.error(f"strerror: {getattr(e, 'strerror', 'N/A')}")
            logger.error(f"filename: {getattr(e, 'filename', 'N/A')}")

            import traceback
            logger.error(f"完整堆栈:\n{traceback.format_exc()}")

            self._log_failure_diagnostics("系统诊断信息")

            friendly_msg, conclusions = self._describe_os_error(error_code, error_msg)
            for line in conclusions:
                logger.error(line)
            if progress_callback:
                progress_callback(f"加载失败: {friendly_msg}")

            logger.error("=" * 60)
            self._loaded = False
            self.llm = None
            return False
        except Exception as e:
            # Any other failure, e.g. a corrupt model file.
            logger.error("=" * 60)
            logger.error("本地模型加载失败 - 未知异常详细诊断")
            logger.error("=" * 60)
            logger.error(f"异常类型: {type(e).__name__}")
            logger.error(f"错误信息: {str(e)}")

            import traceback
            logger.error(f"完整堆栈:\n{traceback.format_exc()}")

            self._log_failure_diagnostics("系统诊断信息")

            friendly_msg = (
                f"加载本地模型时发生错误: {str(e)}\n\n"
                f"请尝试：\n"
                f"1. 确认模型文件是否完整（未损坏）\n"
                f"2. 安装 Visual C++ 2015-2022 运行时库\n"
                f"   https://aka.ms/vs/17/release/vc_redist.x64.exe\n"
                f"3. 重启程序后再试"
            )
            if progress_callback:
                progress_callback(f"加载失败: {friendly_msg}")

            logger.error("=" * 60)
            self._loaded = False
            self.llm = None
            return False
        finally:
            self._loading = False

    def unload(self):
        """Drop the reference to the loaded model and reset the loaded state."""
        if self.llm:
            logger.info("正在卸载本地模型...")
            # Rebinding the attribute drops this object's reference to the model.
            self.llm = None
        self._loaded = False
        self.model_path = None
        logger.info("本地模型已卸载")

    def generate(self, prompt: str, max_tokens: int = 200, temperature: float = 0.1) -> str:
        """
        Generate a reply for *prompt*; calls are serialised through ``_inference_lock``.

        Args:
            prompt: User prompt.
            max_tokens: Maximum number of tokens to generate.
            temperature: Sampling temperature.

        Returns:
            The generated text with surrounding whitespace removed.

        Raises:
            RuntimeError: When no model is loaded.
            Exception: Any error raised by the model is logged and re-raised.
        """
        if not self._loaded or self.llm is None:
            raise RuntimeError("本地模型未加载")

        with self._inference_lock:
            try:
                response = self.llm.create_chat_completion(
                    messages=[
                        {"role": "system", "content": "你是一个文件分类助手，请根据文件内容和文件名判断它应该归类到哪个文件夹。只回答文件夹名称，不要解释。"},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=max_tokens,
                    temperature=temperature,
                    stop=["</s>", "\n\n"]
                )

                # Treat a missing (None) content as an empty reply.
                content = response["choices"][0]["message"]["content"] or ""
                return content.strip()

            except Exception as e:
                logger.error(f"本地模型生成失败: {str(e)}")
                raise

    def classify_file(self, file_content: str, file_name: str, folders: List[str]) -> str:
        """
        Pick the folder that best fits a file.

        Args:
            file_content: File content; only the first 300 characters are sent.
            file_name: File name.
            folders: Candidate folders; only the first 10 are shown to the model.

        Returns:
            One entry of *folders*.  Falls back to the first folder when the
            reply matches none of them or generation fails; returns "" when
            *folders* is empty.
        """
        if not folders:
            return ""

        # Keep the prompt short: at most 10 folders.
        folder_list = "\n".join([f"- {f}" for f in folders[:10]])

        prompt = f"""根据以下文件信息，从给定的文件夹中选择最合适的一个：

文件名: {file_name}
内容摘要: {file_content[:300] if file_content else '(无内容)'}

可选文件夹:
{folder_list}

请直接回答文件夹名称（只回答名称，不要其他内容）:"""

        try:
            result = self.generate(prompt, max_tokens=50, temperature=0.1)

            # Strip whitespace and surrounding quotes from the reply.
            result = result.strip().strip('"').strip("'")

            # An empty reply is a substring of every folder name, so it must
            # not enter the matching below; fall through to the logged fallback.
            if result:
                answer = result.lower()

                # Exact (case-insensitive) match first.
                for folder in folders:
                    if folder.lower() == answer:
                        return folder

                # Then substring match in either direction.
                for folder in folders:
                    if folder.lower() in answer or answer in folder.lower():
                        return folder

            logger.warning(f"本地模型返回的文件夹 '{result}' 不在列表中，使用第一个文件夹")
            return folders[0]

        except Exception as e:
            logger.error(f"本地模型分类失败: {str(e)}")
            return folders[0] if folders else ""

    def _save_config(self, model_path: str, n_ctx: int, n_gpu_layers: int, n_threads: int = 4):
        """Write the load parameters to CONFIG_FILE; failures are logged, not raised."""
        try:
            config = {
                "model_path": model_path,
                "n_ctx": n_ctx,
                "n_gpu_layers": n_gpu_layers,
                "n_threads": n_threads,
                "last_used": time.strftime("%Y-%m-%d %H:%M:%S")
            }
            with open(CONFIG_FILE, "w", encoding="utf-8") as f:
                json.dump(config, f, ensure_ascii=False, indent=2)
        except Exception as e:
            logger.warning(f"保存本地模型配置失败: {str(e)}")

    @staticmethod
    def load_config() -> Optional[Dict[str, Any]]:
        """Return the saved configuration dict, or None if missing, unreadable or not a dict."""
        try:
            if CONFIG_FILE.exists():
                with open(CONFIG_FILE, "r", encoding="utf-8") as f:
                    config = json.load(f)
                # Callers use dict methods; reject valid JSON of another shape.
                if isinstance(config, dict):
                    return config
        except Exception as e:
            logger.warning(f"加载本地模型配置失败: {str(e)}")
        return None


# ═══════════════════════════════════════════════════════════
# 工具函数
# ═══════════════════════════════════════════════════════════

def get_downloaded_models() -> List[Dict[str, Any]]:
    """Describe every AVAILABLE_MODELS entry whose file is present in MODEL_DIR.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``description``,
        ``path`` (string) and ``size_mb`` (file size on disk in MiB).
    """
    located = (
        (model_id, info, MODEL_DIR / info["filename"])
        for model_id, info in AVAILABLE_MODELS.items()
    )
    return [
        {
            "id": model_id,
            "name": info["name"],
            "description": info["description"],
            "path": str(path),
            "size_mb": path.stat().st_size / 1024 / 1024,
        }
        for model_id, info, path in located
        if path.exists()
    ]


def get_available_models() -> List[Dict[str, Any]]:
    """List every entry of AVAILABLE_MODELS, whether or not it is downloaded.

    Returns:
        One dict per model with catalogue metadata plus ``downloaded`` (bool)
        and ``path`` (string, or None when the file is not in MODEL_DIR).
    """
    def describe(model_id, info):
        # Expected on-disk location of this model's file.
        path = MODEL_DIR / info["filename"]
        return {
            "id": model_id,
            "name": info["name"],
            "description": info["description"],
            "size_mb": info["size_mb"],
            "memory_required_mb": info["memory_required_mb"],
            "downloaded": path.exists(),
            "path": str(path) if path.exists() else None,
            "recommended": info.get("recommended", False),
        }

    return [describe(model_id, info) for model_id, info in AVAILABLE_MODELS.items()]


def is_llama_cpp_available() -> bool:
    """Report whether ``llama_cpp.Llama`` can be imported.

    Any exception raised by the import (not only ImportError) is logged as a
    warning and treated as "not available".
    """
    try:
        from llama_cpp import Llama  # noqa: F401 - imported only as a probe
    except Exception as exc:
        logger.warning(f"llama-cpp-python 不可用: {exc}")
        return False
    return True


def get_local_model() -> LocalModel:
    """Return the shared instance provided by ``LocalModel.get_instance()``."""
    shared_instance = LocalModel.get_instance()
    return shared_instance


def auto_load_last_model(progress_callback=None) -> bool:
    """Reload the model recorded in the saved config, if it is still on disk.

    Nothing is loaded (and False is returned) when llama-cpp-python is
    unavailable, when no config / model path is stored, or when the stored
    model file no longer exists.

    Args:
        progress_callback: Forwarded unchanged to the model's ``load`` call.

    Returns:
        True if a model was already loaded; otherwise the result of the load
        attempt (False when it raised).
    """
    local_model = get_local_model()
    if local_model.is_loaded():
        logger.info("🤖 模型已加载，跳过自动加载")
        return True

    if not is_llama_cpp_available():
        logger.info("🤖 llama-cpp-python 不可用，跳过自动加载")
        return False

    saved = LocalModel.load_config()
    if not (saved and saved.get("model_path")):
        logger.info("🤖 没有上次加载的模型配置，跳过自动加载")
        return False

    model_path = saved["model_path"]
    if not os.path.exists(model_path):
        logger.warning(f"🤖 上次加载的模型文件不存在，跳过: {model_path}")
        return False

    # Missing keys fall back to the same defaults as before.
    load_kwargs = {
        "model_path": model_path,
        "n_ctx": saved.get("n_ctx", 4096),
        "n_gpu_layers": saved.get("n_gpu_layers", 0),
        "n_threads": saved.get("n_threads", 4),
        "progress_callback": progress_callback,
    }

    logger.info(f"🤖 自动加载上次使用的模型: {os.path.basename(model_path)}")
    try:
        success = local_model.load(**load_kwargs)
        if not success:
            logger.warning("🤖 自动加载失败")
        else:
            logger.info(f"🤖 自动加载成功: {os.path.basename(model_path)}")
        return success
    except Exception as exc:
        logger.error(f"🤖 自动加载异常: {exc}")
        return False


def classify_with_local_model(file_content: str, file_name: str, folders: List[str]) -> str:
    """Convenience wrapper: classify a file with the shared local model.

    Raises:
        RuntimeError: If the local model is not loaded.
    """
    local_model = get_local_model()
    if local_model.is_loaded():
        return local_model.classify_file(file_content, file_name, folders)
    raise RuntimeError("本地模型未加载")


# ═══════════════════════════════════════════════════════════
# LangChain 兼容包装器
# ═══════════════════════════════════════════════════════════

class LocalModelLLMWrapper:
    """
    LangChain-style adapter around the shared local model.

    Exposes ``invoke()``, ``__call__`` and the ``|`` operators so the object
    can be used where a chat model is expected in a ``prompt | llm`` pipeline.
    """

    def __init__(self, temperature: float = 0.1):
        self.temperature = temperature
        self._model = get_local_model()

    def invoke(self, input_data, config=None, **kwargs):
        """
        Run inference on the local model (Runnable-style entry point).

        Args:
            input_data: A string, a list of messages, a ``{"messages": [...]}``
                dict, or an object exposing ``to_messages()`` (PromptValue).
            config: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            ``langchain_core.messages.AIMessage`` when langchain_core is
            importable, otherwise a ``_SimpleAIMessage`` with the same
            ``content`` attribute.

        Raises:
            RuntimeError: If the local model is not loaded.
        """
        if not self._model.is_loaded():
            raise RuntimeError("内置本地模型未加载，请先在设置中加载模型")

        # Normalise the supported input shapes into one prompt string.
        prompt = self._extract_prompt(input_data)

        response = self._model.generate(
            prompt,
            max_tokens=500,
            temperature=self.temperature
        )

        # Prefer the real AIMessage so downstream LangChain parsers work.
        try:
            from langchain_core.messages import AIMessage
            return AIMessage(content=response)
        except ImportError:
            # langchain_core is unavailable: return a duck-typed substitute.
            return _SimpleAIMessage(content=response)

    def _extract_prompt(self, input_data) -> str:
        """Turn any supported input shape into the prompt text."""
        # Plain string: use as is.
        if isinstance(input_data, str):
            return input_data

        # PromptValue-like object (what ``prompt | llm`` produces).
        if hasattr(input_data, 'to_messages'):
            messages = input_data.to_messages()
            return self._format_messages(messages)

        # List of messages.
        if isinstance(input_data, list):
            return self._format_messages(input_data)

        # Dict, possibly of the form {"messages": [...]}.
        if isinstance(input_data, dict):
            if 'messages' in input_data:
                return self._format_messages(input_data['messages'])
            return str(input_data)

        return str(input_data)

    def _format_messages(self, messages: list) -> str:
        """Render a list of messages as newline-separated text.

        Objects with a ``content`` attribute are labelled by their ``type``
        (system / human / ai); other types are emitted without a label. Dict
        messages are labelled with their ``role``. Anything else is rendered
        with ``str()``.
        """
        formatted = []
        for msg in messages:
            if hasattr(msg, 'content'):
                content = msg.content
                role = getattr(msg, 'type', 'user')
                if role == 'system':
                    formatted.append(f"[System]: {content}")
                elif role == 'human':
                    formatted.append(f"[User]: {content}")
                elif role == 'ai':
                    formatted.append(f"[Assistant]: {content}")
                else:
                    # Content may be a non-string (e.g. a list of parts);
                    # stringify it so the final join cannot raise TypeError.
                    formatted.append(str(content))
            elif isinstance(msg, dict):
                content = msg.get('content', '')
                role = msg.get('role', 'user')
                formatted.append(f"[{role}]: {content}")
            else:
                formatted.append(str(msg))
        return "\n".join(formatted)

    def __call__(self, input_data, **kwargs):
        """Make the wrapper callable; delegates to ``invoke``."""
        return self.invoke(input_data, **kwargs)

    def __or__(self, other):
        """Support the pipe operator: ``llm | output_parser``."""
        return _LLMChain(self, other)

    def __ror__(self, other):
        """Support the pipe operator: ``prompt | llm``."""
        return _LLMChain(other, self)


class _SimpleAIMessage:
    """Minimal stand-in for AIMessage, used when langchain_core is unavailable."""

    def __init__(self, content: str):
        # Same two attributes as before, assigned in the same order.
        self.content, self.type = content, 'ai'

    def __str__(self):
        text = self.content
        return text


class _LLMChain:
    """Two-stage pipeline mimicking LangChain's ``prompt | llm`` syntax."""

    def __init__(self, first, second):
        self.first = first
        self.second = second

    def invoke(self, input_data, config=None, **kwargs):
        """Feed ``input_data`` through ``first`` and then ``second``.

        A stage with an ``invoke`` attribute is called as
        ``stage.invoke(value, **kwargs)``; a plain callable as ``stage(value)``;
        anything else passes the value through untouched. ``config`` is
        accepted but not forwarded.
        """
        value = input_data
        for stage in (self.first, self.second):
            if hasattr(stage, 'invoke'):
                value = stage.invoke(value, **kwargs)
            elif callable(stage):
                value = stage(value)
            # Otherwise: not runnable, leave the value as it is.
        return value

    def __or__(self, other):
        """Allow longer pipelines: ``prompt | llm | parser``."""
        return _LLMChain(self, other)


def create_local_llm(temperature: float = 0.1) -> LocalModelLLMWrapper:
    """
    Build an LLM wrapper around the built-in local model.

    Usage mirrors ChatOpenAI:
        llm = create_local_llm()
        response = llm.invoke("你好")

    It also works in a pipeline:
        chain = prompt | llm
        response = chain.invoke({"input": "你好"})

    Raises:
        RuntimeError: If the local model is not loaded.
    """
    if get_local_model().is_loaded():
        return LocalModelLLMWrapper(temperature=temperature)
    raise RuntimeError("内置本地模型未加载，请先在设置中加载模型")


def test_builtin_model() -> tuple:
    """
    Smoke-test the built-in local model with one short generation.

    Returns:
        (success: bool, message: str). Never raises: any exception is turned
        into a failure tuple.
    """
    try:
        local_model = get_local_model()
        if not local_model.is_loaded():
            return False, "模型未加载，请先加载模型"

        # One short generation is enough to show the model responds.
        reply = local_model.generate("你好，请回复'测试成功'", max_tokens=50, temperature=0.1)

        if not (reply and len(reply.strip()) > 0):
            return False, "模型返回空响应"

        if local_model.get_model_path():
            model_name = os.path.basename(local_model.get_model_path())
        else:
            model_name = "未知"
        return True, f"模型 {model_name} 测试成功！\n\n模型回复: {reply.strip()}"
    except Exception as exc:
        return False, f"测试失败: {str(exc)}"


# ═══════════════════════════════════════════════════════════
# 视觉模型（图片识别）
# ═══════════════════════════════════════════════════════════

# JSON file where LocalVisionModel stores the parameters of its last successful load.
VISION_CONFIG_FILE = Path.home() / ".fileneatai" / "vision_model_config.json"

# Catalogue of downloadable vision models, keyed by model id.
# Each entry has two download URLs with matching local filenames (stored in MODEL_DIR):
#   model_url / model_filename    - the main model file
#   mmproj_url / mmproj_filename  - the vision projector ("mmproj") file
# "chat_handler_type" is the value passed as handler_type to
# LocalVisionModel.load() / _create_chat_handler().
# size_mb / memory_required_mb / context_length are descriptive metadata;
# presumably megabytes and tokens respectively - TODO confirm against the UI that shows them.
AVAILABLE_VISION_MODELS = {
    "minicpm-v-2.6": {
        "name": "MiniCPM-V 2.6 (推荐)",
        "description": "中文图片识别能力极强，支持OCR、场景描述",
        "model_url": "https://hf-mirror.com/openbmb/MiniCPM-V-2_6-gguf/resolve/main/ggml-model-Q4_K_M.gguf",
        "mmproj_url": "https://hf-mirror.com/openbmb/MiniCPM-V-2_6-gguf/resolve/main/mmproj-model-f16.gguf",
        "model_filename": "minicpm-v-2.6-Q4_K_M.gguf",
        "mmproj_filename": "minicpm-v-2.6-mmproj-f16.gguf",
        "size_mb": 4900,
        "memory_required_mb": 6144,
        "context_length": 4096,
        "chat_handler_type": "minicpmv26",
        "recommended": True
    },
    "llava-v1.5-7b": {
        "name": "LLaVA-v1.5-7B",
        "description": "经典视觉模型，英文能力强，稳定可靠",
        "model_url": "https://hf-mirror.com/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q4_k.gguf",
        "mmproj_url": "https://hf-mirror.com/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf",
        "model_filename": "llava-v1.5-7b-q4_k.gguf",
        "mmproj_filename": "llava-v1.5-7b-mmproj-f16.gguf",
        "size_mb": 4500,
        "memory_required_mb": 6144,
        "context_length": 4096,
        "chat_handler_type": "llava15",
        "recommended": False
    },
    "moondream2": {
        "name": "Moondream2 (轻量)",
        "description": "最小的视觉模型，速度快，适合简单图片识别",
        "model_url": "https://hf-mirror.com/vikhyatk/moondream2-gguf/resolve/main/moondream2-text-model-f16.gguf",
        "mmproj_url": "https://hf-mirror.com/vikhyatk/moondream2-gguf/resolve/main/moondream2-mmproj-f16.gguf",
        "model_filename": "moondream2-text-model-f16.gguf",
        "mmproj_filename": "moondream2-mmproj-f16.gguf",
        "size_mb": 1700,
        "memory_required_mb": 3072,
        "context_length": 2048,
        "chat_handler_type": "moondream2",
        "recommended": False
    }
}


class VisionModelDownloader:
    """Downloader for vision models, which need two files: the main model and
    the vision projector (mmproj).

    Files are written to ``MODEL_DIR`` as ``<filename>.downloading`` and moved
    to their final name only once complete, so an interrupted download can be
    resumed with an HTTP Range request.

    Callbacks:
        progress_callback(downloaded_bytes, total_bytes, speed_mb_per_s)
        complete_callback(success, message_or_model_path)

    NOTE(review): this class does not start a thread itself; presumably the
    caller runs ``_download_thread`` in one - confirm against the call sites.
    """

    def __init__(self, model_id: str,
                 progress_callback: Optional[Callable[[int, int, float], None]] = None,
                 complete_callback: Optional[Callable[[bool, str], None]] = None):
        self.model_id = model_id
        # None for an unknown model id; reported by _download_thread.
        self.model_info = AVAILABLE_VISION_MODELS.get(model_id)
        self.progress_callback = progress_callback
        self.complete_callback = complete_callback
        self._stop_flag = False

    def stop(self):
        """Request cancellation; honoured between chunks of the active download."""
        self._stop_flag = True

    def _download_thread(self):
        """Download the main model and then the vision projector, in that order.

        The outcome is reported through ``complete_callback``. On cancellation
        the callback has already been invoked by ``_download_single_file``.
        """
        if not self.model_info:
            if self.complete_callback:
                self.complete_callback(False, f"未知视觉模型: {self.model_id}")
            return

        try:
            # 1. Main model
            model_path = self._download_single_file(
                self.model_info["model_url"],
                self.model_info["model_filename"],
                "主模型"
            )
            if model_path is None:
                return

            # 2. Vision projector (mmproj)
            mmproj_path = self._download_single_file(
                self.model_info["mmproj_url"],
                self.model_info["mmproj_filename"],
                "视觉投影器"
            )
            if mmproj_path is None:
                return

            # Both files are in place.
            logger.info(f"视觉模型下载完成: {model_path}")
            if self.complete_callback:
                self.complete_callback(True, str(model_path))

        except Exception as e:
            logger.error(f"下载视觉模型失败: {str(e)}")
            if self.complete_callback:
                self.complete_callback(False, f"下载失败: {str(e)}")

    @staticmethod
    def _total_from_content_range(content_range: str) -> int:
        """Return the total size from a Content-Range value, or 0 if absent/unknown.

        Handles ``bytes 0-99/200``, ``bytes */200`` and ``bytes 0-99/*``.
        """
        _, slash, total = content_range.rpartition("/")
        total = total.strip()
        if slash and total.isdecimal():
            return int(total)
        return 0

    def _download_single_file(self, url: str, filename: str, label: str) -> Optional[Path]:
        """Download one file into MODEL_DIR, resuming a partial download if present.

        Args:
            url: Source URL.
            filename: Final file name inside MODEL_DIR.
            label: Human-readable name used in log and error messages.

        Returns:
            Path of the finished file, or None if the download was cancelled
            (in which case ``complete_callback`` has already been called).

        Raises:
            Exception: On an unexpected HTTP status or a truncated transfer.
        """
        import requests

        filepath = MODEL_DIR / filename
        temp_filepath = MODEL_DIR / f"{filename}.downloading"

        if filepath.exists():
            logger.info(f"{label}已存在: {filepath}")
            return filepath

        downloaded_size = 0
        if temp_filepath.exists():
            downloaded_size = temp_filepath.stat().st_size
            logger.info(f"断点续传{label}：已下载 {downloaded_size / 1024 / 1024:.1f} MB")

        headers = {}
        if downloaded_size > 0:
            headers["Range"] = f"bytes={downloaded_size}-"

        logger.info(f"开始下载{label}: {url}")
        # The context manager releases the streamed connection on every exit
        # path (success, cancellation, error).
        with requests.get(url, headers=headers, stream=True, timeout=30) as response:
            status = response.status_code
            if status == 206:
                total_size = self._total_from_content_range(
                    response.headers.get("Content-Range", ""))
            elif status == 200:
                # The server ignored the Range header (or none was sent): start over.
                total_size = int(response.headers.get("Content-Length", 0))
                downloaded_size = 0
            elif status == 416 and downloaded_size > 0:
                # The requested offset is at or past the end of the resource.
                total_size = self._total_from_content_range(
                    response.headers.get("Content-Range", ""))
                if total_size == downloaded_size:
                    # The partial file is already complete; only the final move is missing.
                    temp_filepath.replace(filepath)
                    logger.info(f"{label}下载完成: {filepath}")
                    return filepath
                # The partial file cannot be resumed: discard it so a retry starts clean.
                temp_filepath.unlink()
                raise Exception(f"下载{label}失败: HTTP {status}")
            else:
                raise Exception(f"下载{label}失败: HTTP {status}")

            mode = "ab" if downloaded_size > 0 else "wb"
            last_report_time = time.time()
            last_downloaded = downloaded_size

            with open(temp_filepath, mode) as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if self._stop_flag:
                        if self.complete_callback:
                            self.complete_callback(False, "下载已取消")
                        return None

                    if chunk:
                        f.write(chunk)
                        downloaded_size += len(chunk)

                        # Report progress at most every 0.5 s.
                        current_time = time.time()
                        if current_time - last_report_time >= 0.5:
                            elapsed = current_time - last_report_time
                            speed = (downloaded_size - last_downloaded) / elapsed / 1024 / 1024

                            if self.progress_callback:
                                self.progress_callback(downloaded_size, total_size, speed)

                            last_report_time = current_time
                            last_downloaded = downloaded_size

        # A stream that ended early must not be promoted to the final name;
        # the partial file is kept so the next attempt can resume.
        if total_size > 0 and downloaded_size < total_size:
            raise Exception(
                f"下载{label}不完整: 已接收 {downloaded_size} / {total_size} 字节"
            )

        # replace() also succeeds if the target appeared in the meantime.
        temp_filepath.replace(filepath)
        logger.info(f"{label}下载完成: {filepath}")
        return filepath


class LocalVisionModel:
    """Local vision model driven by llama-cpp-python's multimodal chat handlers.

    Meant to be used through ``get_instance()``, which returns one shared
    object. Inference and unloading are serialised by ``_inference_lock``.
    """

    _instance: Optional["LocalVisionModel"] = None
    _lock = threading.Lock()  # guards creation of _instance

    def __init__(self):
        self.model_path: Optional[str] = None
        self.mmproj_path: Optional[str] = None
        self.llm = None
        self._loaded = False
        self._loading = False
        self._handler_type: Optional[str] = None
        # llama-cpp-python vision inference does not support concurrent calls.
        self._inference_lock = threading.Lock()

    @classmethod
    def get_instance(cls) -> "LocalVisionModel":
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    def is_loaded(self) -> bool:
        """True when a model has been loaded and the Llama object is present."""
        return self._loaded and self.llm is not None

    def is_loading(self) -> bool:
        """True while a ``load()`` call is in progress."""
        return self._loading

    def get_model_path(self) -> Optional[str]:
        """Path of the loaded model file, or None when nothing is loaded."""
        return self.model_path if self._loaded else None

    def load(self, model_path: str, mmproj_path: str, handler_type: str = "llava15",
             n_ctx: int = 4096, n_gpu_layers: Optional[int] = None, n_threads: int = 4,
             progress_callback: Optional[Callable[[str], None]] = None) -> bool:
        """Load a vision model together with its vision projector.

        Args:
            model_path: Path of the main model file.
            mmproj_path: Path of the vision projector (mmproj) file.
            handler_type: Chat handler to use; see ``_create_chat_handler``.
            n_ctx: Context length passed to ``Llama``.
            n_gpu_layers: GPU layer count passed to ``Llama``; None means use
                ``get_recommended_gpu_layers()``.
            n_threads: Thread count passed to ``Llama``.
            progress_callback: Receives human-readable status strings.

        Returns:
            True on success. False if another load is in progress,
            llama-cpp-python is missing, the chat handler cannot be created,
            or loading raised (the error is logged and reported through
            ``progress_callback``).
        """
        if self._loading:
            logger.warning("视觉模型正在加载中...")
            return False

        # Auto-detect the GPU configuration when the caller gave none.
        if n_gpu_layers is None:
            n_gpu_layers = get_recommended_gpu_layers()
            logger.info(f"🎮 视觉模型 GPU 自动检测: n_gpu_layers={n_gpu_layers}")

        try:
            self._loading = True

            # Release any previously loaded model first.
            if self._loaded:
                self.unload()

            if progress_callback:
                progress_callback("正在初始化 llama-cpp-python...")

            try:
                from llama_cpp import Llama
            except ImportError as e:
                logger.error(f"llama-cpp-python 未安装: {str(e)}")
                if progress_callback:
                    progress_callback("错误: llama-cpp-python 未安装")
                return False

            # Pick the chat handler that matches handler_type.
            if progress_callback:
                progress_callback("正在加载视觉投影器...")

            chat_handler = self._create_chat_handler(handler_type, mmproj_path)
            if chat_handler is None:
                if progress_callback:
                    progress_callback("错误: 无法创建视觉处理器")
                return False

            if progress_callback:
                progress_callback(f"正在加载视觉模型: {os.path.basename(model_path)}...")

            logger.info(f"开始加载视觉模型: {model_path}")
            logger.info(f"视觉投影器: {mmproj_path}")
            logger.info(f"Handler: {handler_type}, n_ctx={n_ctx}, n_gpu_layers={n_gpu_layers}")

            self.llm = Llama(
                model_path=model_path,
                chat_handler=chat_handler,
                n_ctx=n_ctx,
                n_gpu_layers=n_gpu_layers,
                n_threads=n_threads,
                verbose=False,
            )

            self.model_path = model_path
            self.mmproj_path = mmproj_path
            self._handler_type = handler_type
            self._loaded = True

            if progress_callback:
                progress_callback("视觉模型加载完成！")

            logger.info(f"✅ 视觉模型加载成功: {model_path}")

            # Remember the parameters so the model can be auto-loaded next time.
            self._save_config(model_path, mmproj_path, handler_type, n_ctx, n_gpu_layers, n_threads)
            return True

        except Exception as e:
            logger.error(f"加载视觉模型失败: {str(e)}")
            if progress_callback:
                progress_callback(f"加载失败: {str(e)}")
            self._loaded = False
            self.llm = None
            return False
        finally:
            self._loading = False

    def _create_chat_handler(self, handler_type: str, mmproj_path: str):
        """Create the multimodal chat handler for ``handler_type``.

        "minicpmv26", "moondream2" and "llava16" select their dedicated handler
        class. If that class cannot be imported, or for any other value, the
        Llava15 handler is used. Returns None if handler creation fails.
        """
        try:
            if handler_type == "minicpmv26":
                try:
                    from llama_cpp.llama_chat_format import MiniCPMv26ChatHandler
                    return MiniCPMv26ChatHandler(clip_model_path=mmproj_path, verbose=False)
                except (ImportError, AttributeError):
                    logger.warning("MiniCPMv26ChatHandler 不可用，回退到 Llava15ChatHandler")

            if handler_type == "moondream2":
                try:
                    from llama_cpp.llama_chat_format import MoondreamChatHandler
                    return MoondreamChatHandler(clip_model_path=mmproj_path, verbose=False)
                except (ImportError, AttributeError):
                    logger.warning("MoondreamChatHandler 不可用，回退到 Llava15ChatHandler")

            if handler_type == "llava16":
                try:
                    from llama_cpp.llama_chat_format import Llava16ChatHandler
                    return Llava16ChatHandler(clip_model_path=mmproj_path, verbose=False)
                except (ImportError, AttributeError):
                    logger.warning("Llava16ChatHandler 不可用，回退到 Llava15ChatHandler")

            # Default / fallback handler.
            from llama_cpp.llama_chat_format import Llava15ChatHandler
            return Llava15ChatHandler(clip_model_path=mmproj_path, verbose=False)

        except Exception as e:
            logger.error(f"创建视觉处理器失败: {str(e)}")
            return None

    def recognize_image(self, image_path: str,
                        prompt: str = "请详细识别这张图片中的所有文字内容",
                        max_tokens: int = 1000) -> str:
        """Run the vision model on one image and return its stripped text reply.

        The image is sent as a base64 ``data:`` URL. The MIME subtype comes
        from the file extension and defaults to jpeg for unknown extensions.

        Raises:
            RuntimeError: If no vision model is loaded, including when it is
                unloaded while this call waits for the inference lock.
        """
        # Fast path: fail before touching the file when nothing is loaded.
        if not self._loaded or self.llm is None:
            raise RuntimeError("视觉模型未加载")

        import base64

        with open(image_path, "rb") as f:
            image_data = base64.b64encode(f.read()).decode("utf-8")

        ext = os.path.splitext(image_path)[1].lower()
        mime_map = {
            ".jpg": "jpeg", ".jpeg": "jpeg", ".png": "png",
            ".gif": "gif", ".webp": "webp", ".bmp": "bmp"
        }
        mime = mime_map.get(ext, "jpeg")

        with self._inference_lock:
            # Re-check under the lock: unload() may have run while we waited,
            # which would otherwise surface as an AttributeError on None.
            llm = self.llm
            if not self._loaded or llm is None:
                raise RuntimeError("视觉模型未加载")

            response = llm.create_chat_completion(
                messages=[{
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {
                            "url": f"data:image/{mime};base64,{image_data}"
                        }}
                    ]
                }],
                max_tokens=max_tokens,
                temperature=0.1
            )

        content = response["choices"][0]["message"]["content"]
        # The content field may be None; treat that as an empty reply.
        return (content or "").strip()

    def unload(self):
        """Drop the loaded model and reset all state.

        Takes the inference lock, so it waits for a running inference.
        """
        with self._inference_lock:
            if self.llm:
                logger.info("正在卸载视觉模型...")
                # Dropping the reference is enough; no exception to swallow.
                self.llm = None
            self._loaded = False
            self.model_path = None
            self.mmproj_path = None
            self._handler_type = None
            logger.info("视觉模型已卸载")

    def _save_config(self, model_path: str, mmproj_path: str, handler_type: str,
                     n_ctx: int, n_gpu_layers: int, n_threads: int):
        """Write the load parameters to VISION_CONFIG_FILE (best-effort)."""
        try:
            config = {
                "model_path": model_path,
                "mmproj_path": mmproj_path,
                "handler_type": handler_type,
                "n_ctx": n_ctx,
                "n_gpu_layers": n_gpu_layers,
                "n_threads": n_threads,
                "last_used": time.strftime("%Y-%m-%d %H:%M:%S")
            }
            with open(VISION_CONFIG_FILE, "w", encoding="utf-8") as f:
                json.dump(config, f, ensure_ascii=False, indent=2)
        except Exception as e:
            logger.warning(f"保存视觉模型配置失败: {str(e)}")

    @staticmethod
    def load_config() -> Optional[Dict[str, Any]]:
        """Read VISION_CONFIG_FILE.

        Returns:
            The parsed JSON content, or None when the file is missing or
            cannot be read/parsed (the failure is logged as a warning).
        """
        try:
            if VISION_CONFIG_FILE.exists():
                with open(VISION_CONFIG_FILE, "r", encoding="utf-8") as f:
                    return json.load(f)
        except Exception as e:
            # Log like the save path does instead of failing silently.
            logger.warning(f"加载视觉模型配置失败: {str(e)}")
        return None


# ═══════════════════════════════════════════════════════════
# 视觉模型工具函数
# ═══════════════════════════════════════════════════════════

def get_vision_model() -> LocalVisionModel:
    """Return the shared instance provided by ``LocalVisionModel.get_instance()``."""
    shared_instance = LocalVisionModel.get_instance()
    return shared_instance


def auto_load_last_vision_model(progress_callback=None) -> bool:
    """Reload the vision model recorded in the saved config, if still on disk.

    Nothing is loaded (and False is returned) when llama-cpp-python is
    unavailable, when no config / model path is stored, or when the stored
    model file (or a stored, non-empty projector path) no longer exists.

    Args:
        progress_callback: Forwarded unchanged to the model's ``load`` call.

    Returns:
        True if a vision model was already loaded; otherwise the result of the
        load attempt (False when it raised).
    """
    vision = get_vision_model()
    if vision.is_loaded():
        logger.info("👁 视觉模型已加载，跳过自动加载")
        return True

    if not is_llama_cpp_available():
        logger.info("👁 llama-cpp-python 不可用，跳过视觉模型自动加载")
        return False

    saved = LocalVisionModel.load_config()
    if not (saved and saved.get("model_path")):
        logger.info("👁 没有上次加载的视觉模型配置，跳过自动加载")
        return False

    model_path = saved["model_path"]
    mmproj_path = saved.get("mmproj_path", "")

    if not os.path.exists(model_path):
        logger.warning(f"👁 上次的视觉模型文件不存在，跳过: {model_path}")
        return False
    if mmproj_path and not os.path.exists(mmproj_path):
        logger.warning(f"👁 上次的视觉投影器文件不存在，跳过: {mmproj_path}")
        return False

    # Missing keys fall back to the same defaults as before.
    load_kwargs = {
        "model_path": model_path,
        "mmproj_path": mmproj_path,
        "handler_type": saved.get("handler_type", "llava15"),
        "n_ctx": saved.get("n_ctx", 4096),
        "n_gpu_layers": saved.get("n_gpu_layers", 0),
        "n_threads": saved.get("n_threads", 4),
        "progress_callback": progress_callback,
    }

    logger.info(f"👁 自动加载上次使用的视觉模型: {os.path.basename(model_path)}")
    try:
        success = vision.load(**load_kwargs)
        if not success:
            logger.warning("👁 视觉模型自动加载失败")
        else:
            logger.info(f"👁 视觉模型自动加载成功: {os.path.basename(model_path)}")
        return success
    except Exception as exc:
        logger.error(f"👁 视觉模型自动加载异常: {exc}")
        return False


def get_available_vision_models() -> List[Dict[str, Any]]:
    """List every entry of AVAILABLE_VISION_MODELS with its download status.

    Returns:
        One dict per model. ``downloaded`` is True only when both the model
        file and the projector file exist in MODEL_DIR; ``model_path`` and
        ``mmproj_path`` are strings, or None for a file that is not present.
    """
    def describe(model_id, info):
        model_file = MODEL_DIR / info["model_filename"]
        mmproj_file = MODEL_DIR / info["mmproj_filename"]
        # Usable only when the main model and its projector are both present.
        ready = model_file.exists() and mmproj_file.exists()
        return {
            "id": model_id,
            "name": info["name"],
            "description": info["description"],
            "size_mb": info["size_mb"],
            "memory_required_mb": info["memory_required_mb"],
            "downloaded": ready,
            "model_path": str(model_file) if model_file.exists() else None,
            "mmproj_path": str(mmproj_file) if mmproj_file.exists() else None,
            "recommended": info.get("recommended", False),
            "chat_handler_type": info.get("chat_handler_type", "llava15"),
        }

    return [describe(model_id, info) for model_id, info in AVAILABLE_VISION_MODELS.items()]


def recognize_image_with_local_vision(image_path: str, max_chars: int = 5000) -> str:
    """Extract text from an image with the local vision model (used by ocr_optimizer).

    Returns:
        The recognised text truncated to ``max_chars``, or an empty string
        when no vision model is loaded, the reply is empty, or recognition
        fails (the error is logged).
    """
    vision = get_vision_model()
    if not vision.is_loaded():
        return ""

    try:
        recognized = vision.recognize_image(
            image_path,
            prompt="请详细识别这张图片中的所有文字内容，包括中文和英文。只返回文字内容，不要解释。",
            max_tokens=1500
        )
        if not recognized:
            return ""
        return recognized[:max_chars]
    except Exception as exc:
        logger.error(f"本地视觉模型识别失败: {str(exc)}")
        return ""


def test_builtin_vision_model() -> tuple:
    """
    Report whether the built-in vision model is loaded and ready.

    No inference is run; only the loaded state is checked.

    Returns:
        (success: bool, message: str). Never raises: any exception is turned
        into a failure tuple.
    """
    try:
        vision = get_vision_model()
        if not vision.is_loaded():
            return False, "视觉模型未加载，请先加载模型"

        if vision.get_model_path():
            model_name = os.path.basename(vision.get_model_path())
        else:
            model_name = "未知"

        message = (
            f"视觉模型 {model_name} 已成功加载！\n\n"
            f"模型处于就绪状态，遇到图片时将自动使用本地视觉模型进行识别。\n"
            f"无需网络连接，完全离线运行。"
        )
        return True, message
    except Exception as exc:
        return False, f"测试失败: {str(exc)}"

