# -*- coding: utf-8 -*-
"""
自定义提示词优先级模块 - 确保用户自定义提示词具有最高优先级
"""

import os
import re
from typing import Dict, List, Optional, Any
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
from logger import logger

from lib.ai import FileClassification, classify_document, match_existing_categories
from langchain_openai import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from pydantic import BaseModel, Field

# Maximum time, in seconds, to wait on the future of a single LLM invocation.
LLM_INVOKE_TIMEOUT = 60


class CustomPromptClassifier:
    """自定义提示词优先级分类器"""
    
    def __init__(self):
        self.custom_prompt_indicators = [
            "按照", "根据", "必须", "一定要", "要求", "需要",
            "according to", "must", "should", "required", "need to"
        ]
    
    def has_strong_custom_prompt(self, custom_prompt: str) -> bool:
        """检查是否有强烈的自定义要求"""
        if not custom_prompt:
            return False
        
        custom_prompt_lower = custom_prompt.lower()
        
        # 检查是否包含强烈的指示词
        strong_indicators = [
            "必须", "一定要", "强制", "只能", "禁止", "不允许",
            "must", "only", "never", "always", "required", "mandatory"
        ]
        
        return any(indicator in custom_prompt_lower for indicator in strong_indicators)
    
    def classify_with_custom_prompt_priority(self, file_path: str, content: str, 
                                           llm: ChatOpenAI, custom_prompt: str,
                                           reference_items: dict = None,
                                           existing_categories: List[str] = None,
                                           context_analysis: Dict[str, Any] = None,
                                           language: str = "Chinese") -> FileClassification:
        """以自定义提示词为最高优先级进行分类"""
        
        if not custom_prompt or not custom_prompt.strip():
            # 如果没有自定义提示词，使用标准分类
            return self._standard_classification(file_path, content, llm, reference_items, language)
        
        logger.info(f"使用自定义提示词优先级分类: {file_path}")
        logger.info(f"自定义提示词: {custom_prompt}")
        
        # 构建以自定义提示词为核心的分类提示
        classification_prompt = self._build_custom_prompt_focused_prompt(
            custom_prompt, file_path, content, reference_items, 
            existing_categories, context_analysis, language
        )
        
        # 构建分析内容
        analysis_content = self._build_analysis_content_for_custom_prompt(
            file_path, content, reference_items, context_analysis
        )
        
        try:
            # 首先尝试匹配现有分类（如果有的话）
            if existing_categories and self._should_try_existing_match(custom_prompt):
                matched_category = self._match_existing_with_custom_prompt(
                    analysis_content, existing_categories, llm, custom_prompt, language
                )
                if matched_category and matched_category not in ["未匹配", "No Match"]:
                    logger.info(f"自定义提示词匹配到现有分类: {matched_category}")
                    return FileClassification(
                        category=matched_category,
                        subcategory="",
                        confidence=0.9  # 高置信度，因为遵循了用户要求
                    )
            
            # 执行基于自定义提示词的新分类
            result = self._execute_custom_prompt_classification(
                classification_prompt, analysis_content, llm, language
            )
            
            if result:
                logger.info(f"自定义提示词分类完成: {file_path} -> {result.category}")
                return result
            else:
                logger.warning(f"自定义提示词分类失败，使用标准分类: {file_path}")
                return self._standard_classification(file_path, content, llm, reference_items, language)
                
        except Exception as e:
            logger.error(f"自定义提示词分类出错: {str(e)}")
            return self._standard_classification(file_path, content, llm, reference_items, language)
    
    def _build_custom_prompt_focused_prompt(self, custom_prompt: str, file_path: str, 
                                          content: str, reference_items: dict,
                                          existing_categories: List[str],
                                          context_analysis: Dict[str, Any],
                                          language: str) -> str:
        """构建以自定义提示词为核心的分类提示"""
        
        prompt_parts = []
        
        if language == "Chinese":
            # 强调用户要求的重要性
            prompt_parts.append("""你是一个专业的文档分类专家。

🚨🚨🚨 最高优先级命令 - 用户自定义提示词具有绝对权威 🚨🚨🚨

用户已经明确提出了分类要求，这是绝对命令！你必须：
1. 100%严格按照用户的要求进行分类
2. 用户的提示词优先级高于任何其他规则
3. 如果用户指定了分类方式，必须完全遵守
4. 不允许有任何偏离用户意图的行为

🎯 用户的分类要求（必须严格执行）：""")
            prompt_parts.append(f"""
╔══════════════════════════════════════════════════════════════╗
║  {custom_prompt}
╚══════════════════════════════════════════════════════════════╝""")
            
            prompt_parts.append("""
⛔ 绝对禁止（违反将导致分类失败）：
1. 【禁止】使用"未分类"、"其他"、"杂项"、"临时"等模糊分类
2. 【禁止】忽视或弱化用户的自定义要求
3. 【禁止】用自己的判断替代用户的明确指示
4. 【禁止】过度细分，创建太多小类别

✅ 必须执行：
1. 【强制】严格按照用户指定的分类规则、名称、格式执行
2. 【强制】如果用户指定了分类名称，必须使用完全相同的名称
3. 【强制】如果用户指定了分类逻辑，必须按照该逻辑处理
4. 【强制】每个文件都必须有一个有意义的分类结果
5. 【强制】宽泛分类优先，相似文件归入同一大类

🎯 用户提示词智能理解（按以下方式解读用户意图）：

📅 时间相关提示词识别：
- "按时间"、"按日期"、"按月份"、"按年份" → 使用文件修改时间分类（如"2025年1月"、"2024年"）
- "最近的文件"、"新文件" → 分析文件时间戳

📁 类型相关提示词识别：
- "按类型"、"按格式"、"按扩展名" → 按文件类型分类（文档、图片、视频、音频等）
- "文档"、"图片"、"视频" → 将对应类型文件归入该分类

📂 项目/主题相关提示词识别：
- "按项目"、"按工作" → 识别项目名称或工作相关文件
- "按主题"、"按内容" → 分析文件内容确定主题
- 具体项目名称（如"XX项目"）→ 将相关文件归入该项目

📏 大小相关提示词识别：
- "按大小"、"大文件"、"小文件" → 按文件大小范围分类

🔤 自定义分类名称识别：
- 如果用户在提示词中明确列出了分类名称，直接使用这些名称
- 例如用户说"分为工作、生活、学习三类"，就只使用这三个分类

💡 当不确定时：
1. 首先理解用户提示词的核心意图
2. 然后选择最符合该意图的分类方式
3. 保持分类数量在5-10个之间
4. 绝对不能使用"未分类"！""")
            
        else:
            prompt_parts.append("""You are a professional document classification expert.

🚨🚨🚨 HIGHEST PRIORITY COMMAND - MUST EXECUTE UNCONDITIONALLY 🚨🚨🚨

The user has explicitly stated classification requirements. This is an ABSOLUTE COMMAND that you must follow 100% strictly with NO deviation allowed!

🎯 User's Classification Requirements (MUST STRICTLY EXECUTE):""")
            prompt_parts.append(f"""
╔══════════════════════════════════════════════════════════════╗
║  {custom_prompt}
╚══════════════════════════════════════════════════════════════╝""")
            
            prompt_parts.append("""
⛔ ABSOLUTELY FORBIDDEN (violation will cause classification failure):
1. [FORBIDDEN] Using "Unclassified", "Other", "Miscellaneous", "Temporary" or any vague categories
2. [FORBIDDEN] Ignoring or weakening user's custom requirements
3. [FORBIDDEN] Replacing user's explicit instructions with your own judgment

✅ MUST EXECUTE:
1. [MANDATORY] Strictly follow user-specified classification rules, names, and formats
2. [MANDATORY] If user specifies category names, use the EXACT same names
3. [MANDATORY] If user specifies classification logic, follow that logic exactly
4. [MANDATORY] Every file MUST have a meaningful classification result

🎯 Smart Recognition of Special Requirements:
- User wants time-based classification → Use file modification date (e.g., "2025/10", "October 2025")
- User wants type-based classification → Use file extension types (e.g., "Documents", "Images", "Videos")
- User wants topic-based classification → Analyze filename and content to determine topic
- User wants size-based classification → Use file size ranges (e.g., "Large", "Medium", "Small")
- User wants project-based classification → Identify project names in filename or path

💡 When uncertain: Make the most reasonable judgment based on the spirit of user's requirements, but NEVER use "Unclassified"!""")
        
        # 添加现有分类信息（如果有）
        if existing_categories:
            if language == "Chinese":
                prompt_parts.append(f"\n📂 现有分类选项：\n{', '.join(existing_categories)}")
                prompt_parts.append("\n💡 提示：如果用户要求与现有分类匹配，优先使用现有分类；如果用户要求创建新分类，则创建新分类。")
            else:
                prompt_parts.append(f"\n📂 Existing Categories:\n{', '.join(existing_categories)}")
                prompt_parts.append("\n💡 Tip: If user requirements match existing categories, use existing categories; if user requirements need new categories, create new categories.")
        
        # 添加上下文信息（如果有）
        if context_analysis:
            directory_purpose = context_analysis.get('directory_purpose')
            if directory_purpose:
                if language == "Chinese":
                    prompt_parts.append(f"\n📁 目录上下文：{directory_purpose}")
                else:
                    prompt_parts.append(f"\n📁 Directory Context: {directory_purpose}")
        
        # 输出格式要求
        if language == "Chinese":
            prompt_parts.append("""
📤 输出格式要求：
请返回JSON格式的分类结果，包含以下字段：
- category: 主要分类（必须符合用户要求）
- subcategory: 子分类（如果用户要求中有具体说明）
- confidence: 置信度（按照用户要求分类时应为0.85-0.95）

示例格式：
{
  "category": "按用户要求的分类名称",
  "subcategory": "具体子分类或general",
  "confidence": 0.9
}""")
        else:
            prompt_parts.append("""
📤 Output Format Requirements:
Please return classification results in JSON format with the following fields:
- category: Main category (must comply with user requirements)
- subcategory: Subcategory (if specified in user requirements)
- confidence: Confidence level (should be 0.85-0.95 when classifying according to user requirements)

Example format:
{
  "category": "Category name as per user requirements",
  "subcategory": "Specific subcategory or general",
  "confidence": 0.9
}""")
        
        return "\n".join(prompt_parts)
    
    def _build_analysis_content_for_custom_prompt(self, file_path: str, content: str,
                                                reference_items: dict,
                                                context_analysis: Dict[str, Any]) -> str:
        """为自定义提示词分类构建分析内容"""
        
        content_parts = []
        
        # 文件基本信息
        filename = os.path.basename(file_path)
        content_parts.append(f"文件名: {filename}")
        
        file_extension = Path(file_path).suffix.lower()
        if file_extension:
            content_parts.append(f"文件类型: {file_extension}")
        
        # 🎯 获取文件时间信息（对于时间相关的自定义提示词很重要）
        try:
            import datetime
            
            # 获取文件的修改时间、创建时间等
            if os.path.exists(file_path):
                # 修改时间
                mtime = os.path.getmtime(file_path)
                mtime_str = datetime.datetime.fromtimestamp(mtime).strftime('%Y/%m/%d %H:%M:%S')
                content_parts.append(f"文件修改时间: {mtime_str}")
                
                # 年月信息（用于按时间分类）
                year_month = datetime.datetime.fromtimestamp(mtime).strftime('%Y/%m')
                content_parts.append(f"修改年月: {year_month}")
                
                # 创建时间（Windows）
                try:
                    ctime = os.path.getctime(file_path)
                    ctime_str = datetime.datetime.fromtimestamp(ctime).strftime('%Y/%m/%d %H:%M:%S')
                    content_parts.append(f"文件创建时间: {ctime_str}")
                    
                    # 创建年月
                    create_year_month = datetime.datetime.fromtimestamp(ctime).strftime('%Y/%m')
                    content_parts.append(f"创建年月: {create_year_month}")
                except:
                    pass
                    
                # 文件大小
                file_size = os.path.getsize(file_path)
                if file_size < 1024:
                    size_str = f"{file_size} bytes"
                elif file_size < 1024 * 1024:
                    size_str = f"{file_size / 1024:.1f} KB"
                else:
                    size_str = f"{file_size / (1024 * 1024):.1f} MB"
                content_parts.append(f"文件大小: {size_str}")
                
        except Exception as e:
            logger.warning(f"获取文件时间信息失败: {str(e)}")
        
        # 根据参考项设置决定包含的内容
        use_content = reference_items.get('content', True) if reference_items else True
        use_filename = reference_items.get('filename', False) if reference_items else False
        
        if use_filename or not use_content:
            # 如果重点关注文件名，提供更多文件名分析
            content_parts.append(f"重点分析文件名: {filename}")
        
        if use_content and content:
            # 提供内容摘要
            content_preview = content[:1000] if len(content) > 1000 else content
            content_parts.append(f"文件内容预览: {content_preview}")
            
            if len(content) > 1000:
                content_parts.append(f"... (内容已截取，总长度: {len(content)} 字符)")
        
        # 添加上下文信息
        if context_analysis:
            related_files = context_analysis.get('related_files', [])
            if related_files:
                related_names = [os.path.basename(f) for f in related_files[:3]]
                content_parts.append(f"相关文件: {', '.join(related_names)}")
        
        return "\n".join(content_parts)
    
    def _should_try_existing_match(self, custom_prompt: str) -> bool:
        """判断是否应该尝试匹配现有分类"""
        if not custom_prompt:
            return True
        
        # 如果用户明确要求创建新分类，就不要匹配现有的
        create_new_indicators = [
            "新建", "创建", "新的", "create new", "new category", "new folder"
        ]
        
        custom_prompt_lower = custom_prompt.lower()
        return not any(indicator in custom_prompt_lower for indicator in create_new_indicators)
    
    def _match_existing_with_custom_prompt(self, analysis_content: str, 
                                         existing_categories: List[str],
                                         llm: ChatOpenAI, custom_prompt: str,
                                         language: str) -> Optional[str]:
        """基于自定义提示词匹配现有分类"""
        
        try:
            if language == "Chinese":
                system_prompt = f"""你是一个专业的文档分类专家。用户提出了特定的分类要求，请根据用户要求从现有分类中选择最合适的分类。

用户的分类要求：
{custom_prompt}

任务：
1. 仔细理解用户的分类要求
2. 从给定的分类列表中选择最符合用户要求的分类
3. 如果没有完全匹配的分类，返回"未匹配"
4. 只返回一个分类名称，不要有任何其他文本

重要：严格按照用户要求进行匹配，用户的要求是最高优先级！"""
            else:
                system_prompt = f"""You are a professional document classification expert. The user has specific classification requirements. Please select the most appropriate classification from existing categories based on user requirements.

User's Classification Requirements:
{custom_prompt}

Tasks:
1. Carefully understand the user's classification requirements
2. Select the classification that best meets user requirements from the given list
3. If no perfect match exists, return "No Match"
4. Only return one classification name, no other text

Important: Strictly match according to user requirements, user requirements are the highest priority!"""
            
            categories_text = "、".join(existing_categories) if language == "Chinese" else ", ".join(existing_categories)
            user_prompt = f"可选分类: {categories_text}\n\n文件信息:\n{analysis_content}" if language == "Chinese" else f"Available categories: {categories_text}\n\nFile information:\n{analysis_content}"
            
            messages = [
                SystemMessage(content=system_prompt),
                HumanMessage(content=user_prompt)
            ]
            
            # 🔧 添加超时保护，防止 LLM 调用永久阻塞
            response = None
            with ThreadPoolExecutor(max_workers=1) as executor:
                future = executor.submit(llm.invoke, messages)
                try:
                    response = future.result(timeout=LLM_INVOKE_TIMEOUT)
                except FuturesTimeoutError:
                    logger.warning(f"⚠️ LLM调用超时（{LLM_INVOKE_TIMEOUT}秒）")
                    return None
            
            if response is None:
                return None
                
            result = response.content.strip()
            
            # 验证结果
            if result in existing_categories:
                return result
            
            # 模糊匹配（考虑到可能的格式差异）
            result_lower = result.lower()
            for category in existing_categories:
                if category.lower() in result_lower or result_lower in category.lower():
                    return category
            
            return None
            
        except Exception as e:
            logger.error(f"自定义提示词匹配现有分类失败: {str(e)}")
            return None
    
    def _execute_custom_prompt_classification(self, classification_prompt: str,
                                            analysis_content: str, llm: ChatOpenAI,
                                            language: str) -> Optional[FileClassification]:
        """执行基于自定义提示词的分类"""
        
        # 🔧 修复：检查LLM类型，对OpenAI兼容的模型使用结构化输出
        model_type = llm.__class__.__name__
        is_openai_compatible = model_type in ["ChatOpenAI", "OpenAI"]
        
        logger.info(f"🎯 检测到LLM类型: {model_type}, OpenAI兼容: {is_openai_compatible}")
        
        # 🎯 重要修复：对于第三方API，优先使用更稳定的文本解析方式
        # 因为第三方API的结构化输出支持可能不稳定
        logger.info(f"🎯 为了确保稳定性，对所有模型都使用文本解析方式")
        return self._parse_custom_prompt_response(classification_prompt, analysis_content, llm, language)
        
        # 以下代码保留，但暂时不使用结构化输出（因为第三方API支持不稳定）
        """
        if is_openai_compatible:
            try:
                # 只对OpenAI兼容的模型使用结构化输出
                structured_llm = llm.with_structured_output(FileClassification)
                
                messages = [
                    SystemMessage(content=classification_prompt),
                    HumanMessage(content=f"请根据用户的自定义要求对以下文件进行分类:\n\n{analysis_content}" if language == "Chinese" else f"Please classify the following file according to user's custom requirements:\n\n{analysis_content}")
                ]
                
                result = structured_llm.invoke(messages)
                
                if result and hasattr(result, 'category'):
                    # 确保置信度合理（用户明确要求时应该有较高置信度）
                    if result.confidence < 0.8:
                        result.confidence = 0.85  # 提升置信度，因为这是按用户要求分类的
                    
                    return result
                else:
                    # 回退到文本解析
                    return self._parse_custom_prompt_response(classification_prompt, analysis_content, llm, language)
                    
            except Exception as e:
                logger.warning(f"OpenAI结构化输出失败，回退到文本解析: {str(e)}")
                return self._parse_custom_prompt_response(classification_prompt, analysis_content, llm, language)
        else:
            # 对于非OpenAI模型（如Ollama），直接使用文本解析
            logger.info(f"检测到非OpenAI模型 ({model_type})，使用文本解析方式")
            return self._parse_custom_prompt_response(classification_prompt, analysis_content, llm, language)
        """
    
    def _parse_custom_prompt_response(self, classification_prompt: str, analysis_content: str,
                                    llm: ChatOpenAI, language: str) -> Optional[FileClassification]:
        """解析自定义提示词分类响应 - 🔧 优化错误处理和兼容性"""
        
        max_retries = 2
        for attempt in range(max_retries):
            try:
                messages = [
                    SystemMessage(content=classification_prompt),
                    HumanMessage(content=f"请返回JSON格式的分类结果:\n\n{analysis_content}" if language == "Chinese" else f"Please return classification result in JSON format:\n\n{analysis_content}")
                ]
                
                # 🔧 添加超时保护，防止 LLM 调用永久阻塞导致程序崩溃
                response = None
                with ThreadPoolExecutor(max_workers=1) as executor:
                    future = executor.submit(llm.invoke, messages)
                    try:
                        response = future.result(timeout=LLM_INVOKE_TIMEOUT)
                    except FuturesTimeoutError:
                        logger.warning(f"⚠️ LLM调用超时（{LLM_INVOKE_TIMEOUT}秒），尝试 {attempt + 1}/{max_retries}")
                        continue
                
                if response is None:
                    continue
                response_text = response.content.strip()
                
                # 尝试解析JSON
                import json
                json_match = re.search(r'\{[^}]*\}', response_text)
                if json_match:
                    try:
                        json_str = json_match.group()
                        data = json.loads(json_str)
                        
                        category = data.get('category', '待整理文档' if language == "Chinese" else 'Documents to Organize')
                        subcategory = data.get('subcategory', '')
                        confidence = float(data.get('confidence', 0.85))
                        
                        return FileClassification(
                            category=category,
                            subcategory=subcategory,
                            confidence=max(confidence, 0.8)  # 确保自定义分类有足够的置信度
                        )
                    except Exception as parse_error:
                        logger.warning(f"JSON解析失败 (尝试 {attempt + 1}/{max_retries}): {str(parse_error)}")
                
                # 简单文本解析
                lines = response_text.split('\n')
                category = '待整理文档' if language == "Chinese" else 'Documents to Organize'
                subcategory = ''
                confidence = 0.85
                
                for line in lines:
                    line = line.strip()
                    if '类别' in line or 'category' in line.lower():
                        category = line.split(':', 1)[-1].strip().strip('"\'')
                    elif '子类别' in line or 'subcategory' in line.lower():
                        subcategory = line.split(':', 1)[-1].strip().strip('"\'')
                    elif '置信度' in line or 'confidence' in line.lower():
                        try:
                            conf_str = line.split(':', 1)[-1].strip()
                            confidence = float(re.findall(r'[\d.]+', conf_str)[0])
                        except:
                            pass
                
                return FileClassification(
                    category=category,
                    subcategory=subcategory,
                    confidence=max(confidence, 0.8)
                )
                
            except Exception as e:
                error_msg = str(e)
                
                # 检查是否是API密钥错误
                if "401" in error_msg or "invalid_api_key" in error_msg:
                    logger.error(f"API密钥无效，无法继续自定义提示词分类: {error_msg}")
                    return None  # 直接返回，不再重试
                
                # 检查是否是网络连接错误
                if "connection" in error_msg.lower() or "timeout" in error_msg.lower():
                    if attempt < max_retries - 1:
                        logger.warning(f"网络错误，重试 {attempt + 1}/{max_retries}: {error_msg}")
                        import time
                        time.sleep(1)  # 等待1秒后重试
                        continue
                
                logger.error(f"解析自定义提示词响应失败 (尝试 {attempt + 1}/{max_retries}): {error_msg}")
                
                if attempt == max_retries - 1:
                    # 最后一次尝试失败，返回默认分类（但绝不使用"未分类"）
                    logger.warning("自定义提示词分类完全失败，返回待整理分类")
                    return FileClassification(
                        category='待整理文档' if language == "Chinese" else 'Documents to Organize',
                        subcategory='',
                        confidence=0.5  # 较低置信度表示这是默认分类
                    )
        
        return None
    
    def _standard_classification(self, file_path: str, content: str, llm: ChatOpenAI,
                               reference_items: dict, language: str) -> FileClassification:
        """标准分类方法（回退方案）"""
        
        try:
            filename = os.path.basename(file_path) if file_path else None
            return classify_document(
                content=content,
                llm=llm,
                custom_prompt="",  # 不使用自定义提示词
                filename=filename,
                reference_items=reference_items,
                language=language
            )
        except Exception as e:
            logger.error(f"标准分类失败 {file_path}: {str(e)}")
            # 根据文件扩展名推断一个合理的默认分类，而不是使用"其他"
            if file_path:
                ext = Path(file_path).suffix.lower()
                default_categories = {
                    # 图片
                    '.jpg': ('图片素材' if language == "Chinese" else 'Images', 'general'),
                    '.jpeg': ('图片素材' if language == "Chinese" else 'Images', 'general'),
                    '.png': ('图片素材' if language == "Chinese" else 'Images', 'general'),
                    '.gif': ('图片素材' if language == "Chinese" else 'Images', 'general'),
                    '.webp': ('图片素材' if language == "Chinese" else 'Images', 'general'),
                    '.bmp': ('图片素材' if language == "Chinese" else 'Images', 'general'),
                    # 文档
                    '.pdf': ('PDF文档' if language == "Chinese" else 'PDF Documents', 'general'),
                    '.doc': ('Word文档' if language == "Chinese" else 'Word Documents', 'general'),
                    '.docx': ('Word文档' if language == "Chinese" else 'Word Documents', 'general'),
                    '.xls': ('Excel表格' if language == "Chinese" else 'Excel Spreadsheets', 'general'),
                    '.xlsx': ('Excel表格' if language == "Chinese" else 'Excel Spreadsheets', 'general'),
                    '.ppt': ('PPT演示' if language == "Chinese" else 'PowerPoint', 'general'),
                    '.pptx': ('PPT演示' if language == "Chinese" else 'PowerPoint', 'general'),
                    '.txt': ('文本文件' if language == "Chinese" else 'Text Files', 'general'),
                    # 视频
                    '.mp4': ('视频文件' if language == "Chinese" else 'Videos', 'general'),
                    '.avi': ('视频文件' if language == "Chinese" else 'Videos', 'general'),
                    '.mov': ('视频文件' if language == "Chinese" else 'Videos', 'general'),
                    '.mkv': ('视频文件' if language == "Chinese" else 'Videos', 'general'),
                    # 音频
                    '.mp3': ('音频文件' if language == "Chinese" else 'Audio', 'general'),
                    '.wav': ('音频文件' if language == "Chinese" else 'Audio', 'general'),
                    '.flac': ('音频文件' if language == "Chinese" else 'Audio', 'general'),
                    # 压缩包
                    '.zip': ('压缩文件' if language == "Chinese" else 'Archives', 'general'),
                    '.rar': ('压缩文件' if language == "Chinese" else 'Archives', 'general'),
                    '.7z': ('压缩文件' if language == "Chinese" else 'Archives', 'general'),
                }
                if ext in default_categories:
                    cat, subcat = default_categories[ext]
                    return FileClassification(category=cat, subcategory=subcat, confidence=0.6)
            
            # 最后的默认值 - 使用"待整理"而非"其他"
            return FileClassification(
                category='待整理文件' if language == "Chinese" else 'Files to Organize',
                subcategory='',
                confidence=0.5
            )


# Module-wide classifier instance, created on first request.
_custom_prompt_classifier = None


def get_custom_prompt_classifier() -> CustomPromptClassifier:
    """Return the shared ``CustomPromptClassifier``, creating it on first use."""
    global _custom_prompt_classifier
    instance = _custom_prompt_classifier
    if instance is None:
        instance = CustomPromptClassifier()
        _custom_prompt_classifier = instance
    return instance


def classify_with_custom_prompt_priority(file_path: str, content: str, llm: ChatOpenAI,
                                       custom_prompt: str, reference_items: dict = None,
                                       existing_categories: List[str] = None,
                                       context_analysis: Dict[str, Any] = None,
                                       language: str = "Chinese") -> FileClassification:
    """Module-level entry point for custom-prompt-priority classification.

    Forwards every argument unchanged to
    ``CustomPromptClassifier.classify_with_custom_prompt_priority`` on the
    shared classifier instance and returns its result.
    """
    return get_custom_prompt_classifier().classify_with_custom_prompt_priority(
        file_path=file_path,
        content=content,
        llm=llm,
        custom_prompt=custom_prompt,
        reference_items=reference_items,
        existing_categories=existing_categories,
        context_analysis=context_analysis,
        language=language,
    )


# Names exported as this module's public API.
__all__ = [
    'CustomPromptClassifier',
    'get_custom_prompt_classifier',
    'classify_with_custom_prompt_priority'
]

