# -*- coding: utf-8 -*-
"""
自定义提示词优先级模块 - 确保用户自定义提示词具有最高优先级
"""

import os
import re
from typing import Dict, List, Optional, Any
from pathlib import Path
from logger import logger

from lib.ai import FileClassification, classify_document, match_existing_categories
from langchain_openai import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from pydantic import BaseModel, Field


class CustomPromptClassifier:
    """自定义提示词优先级分类器"""
    
    def __init__(self):
        self.custom_prompt_indicators = [
            "按照", "根据", "必须", "一定要", "要求", "需要",
            "according to", "must", "should", "required", "need to"
        ]
    
    def has_strong_custom_prompt(self, custom_prompt: str) -> bool:
        """检查是否有强烈的自定义要求"""
        if not custom_prompt:
            return False
        
        custom_prompt_lower = custom_prompt.lower()
        
        # 检查是否包含强烈的指示词
        strong_indicators = [
            "必须", "一定要", "强制", "只能", "禁止", "不允许",
            "must", "only", "never", "always", "required", "mandatory"
        ]
        
        return any(indicator in custom_prompt_lower for indicator in strong_indicators)
    
    def classify_with_custom_prompt_priority(self, file_path: str, content: str,
                                           llm: ChatOpenAI, custom_prompt: str,
                                           reference_items: dict = None,
                                           existing_categories: List[str] = None,
                                           context_analysis: Dict[str, Any] = None,
                                           language: str = "Chinese") -> FileClassification:
        """Classify a file, treating the user's custom prompt as the top priority.

        Flow:
            1. A blank prompt goes straight to ``_standard_classification``.
            2. If existing categories were supplied and the prompt does not ask
               for new ones, the LLM is asked to pick an existing category; a
               hit is returned with confidence 0.9 and an empty subcategory.
            3. Otherwise a fresh classification driven by the custom prompt is
               requested.
            4. A falsy result or any exception raised in steps 2-3 falls back
               to ``_standard_classification``.

        Args:
            file_path: Path of the file being classified.
            content: Text content of the file.
            llm: Chat model used for every LLM call.
            custom_prompt: The user's classification instructions.
            reference_items: Optional flags forwarded to the prompt/content builders.
            existing_categories: Optional list of already existing category names.
            context_analysis: Optional dictionary with directory context.
            language: ``"Chinese"`` selects Chinese prompts; anything else English.

        Returns:
            The resulting ``FileClassification``.
        """

        def fall_back_to_standard() -> FileClassification:
            # Single place for the standard-classification fallback call.
            return self._standard_classification(file_path, content, llm, reference_items, language)

        prompt_is_blank = not custom_prompt or not custom_prompt.strip()
        if prompt_is_blank:
            return fall_back_to_standard()

        logger.info(f"使用自定义提示词优先级分类: {file_path}")
        logger.info(f"自定义提示词: {custom_prompt}")

        # Both texts are built up front, outside the guarded section below.
        classification_prompt = self._build_custom_prompt_focused_prompt(
            custom_prompt, file_path, content, reference_items,
            existing_categories, context_analysis, language
        )
        analysis_content = self._build_analysis_content_for_custom_prompt(
            file_path, content, reference_items, context_analysis
        )

        try:
            # Step 1: try to reuse an existing category when that is appropriate.
            if existing_categories and self._should_try_existing_match(custom_prompt):
                matched_category = self._match_existing_with_custom_prompt(
                    analysis_content, existing_categories, llm, custom_prompt, language
                )
                no_match_markers = ["未匹配", "No Match"]
                if matched_category and matched_category not in no_match_markers:
                    logger.info(f"自定义提示词匹配到现有分类: {matched_category}")
                    # High confidence because the user's instruction was followed.
                    return FileClassification(
                        category=matched_category,
                        subcategory="",
                        confidence=0.9
                    )

            # Step 2: ask for a brand-new classification based on the prompt.
            result = self._execute_custom_prompt_classification(
                classification_prompt, analysis_content, llm, language
            )

            if not result:
                logger.warning(f"自定义提示词分类失败，使用标准分类: {file_path}")
                return fall_back_to_standard()

            logger.info(f"自定义提示词分类完成: {file_path} -> {result.category}")
            return result

        except Exception as e:
            logger.error(f"自定义提示词分类出错: {str(e)}")
            return fall_back_to_standard()
    
    def _build_custom_prompt_focused_prompt(self, custom_prompt: str, file_path: str, 
                                          content: str, reference_items: dict,
                                          existing_categories: List[str],
                                          context_analysis: Dict[str, Any],
                                          language: str) -> str:
        """构建以自定义提示词为核心的分类提示"""
        
        prompt_parts = []
        
        if language == "Chinese":
            # 强调用户要求的重要性
            prompt_parts.append("""你是一个专业的文档分类专家。用户已经明确提出了分类要求，你必须严格按照用户的要求进行分类。

⚠️ 重要提醒：用户的自定义要求是最高优先级，必须严格遵守！

🎯 用户的分类要求：""")
            prompt_parts.append(f"「{custom_prompt}」")
            
            prompt_parts.append("""
📋 分类执行原则：
1. **用户要求优先**：严格按照用户的自定义要求进行分类
2. **精确理解**：仔细分析用户要求的具体含义和意图
3. **灵活应用**：在满足用户要求的前提下，创建合理的分类结构
4. **高置信度**：按照用户要求分类时，应给出高置信度分数（0.85-0.95）
5. **避免偏离**：不要被其他因素影响，专注于用户的明确要求

🎯 特殊要求识别：
- 如果用户要求按时间分类（如"修改时间"、"创建时间"等），请重点关注文件的时间信息
- 如果用户要求按年月分类（如"2025/10"、"2025/09"），请使用文件的修改年月或创建年月进行分类
- 如果用户要求按文件类型分类，请重点关注文件扩展名
- 如果用户要求按内容主题分类，请重点分析文件内容
- 如果用户要求按文件大小分类，请关注文件大小信息

如果用户要求中提到了具体的分类名称、分类方式或分类标准，必须严格遵循。""")
            
        else:
            prompt_parts.append("""You are a professional document classification expert. The user has explicitly stated classification requirements, and you must strictly follow the user's requirements for classification.

⚠️ Important Reminder: The user's custom requirements are the highest priority and must be strictly followed!

🎯 User's Classification Requirements:""")
            prompt_parts.append(f"「{custom_prompt}」")
            
            prompt_parts.append("""
📋 Classification Execution Principles:
1. **User Requirements First**: Strictly classify according to user's custom requirements
2. **Precise Understanding**: Carefully analyze the specific meaning and intent of user requirements
3. **Flexible Application**: Create reasonable classification structure while meeting user requirements
4. **High Confidence**: Give high confidence scores when classifying according to user requirements
5. **Avoid Deviation**: Don't be influenced by other factors, focus on user's explicit requirements

If the user requirements mention specific category names, classification methods, or classification standards, they must be strictly followed.""")
        
        # 添加现有分类信息（如果有）
        if existing_categories:
            if language == "Chinese":
                prompt_parts.append(f"\n📂 现有分类选项：\n{', '.join(existing_categories)}")
                prompt_parts.append("\n💡 提示：如果用户要求与现有分类匹配，优先使用现有分类；如果用户要求创建新分类，则创建新分类。")
            else:
                prompt_parts.append(f"\n📂 Existing Categories:\n{', '.join(existing_categories)}")
                prompt_parts.append("\n💡 Tip: If user requirements match existing categories, use existing categories; if user requirements need new categories, create new categories.")
        
        # 添加上下文信息（如果有）
        if context_analysis:
            directory_purpose = context_analysis.get('directory_purpose')
            if directory_purpose:
                if language == "Chinese":
                    prompt_parts.append(f"\n📁 目录上下文：{directory_purpose}")
                else:
                    prompt_parts.append(f"\n📁 Directory Context: {directory_purpose}")
        
        # 输出格式要求
        if language == "Chinese":
            prompt_parts.append("""
📤 输出格式要求：
请返回JSON格式的分类结果，包含以下字段：
- category: 主要分类（必须符合用户要求）
- subcategory: 子分类（如果用户要求中有具体说明）
- confidence: 置信度（按照用户要求分类时应为0.85-0.95）

示例格式：
{
  "category": "按用户要求的分类名称",
  "subcategory": "具体子分类或general",
  "confidence": 0.9
}""")
        else:
            prompt_parts.append("""
📤 Output Format Requirements:
Please return classification results in JSON format with the following fields:
- category: Main category (must comply with user requirements)
- subcategory: Subcategory (if specified in user requirements)
- confidence: Confidence level (should be 0.85-0.95 when classifying according to user requirements)

Example format:
{
  "category": "Category name as per user requirements",
  "subcategory": "Specific subcategory or general",
  "confidence": 0.9
}""")
        
        return "\n".join(prompt_parts)
    
    def _build_analysis_content_for_custom_prompt(self, file_path: str, content: str,
                                                reference_items: dict,
                                                context_analysis: Dict[str, Any]) -> str:
        """为自定义提示词分类构建分析内容"""
        
        content_parts = []
        
        # 文件基本信息
        filename = os.path.basename(file_path)
        content_parts.append(f"文件名: {filename}")
        
        file_extension = Path(file_path).suffix.lower()
        if file_extension:
            content_parts.append(f"文件类型: {file_extension}")
        
        # 🎯 获取文件时间信息（对于时间相关的自定义提示词很重要）
        try:
            import datetime
            
            # 获取文件的修改时间、创建时间等
            if os.path.exists(file_path):
                # 修改时间
                mtime = os.path.getmtime(file_path)
                mtime_str = datetime.datetime.fromtimestamp(mtime).strftime('%Y/%m/%d %H:%M:%S')
                content_parts.append(f"文件修改时间: {mtime_str}")
                
                # 年月信息（用于按时间分类）
                year_month = datetime.datetime.fromtimestamp(mtime).strftime('%Y/%m')
                content_parts.append(f"修改年月: {year_month}")
                
                # 创建时间（Windows）
                try:
                    ctime = os.path.getctime(file_path)
                    ctime_str = datetime.datetime.fromtimestamp(ctime).strftime('%Y/%m/%d %H:%M:%S')
                    content_parts.append(f"文件创建时间: {ctime_str}")
                    
                    # 创建年月
                    create_year_month = datetime.datetime.fromtimestamp(ctime).strftime('%Y/%m')
                    content_parts.append(f"创建年月: {create_year_month}")
                except:
                    pass
                    
                # 文件大小
                file_size = os.path.getsize(file_path)
                if file_size < 1024:
                    size_str = f"{file_size} bytes"
                elif file_size < 1024 * 1024:
                    size_str = f"{file_size / 1024:.1f} KB"
                else:
                    size_str = f"{file_size / (1024 * 1024):.1f} MB"
                content_parts.append(f"文件大小: {size_str}")
                
        except Exception as e:
            logger.warning(f"获取文件时间信息失败: {str(e)}")
        
        # 根据参考项设置决定包含的内容
        use_content = reference_items.get('content', True) if reference_items else True
        use_filename = reference_items.get('filename', False) if reference_items else False
        
        if use_filename or not use_content:
            # 如果重点关注文件名，提供更多文件名分析
            content_parts.append(f"重点分析文件名: {filename}")
        
        if use_content and content:
            # 提供内容摘要
            content_preview = content[:1000] if len(content) > 1000 else content
            content_parts.append(f"文件内容预览: {content_preview}")
            
            if len(content) > 1000:
                content_parts.append(f"... (内容已截取，总长度: {len(content)} 字符)")
        
        # 添加上下文信息
        if context_analysis:
            related_files = context_analysis.get('related_files', [])
            if related_files:
                related_names = [os.path.basename(f) for f in related_files[:3]]
                content_parts.append(f"相关文件: {', '.join(related_names)}")
        
        return "\n".join(content_parts)
    
    def _should_try_existing_match(self, custom_prompt: str) -> bool:
        """判断是否应该尝试匹配现有分类"""
        if not custom_prompt:
            return True
        
        # 如果用户明确要求创建新分类，就不要匹配现有的
        create_new_indicators = [
            "新建", "创建", "新的", "create new", "new category", "new folder"
        ]
        
        custom_prompt_lower = custom_prompt.lower()
        return not any(indicator in custom_prompt_lower for indicator in create_new_indicators)
    
    def _match_existing_with_custom_prompt(self, analysis_content: str,
                                         existing_categories: List[str],
                                         llm: ChatOpenAI, custom_prompt: str,
                                         language: str) -> Optional[str]:
        """Ask the LLM to pick one of the existing categories for the file.

        The reply is resolved against ``existing_categories`` in this order:
        exact match, then the longest category name contained in the reply,
        then the first category that contains the reply. An empty reply or a
        reply carrying the no-match marker ("未匹配" / "No Match") resolves to
        ``None``.

        Args:
            analysis_content: Text description of the file.
            existing_categories: Candidate category names.
            llm: Chat model that is invoked with a system and a human message.
            custom_prompt: The user's classification instructions.
            language: ``"Chinese"`` selects the Chinese prompt; anything else English.

        Returns:
            A member of ``existing_categories``, or ``None`` when nothing
            matched or the LLM call failed (failures are logged).
        """

        try:
            if language == "Chinese":
                system_prompt = f"""你是一个专业的文档分类专家。用户提出了特定的分类要求，请根据用户要求从现有分类中选择最合适的分类。

用户的分类要求：
{custom_prompt}

任务：
1. 仔细理解用户的分类要求
2. 从给定的分类列表中选择最符合用户要求的分类
3. 如果没有完全匹配的分类，返回"未匹配"
4. 只返回一个分类名称，不要有任何其他文本

重要：严格按照用户要求进行匹配，用户的要求是最高优先级！"""
            else:
                system_prompt = f"""You are a professional document classification expert. The user has specific classification requirements. Please select the most appropriate classification from existing categories based on user requirements.

User's Classification Requirements:
{custom_prompt}

Tasks:
1. Carefully understand the user's classification requirements
2. Select the classification that best meets user requirements from the given list
3. If no perfect match exists, return "No Match"
4. Only return one classification name, no other text

Important: Strictly match according to user requirements, user requirements are the highest priority!"""

            categories_text = "、".join(existing_categories) if language == "Chinese" else ", ".join(existing_categories)
            user_prompt = f"可选分类: {categories_text}\n\n文件信息:\n{analysis_content}" if language == "Chinese" else f"Available categories: {categories_text}\n\nFile information:\n{analysis_content}"

            messages = [
                SystemMessage(content=system_prompt),
                HumanMessage(content=user_prompt)
            ]

            response = llm.invoke(messages)
            raw_reply = response.content.strip()

            # Exact hit on the untouched reply.
            if raw_reply in existing_categories:
                return raw_reply

            # Models often wrap the name in quotes or end it with punctuation.
            result = raw_reply.strip('"\'“”‘’「」『』`.。 \t\r\n')
            if not result:
                # An empty string is a substring of everything; without this
                # guard the fuzzy match below would pick the first category.
                return None
            if result in existing_categories:
                return result

            result_lower = result.lower()

            # Honour the explicit "no match" answer requested by the prompt.
            if "未匹配" in result_lower or "no match" in result_lower:
                return None

            # Fuzzy match 1: a category name embedded in a longer reply.
            # The longest name wins so "技术文档" beats its substring "文档".
            embedded = [
                category for category in existing_categories
                if category and category.lower() in result_lower
            ]
            if embedded:
                return max(embedded, key=len)

            # Fuzzy match 2: the reply is an abbreviated form of a category.
            for category in existing_categories:
                if category and result_lower in category.lower():
                    return category

            return None

        except Exception as e:
            logger.error(f"自定义提示词匹配现有分类失败: {str(e)}")
            return None
    
    def _execute_custom_prompt_classification(self, classification_prompt: str,
                                            analysis_content: str, llm: ChatOpenAI,
                                            language: str) -> Optional[FileClassification]:
        """Run the custom-prompt classification through the text-parsing path.

        The LLM class name is logged for diagnostics only; every model is
        handled by ``_parse_custom_prompt_response``.

        NOTE: a structured-output path (``llm.with_structured_output``) for
        OpenAI-compatible models used to live here as disabled code. It was
        switched off because structured-output support of third-party APIs
        proved unstable; the dead code was removed and can be restored from
        version control if needed.

        Args:
            classification_prompt: System prompt built from the user's instructions.
            analysis_content: Text description of the file.
            llm: Chat model to invoke.
            language: ``"Chinese"`` or another value for English messages.

        Returns:
            Whatever ``_parse_custom_prompt_response`` returns: a
            ``FileClassification`` or ``None``.
        """
        model_type = llm.__class__.__name__
        is_openai_compatible = model_type in ["ChatOpenAI", "OpenAI"]

        logger.info(f"🎯 检测到LLM类型: {model_type}, OpenAI兼容: {is_openai_compatible}")

        # Text parsing is used for all models for the sake of stability.
        logger.info("🎯 为了确保稳定性，对所有模型都使用文本解析方式")
        return self._parse_custom_prompt_response(classification_prompt, analysis_content, llm, language)
    
    def _parse_custom_prompt_response(self, classification_prompt: str, analysis_content: str,
                                    llm: ChatOpenAI, language: str) -> Optional[FileClassification]:
        """Invoke the LLM and turn its textual reply into a ``FileClassification``.

        The reply is parsed in two stages: first the first flat ``{...}``
        object is decoded as JSON; if that is missing or invalid, the reply is
        scanned line by line for ``category`` / ``subcategory`` /
        ``confidence`` fields (Chinese labels "类别" / "子类别" / "置信度" are
        accepted too). Confidence is clamped to the range 0.8-1.0.

        Up to two attempts are made. Connection/timeout errors wait one second
        before the retry; an invalid API key aborts immediately.

        Args:
            classification_prompt: System prompt for the LLM.
            analysis_content: Text description of the file.
            llm: Chat model to invoke.
            language: ``"Chinese"`` or another value for English messages.

        Returns:
            The parsed classification; a default classification with
            confidence 0.5 when the last attempt fails; ``None`` when the API
            key is rejected.
        """
        import json
        import time

        default_category = '用户自定义分类' if language == "Chinese" else 'User Custom Category'

        def clamp_confidence(value: float) -> float:
            # Custom-prompt results get at least 0.8; the prompt defines
            # confidence on a 0-1 scale, so larger values are capped at 1.0.
            return min(max(value, 0.8), 1.0)

        def field_value(line: str) -> str:
            # Text after the first ASCII or full-width colon, without the
            # trailing comma and quotes of a JSON-like "key": "value", line.
            value = re.split(r'[:：]', line, maxsplit=1)[-1]
            return value.strip().rstrip(',，').strip().strip('"\'')

        max_retries = 2
        for attempt in range(max_retries):
            try:
                messages = [
                    SystemMessage(content=classification_prompt),
                    HumanMessage(content=f"请返回JSON格式的分类结果:\n\n{analysis_content}" if language == "Chinese" else f"Please return classification result in JSON format:\n\n{analysis_content}")
                ]

                response = llm.invoke(messages)
                response_text = response.content.strip()

                # Stage 1: JSON object embedded in the reply.
                json_match = re.search(r'\{[^}]*\}', response_text)
                if json_match:
                    try:
                        data = json.loads(json_match.group())

                        category = data.get('category') or default_category
                        subcategory = data.get('subcategory') or ''
                        raw_confidence = data.get('confidence')
                        confidence = 0.85 if raw_confidence is None else float(raw_confidence)

                        return FileClassification(
                            category=category,
                            subcategory=subcategory,
                            confidence=clamp_confidence(confidence)
                        )
                    except Exception as parse_error:
                        logger.warning(f"JSON解析失败 (尝试 {attempt + 1}/{max_retries}): {str(parse_error)}")

                # Stage 2: simple line-based parsing.
                category = default_category
                subcategory = ''
                confidence = 0.85

                for line in response_text.split('\n'):
                    line = line.strip()
                    lowered_line = line.lower()
                    # "subcategory"/"子类别" contain "category"/"类别", so the
                    # more specific label has to be tested first.
                    if '子类别' in line or 'subcategory' in lowered_line:
                        subcategory = field_value(line)
                    elif '类别' in line or 'category' in lowered_line:
                        category = field_value(line) or category
                    elif '置信度' in line or 'confidence' in lowered_line:
                        numbers = re.findall(r'\d+(?:\.\d+)?', field_value(line))
                        if numbers:
                            confidence = float(numbers[0])

                return FileClassification(
                    category=category,
                    subcategory=subcategory,
                    confidence=clamp_confidence(confidence)
                )

            except Exception as e:
                error_msg = str(e)
                is_last_attempt = attempt == max_retries - 1

                # An invalid API key cannot be fixed by retrying.
                if "401" in error_msg or "invalid_api_key" in error_msg:
                    logger.error(f"API密钥无效，无法继续自定义提示词分类: {error_msg}")
                    return None

                # Network problems: pause briefly, then retry.
                lowered_error = error_msg.lower()
                if ("connection" in lowered_error or "timeout" in lowered_error) and not is_last_attempt:
                    logger.warning(f"网络错误，重试 {attempt + 1}/{max_retries}: {error_msg}")
                    time.sleep(1)
                    continue

                logger.error(f"解析自定义提示词响应失败 (尝试 {attempt + 1}/{max_retries}): {error_msg}")

                if is_last_attempt:
                    # Out of attempts: hand back a low-confidence default.
                    logger.warning("自定义提示词分类完全失败，返回默认分类")
                    return FileClassification(
                        category=default_category,
                        subcategory='',
                        confidence=0.5
                    )

        return None
    
    def _standard_classification(self, file_path: str, content: str, llm: ChatOpenAI,
                               reference_items: dict, language: str) -> FileClassification:
        """Fallback classification that ignores the custom prompt.

        Delegates to ``classify_document`` with an empty custom prompt. If that
        call raises, the error is logged and a generic category ('其他' for
        Chinese, 'Other' otherwise) with confidence 0.5 is returned.

        Args:
            file_path: Path of the file; only its base name is forwarded. May be empty.
            content: Text content of the file.
            llm: Chat model passed through to ``classify_document``.
            reference_items: Flags passed through to ``classify_document``.
            language: Language passed through; also selects the fallback label.

        Returns:
            The classification produced by ``classify_document`` or the
            generic fallback.
        """
        try:
            call_arguments = {
                "content": content,
                "llm": llm,
                "custom_prompt": "",  # the custom prompt is deliberately left out
                "filename": os.path.basename(file_path) if file_path else None,
                "reference_items": reference_items,
                "language": language,
            }
            return classify_document(**call_arguments)
        except Exception as e:
            logger.error(f"标准分类失败 {file_path}: {str(e)}")
            if language == "Chinese":
                generic_category = '其他'
            else:
                generic_category = 'Other'
            return FileClassification(
                category=generic_category,
                subcategory='',
                confidence=0.5
            )


# Module-wide classifier instance; stays None until it is first requested.
_custom_prompt_classifier = None

def get_custom_prompt_classifier() -> CustomPromptClassifier:
    """Return the shared ``CustomPromptClassifier``, creating it on first use."""
    global _custom_prompt_classifier
    if _custom_prompt_classifier is not None:
        return _custom_prompt_classifier
    _custom_prompt_classifier = CustomPromptClassifier()
    return _custom_prompt_classifier


def classify_with_custom_prompt_priority(file_path: str, content: str, llm: ChatOpenAI,
                                       custom_prompt: str, reference_items: dict = None,
                                       existing_categories: List[str] = None,
                                       context_analysis: Dict[str, Any] = None,
                                       language: str = "Chinese") -> FileClassification:
    """Module-level entry point for custom-prompt-priority classification.

    Forwards every argument unchanged to
    ``CustomPromptClassifier.classify_with_custom_prompt_priority`` on the
    shared classifier instance and returns its result.
    """
    shared_classifier = get_custom_prompt_classifier()
    return shared_classifier.classify_with_custom_prompt_priority(
        file_path=file_path,
        content=content,
        llm=llm,
        custom_prompt=custom_prompt,
        reference_items=reference_items,
        existing_categories=existing_categories,
        context_analysis=context_analysis,
        language=language,
    )


# Public API of this module: the classifier class, the accessor for the shared
# instance, and the module-level classification entry point.
__all__ = [
    'CustomPromptClassifier',
    'get_custom_prompt_classifier',
    'classify_with_custom_prompt_priority'
]

