import csv
import os
import shutil
import tempfile
from datetime import datetime, timezone
from typing import Any, Dict, List

from fastapi import APIRouter, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse

from app.services.text_extractor import extract_text
from app.services.phrase_extractor import generate_ngrams
from app.ml.inference import detect_tortured_phrases, load_tortured_phrases
from app.schemas.response import AnalysisResponse, Detection

router = APIRouter()

def get_original_fingerprint_for_expected_text(expected_text: str) -> str:
    """
    Look up the tortured-phrase fingerprint recorded for an expected text.

    Scans ``../../data/fingerprints.csv`` (resolved relative to this module) and
    returns the "Fingerprint - Tortured Phrase" value of the first row whose
    "Expected Text" value equals ``expected_text``, ignoring case and
    surrounding whitespace.

    Args:
        expected_text: The expected (correct) phrase to look up.

    Returns:
        The fingerprint of the first matching row, or ``expected_text`` itself
        when no row matches.

    Raises:
        OSError: If the CSV file cannot be opened.
        KeyError: If a row is read from a CSV whose header lacks one of the
            two required columns.
    """
    csv_path = os.path.join(os.path.dirname(__file__), "../../data/fingerprints.csv")

    # Normalise the search key once instead of once per row.
    wanted = expected_text.lower().strip()

    # newline="" lets the csv module handle line endings (and newlines embedded
    # in quoted fields) itself, as the csv documentation requires.
    with open(csv_path, "r", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            fingerprint = row["Fingerprint - Tortured Phrase"]
            expected = row["Expected Text"]

            # DictReader fills the missing columns of a short row with None;
            # such a row cannot describe a complete rule, so skip it rather
            # than crash on None.lower() or hand back a None fingerprint.
            if fingerprint is None or expected is None:
                continue

            if expected.lower().strip() == wanted:
                return fingerprint

    return expected_text  # fallback if not found

def transform_detections_to_new_format(detections: List[Dict[str, Any]], text: str) -> List[Dict[str, Any]]:
    """
    Group raw detections by expected text into the rule/phrasesFound format:
    {
        "rule": {
            "torturedPhraseFingerprint": "...",
            "expectedText": "..."
        },
        "phrasesFound": [
            {
                "position": ...,
                "match": "..."
            }
        ]
    }

    Args:
        detections: Detection dicts; each must provide the keys
            "matched_with" (expected text), "start" and "phrase".
        text: Unused by this function; kept so existing callers keep working.

    Returns:
        One entry per distinct "matched_with" value, in order of first
        appearance. Each entry's "phrasesFound" lists that group's detections
        in input order.

    Raises:
        KeyError: If a detection lacks one of the required keys.
    """
    # Keyed by expected text; dict insertion order preserves first-seen order.
    grouped_detections: Dict[str, Dict[str, Any]] = {}

    for det in detections:
        expected_text = det["matched_with"]

        group = grouped_detections.get(expected_text)
        if group is None:
            # The fingerprint lookup reads the fingerprints CSV, so do it only
            # once per distinct expected text rather than once per detection.
            group = {
                "rule": {
                    "torturedPhraseFingerprint": get_original_fingerprint_for_expected_text(expected_text),
                    "expectedText": expected_text
                },
                "phrasesFound": []
            }
            grouped_detections[expected_text] = group

        # Add the detected phrase to phrasesFound
        group["phrasesFound"].append({
            "position": det["start"],
            "match": det["phrase"]
        })

    # Convert back to list format
    return list(grouped_detections.values())

@router.post("/detect-new-format")
async def detect_document_new_format(file: UploadFile = File(...)):
    """
    Upload a document and detect tortured phrases.
    Returns response in the new format:
    {
        "result": [
            {
                "rule": {
                    "torturedPhraseFingerprint": "...",
                    "expectedText": "..."
                },
                "phrasesFound": [
                    {
                        "position": ...,
                        "match": "..."
                    }
                ]
            }
        ]
    }

    Raises:
        HTTPException: 400 when the upload has no filename or its name does
            not end in .pdf, .docx or .txt (case-insensitive).
    """
    # File type check. The filename is client-supplied and may be absent.
    filename = file.filename or ""
    if not filename.lower().endswith((".pdf", ".docx", ".txt")):
        raise HTTPException(status_code=400, detail="Unsupported file type")

    # Keep only the extension (original casing) of the client-supplied name.
    # Joining the raw filename into the path would let "../" segments or an
    # absolute path escape the temporary directory. After the check above, the
    # text following the last "." is exactly the extension, so it cannot
    # contain a path separator.
    suffix = "." + filename.rpartition(".")[2]
    tmp_dir = tempfile.mkdtemp()

    try:
        # Save file temporarily. Done inside the try so the directory is
        # removed even if writing the upload fails.
        tmp_path = os.path.join(tmp_dir, "upload" + suffix)
        with open(tmp_path, "wb") as f:
            f.write(await file.read())

        # Extract text
        text = extract_text(tmp_path)

        # Generate candidate phrases
        candidate_phrases = generate_ngrams(text)

        # Detect tortured phrases
        detections = detect_tortured_phrases(candidate_phrases)

        # Transform detections to new format
        transformed_detections = transform_detections_to_new_format(detections, text)

        # Build response in new format
        response = {
            "result": transformed_detections
        }

        return JSONResponse(content=response)

    finally:
        # Best-effort cleanup: a failed removal must not mask the response or
        # the original error, but unrelated exceptions are no longer swallowed.
        shutil.rmtree(tmp_dir, ignore_errors=True)

def generate_documentation_report(detections: List[Dict[str, Any]], transformed_detections: List[Dict[str, Any]], filename: str, text: str) -> Dict[str, Any]:
    """
    Generate a human-readable documentation report from the detection results.

    Args:
        detections: Detection dicts; each must provide the keys "matched_with"
            (expected text) and "phrase" (the text that was detected).
        transformed_detections: Unused by this function; kept so existing
            callers keep working.
        filename: Name reported as "file_name".
        text: Analysed text; only its length is reported.

    Returns:
        A dict with the single key "document_analysis_report" holding the file
        name, a UTC "analysis_timestamp" (``YYYY-MM-DDTHH:MM:SSZ``), a summary
        of counts, per-expected-phrase findings (at most five example phrases
        each, in order of first appearance) and fixed recommendations.

    Raises:
        KeyError: If a detection lacks "matched_with" or "phrase".
    """
    # Collect the detected phrases per expected text in a single pass; the
    # list lengths double as occurrence counts and dict order preserves
    # first-seen order.
    phrases_by_expected: Dict[str, List[str]] = {}
    for det in detections:
        phrases_by_expected.setdefault(det["matched_with"], []).append(det["phrase"])

    total_detections = len(detections)
    unique_phrases = {phrase for phrases in phrases_by_expected.values() for phrase in phrases}

    # Create a detailed report
    report = {
        "document_analysis_report": {
            "file_name": filename,
            # Actual time of analysis, in the same format the placeholder used.
            "analysis_timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
            "summary": {
                "total_text_length": len(text),
                # Number of distinct detected phrases.
                "total_phrases_analyzed": len(unique_phrases),
                "total_tortured_phrases_found": total_detections,
                "unique_tortured_phrase_types": len(phrases_by_expected),
                "accuracy_confidence": "High" if total_detections > 0 else "No tortured phrases detected"
            },
            "detailed_findings": [
                {
                    "expected_phrase": expected_text,
                    "occurrences": len(phrases),
                    "examples": phrases[:5]  # First 5 examples
                }
                for expected_text, phrases in phrases_by_expected.items()
            ],
            "recommendations": [
                "Review and replace all detected tortured phrases with their correct academic terminology",
                "Ensure consistency in academic language throughout the document",
                f"Consider reviewing the {total_detections} identified instances for proper correction"
            ]
        }
    }

    return report


@router.post("/detect-docs-and-json")
async def detect_document_docs_and_json(file: UploadFile = File(...)):
    """
    Upload a document and get responses in both JSON and documentation format.
    JSON format matches the specified structure.
    Documentation format provides human-readable information.

    The response body has three keys: "json_format" (``{"result": [...]}``),
    "docs_format" (the documentation report) and "metadata".

    Raises:
        HTTPException: 400 when the upload has no filename or its name does
            not end in .pdf, .docx or .txt (case-insensitive).
    """
    # File type check. The filename is client-supplied and may be absent.
    filename = file.filename or ""
    if not filename.lower().endswith((".pdf", ".docx", ".txt")):
        raise HTTPException(status_code=400, detail="Unsupported file type")

    # Keep only the extension (original casing) of the client-supplied name.
    # Joining the raw filename into the path would let "../" segments or an
    # absolute path escape the temporary directory. After the check above, the
    # text following the last "." is exactly the extension, so it cannot
    # contain a path separator.
    suffix = "." + filename.rpartition(".")[2]
    tmp_dir = tempfile.mkdtemp()

    try:
        # Save file temporarily. Done inside the try so the directory is
        # removed even if writing the upload fails.
        tmp_path = os.path.join(tmp_dir, "upload" + suffix)
        with open(tmp_path, "wb") as f:
            f.write(await file.read())

        # Extract text
        text = extract_text(tmp_path)

        # Generate candidate phrases
        candidate_phrases = generate_ngrams(text)

        # Detect tortured phrases
        detections = detect_tortured_phrases(candidate_phrases)

        # Transform detections to new format
        transformed_detections = transform_detections_to_new_format(detections, text)

        # Build JSON response in the requested format
        json_response = {
            "result": transformed_detections
        }

        # Generate documentation report (reports the name the client supplied)
        docs_response = generate_documentation_report(detections, transformed_detections, file.filename, text)

        # Return both formats (the primary response will be JSON)
        return JSONResponse(content={
            "json_format": json_response,
            "docs_format": docs_response,
            "metadata": {
                "api_version": "1.0.0",
                "processing_time": "N/A",  # Placeholder: processing time is not measured here
                "total_detections": len(detections)
            }
        })

    finally:
        # Best-effort cleanup: a failed removal must not mask the response or
        # the original error, but unrelated exceptions are no longer swallowed.
        shutil.rmtree(tmp_dir, ignore_errors=True)