<?php
/**
 * PII Detection Service
 * Main orchestrator class that brings together all components
 * for complete PII identification in documents
 */

namespace Redact\Classes;

/**
 * Orchestrates PII detection for a single document.
 *
 * Pipeline (see processDocument): validate thread and file, convert the file
 * to page images, run every page through Textract, send the layouts selected
 * by the RegistryManager through Comprehend, map the detected entities back
 * onto word blocks, extract tables and return one result array.
 *
 * All public methods report failures as `['success' => false, 'error' => ...]`
 * arrays rather than by throwing.
 */
class PIIDetectionService
{
    /** File size ceiling (bytes) used when the config has no `max_file_size`: 5 MB. */
    private const DEFAULT_MAX_FILE_SIZE = 5242880;

    private TextractService $textractService;
    private ComprehendService $comprehendService;
    private RegistryManager $registryManager;
    private FileHandler $fileHandler;
    private ThreadManager $threadManager;

    private float $startTime;
    private array $config;
    private ?string $threadId = null;

    /**
     * Wire up the AWS clients, thread manager and file handler.
     *
     * @param array $awsCredentials Must contain the keys `key` and `secret`
     * @param array $config         Optional settings: `region` (default `us-east-1`),
     *                              `data_dir` (default `<this dir>/../../data`),
     *                              `thread_expiry_days` (default 30),
     *                              `max_file_size` in bytes (default 5 MB)
     */
    public function __construct(array $awsCredentials, array $config = [])
    {
        $region = $config['region'] ?? 'us-east-1';

        $this->textractService = new TextractService(
            $awsCredentials['key'],
            $awsCredentials['secret'],
            $region
        );

        $this->comprehendService = new ComprehendService(
            $awsCredentials['key'],
            $awsCredentials['secret'],
            $region
        );

        $dataDir = $config['data_dir'] ?? __DIR__ . '/../../data';
        $threadExpiryDays = $config['thread_expiry_days'] ?? 30;
        $this->threadManager = new ThreadManager($dataDir, $threadExpiryDays);

        // Placeholder only: processDocument() replaces this with a
        // thread-scoped RegistryManager on every request.
        $this->registryManager = new RegistryManager();

        $maxFileSize = $config['max_file_size'] ?? self::DEFAULT_MAX_FILE_SIZE;
        $this->fileHandler = new FileHandler($maxFileSize);

        $this->config = $config;
    }

    /**
     * Process a document from a file path and detect PII.
     *
     * On success the result carries, among others, `file_info`, `pages`,
     * `tables`, `cache` statistics and `comprehend_errors` (one entry per
     * layout whose Comprehend call did not succeed; empty when all succeeded).
     *
     * @param string      $filePath     Path to the document file
     * @param string      $threadId     Thread ID for data segregation (REQUIRED)
     * @param string|null $originalName Optional display/source file name to use instead of
     *                                  basename($filePath); useful when $filePath is an
     *                                  upload temp file whose name carries no meaning
     * @return array Complete analysis results, or `['success' => false, 'error' => ...]`
     */
    public function processDocument(string $filePath, string $threadId, ?string $originalName = null): array
    {
        $this->startTime = microtime(true);

        // Step 1: Validate thread
        if (!$this->threadManager->isThreadValid($threadId)) {
            return ['success' => false, 'error' => 'Invalid or expired thread ID'];
        }

        $this->threadId = $threadId;
        $this->threadManager->updateThreadActivity($threadId);

        // Fresh RegistryManager bound to this thread's cache directory
        $cacheDir = $this->threadManager->getThreadCacheDirectory($threadId);
        $this->registryManager = new RegistryManager($threadId, $cacheDir);

        // Step 2: Validate the file. is_file() (not file_exists()) so that a
        // directory path is rejected here instead of reaching the converter.
        if (!is_file($filePath)) {
            return ['success' => false, 'error' => 'File not found: ' . $filePath];
        }

        // filesize() returns false on failure; false would slip through the
        // "greater than limit" comparison below, so reject it explicitly.
        $fileSize = filesize($filePath);
        if ($fileSize === false) {
            return ['success' => false, 'error' => 'Unable to determine file size: ' . $filePath];
        }

        // basename() strips any directory components from a caller-supplied name.
        $fileName = $originalName !== null ? basename($originalName) : '';
        if ($fileName === '') {
            $fileName = basename($filePath);
        }

        $fileInfo = [
            'name' => $fileName,
            'size' => $fileSize,
            'detected_type' => 'unknown',
            'page_count' => 0,
        ];

        // Step 3: Enforce the size limit
        $maxSize = $this->config['max_file_size'] ?? self::DEFAULT_MAX_FILE_SIZE;
        if ($fileSize > $maxSize) {
            $maxMB = round($maxSize / 1024 / 1024, 1);
            return ['success' => false, 'error' => "File size exceeds {$maxMB}MB limit"];
        }

        // Step 4: Snapshot cache statistics before processing
        $cacheStatsBefore = $this->registryManager->getCacheStatistics();

        // Step 5: Convert to page images
        $conversion = $this->fileHandler->convertToImages($filePath, $fileName);
        if (empty($conversion['success'])) {
            return $conversion;
        }

        $images = $conversion['images'] ?? [];
        $fileInfo['detected_type'] = $conversion['detected_type'] ?? 'unknown';
        $fileInfo['page_count'] = count($images);

        // Step 6: Run every page through Textract and build the registries
        $allPages = [];
        foreach ($images as $pageIndex => $imageData) {
            $pageNum = $pageIndex + 1;

            $textractResult = $this->textractService->analyzeDocumentFull($imageData);
            if (empty($textractResult['success'])) {
                return [
                    'success' => false,
                    'error' => "Textract failed on page $pageNum",
                    'details' => $textractResult,
                ];
            }

            $blocks = $textractResult['data']['Blocks'] ?? [];
            [$pageLayouts, $pageWords] = $this->registryManager->buildRegistriesFromBlocks($blocks, $pageNum);

            $allPages[] = [
                'page_number' => $pageNum,
                'layouts' => $pageLayouts,
                'word_blocks' => $pageWords,
                'image_data' => 'data:image/jpeg;base64,' . base64_encode($imageData),
                // Kept only until table extraction; removed again in finalizePage()
                'textract_data' => $textractResult['data'] ?? null,
            ];
        }

        // Step 7: Send the layouts chosen by the registry through Comprehend
        $layoutsToProcess = $this->registryManager->getLayoutsToProcess();
        [$comprehendCalls, $comprehendErrors] = $this->detectPiiInLayouts($layoutsToProcess);

        // Step 8: Apply PII to every page and collect the table summaries
        $totalPIIInstances = 0;
        $allTables = [];
        foreach ($allPages as $index => $page) {
            $page = $this->finalizePage($page);
            $totalPIIInstances += $page['pii_count'];

            foreach ($page['tables'] as $table) {
                // Document-level summary: same as the page entry, minus cell data
                $allTables[] = [
                    'page' => $page['page_number'],
                    'table_index' => $table['table_index'],
                    'rows' => $table['rows'],
                    'columns' => $table['columns'],
                    'confidence' => $table['confidence'],
                ];
            }

            $allPages[$index] = $page;
        }

        // Step 9: Persist the registry cache and re-read its statistics
        $this->registryManager->saveCachedRegistry();
        $cacheStatsAfter = $this->registryManager->getCacheStatistics();

        // Step 10: Update thread statistics
        $this->threadManager->incrementThreadStats($threadId, [
            'document_count' => 1,
            'total_pii_found' => $totalPIIInstances,
            'total_api_calls' => $comprehendCalls,
        ]);

        // Step 11: Assemble the response
        $stats = $this->registryManager->getStatistics();
        $totalLayouts = $stats['total_layouts'] ?? 0;
        $processedLayouts = count($layoutsToProcess);
        $processingTime = round((microtime(true) - $this->startTime) * 1000, 2);

        return [
            'success' => true,
            'thread_id' => $threadId,
            'processing_time' => $processingTime . 'ms',
            'file_info' => $fileInfo,
            'total_pages' => count($allPages),
            'layout_count' => $totalLayouts,
            'layouts_processed' => $processedLayouts,
            'layouts_skipped' => $totalLayouts - $processedLayouts,
            'unique_words' => $stats['unique_words'] ?? 0,
            'total_words' => $stats['total_words'] ?? 0,
            'pii_words' => $stats['pii_words'] ?? 0,
            'total_pii_instances' => $totalPIIInstances,
            'total_tables' => count($allTables),
            'comprehend_calls' => $comprehendCalls,
            // Layouts whose PII scan failed; callers should treat a non-empty
            // list as "detection incomplete" for this document.
            'comprehend_errors' => $comprehendErrors,
            // Share of layouts that did not cost a Comprehend call
            'optimization_rate' => $totalLayouts > 0
                ? round((($totalLayouts - $comprehendCalls) / $totalLayouts) * 100, 1)
                : 0,
            'cache' => [
                'before' => $cacheStatsBefore,
                'after' => $cacheStatsAfter,
                'words_learned' => ($cacheStatsAfter['cached_words'] ?? 0) - ($cacheStatsBefore['cached_words'] ?? 0),
            ],
            'tables' => $allTables,
            'pages' => $allPages,
        ];
    }

    /**
     * Process an uploaded document from the $_FILES array.
     *
     * The file is validated by the FileHandler, then its temp path is handed to
     * processDocument() together with the client-supplied file name (when it is
     * a string) so the result reports that name instead of the temp file name.
     *
     * @param array  $uploadedFile File entry from $_FILES
     * @param string $threadId     Thread ID for data segregation (REQUIRED)
     * @return array Complete analysis results, or the failed validation result
     */
    public function processUploadedFile(array $uploadedFile, string $threadId): array
    {
        $validation = $this->fileHandler->validateFile($uploadedFile);
        if (empty($validation['success'])) {
            return $validation;
        }

        $clientName = $uploadedFile['name'] ?? null;
        if (!is_string($clientName)) {
            $clientName = null;
        }

        return $this->processDocument($uploadedFile['tmp_name'], $threadId, $clientName);
    }

    /**
     * Get thread manager instance
     *
     * @return ThreadManager
     */
    public function getThreadManager(): ThreadManager
    {
        return $this->threadManager;
    }

    /**
     * Create a new thread (delegates to the ThreadManager).
     *
     * @param array $metadata Optional metadata
     * @return array Thread creation result
     */
    public function createThread(array $metadata = []): array
    {
        return $this->threadManager->createThread($metadata);
    }

    /**
     * Run Comprehend PII detection over the given layouts and map the entities
     * found back onto word blocks via the RegistryManager.
     *
     * Layouts that are missing or have empty text are skipped without a call.
     * A call that does not report success is recorded instead of being dropped.
     *
     * @param array $layoutIds Layout IDs as returned by getLayoutsToProcess()
     * @return array Two-element list: [number of Comprehend calls made,
     *               list of ['layout_id' => ..., 'error' => ...] for failed calls]
     */
    private function detectPiiInLayouts(array $layoutIds): array
    {
        $calls = 0;
        $errors = [];

        foreach ($layoutIds as $layoutId) {
            $layout = $this->registryManager->getLayout($layoutId);
            if (!$layout || empty($layout['text'])) {
                continue;
            }

            $layoutText = $layout['text'];
            $result = $this->comprehendService->detectPiiEntities($layoutText);
            $calls++;

            if (empty($result['success'])) {
                $errors[] = [
                    'layout_id' => $layoutId,
                    // NOTE(review): assumes failures carry an `error` key — confirm against ComprehendService
                    'error' => $result['error'] ?? 'Comprehend PII detection failed',
                ];
                continue;
            }

            $entities = $result['data']['Entities'] ?? [];
            $this->registryManager->mapPIIToWordBlocks($entities, $layoutId, $layoutText);
        }

        return [$calls, $errors];
    }

    /**
     * Attach PII blocks and parsed tables to one page and drop the raw
     * Textract payload so it does not end up in the response.
     *
     * @param array $page Page entry as built in processDocument()
     * @return array The page with `pii_blocks`, `pii_count` and `tables` added
     *               and `textract_data` removed
     */
    private function finalizePage(array $page): array
    {
        $piiBlocks = $this->registryManager->applyPIIToPage($page);
        $page['pii_blocks'] = $piiBlocks;
        $page['pii_count'] = count($piiBlocks);

        $pageTables = [];
        if (isset($page['textract_data'])) {
            $tables = $this->textractService->parseTables($page['textract_data']);
            foreach ($tables as $tableIndex => $table) {
                $pageTables[] = [
                    'table_index' => $tableIndex + 1,
                    'rows' => $table['rowCount'],
                    'columns' => $table['columnCount'],
                    'confidence' => $table['confidence'],
                    'data' => $table['rows'],
                ];
            }
        }
        $page['tables'] = $pageTables;

        // Always remove, including when the payload was null
        unset($page['textract_data']);

        return $page;
    }
}

