<?php
/**
 * Test API with Table Extraction
 * Tests the unified LAYOUT + TABLES + FORMS extraction
 */

// Sample document sent to the API; the path is resolved relative to this script.
$testFile = __DIR__ . '/../testing/samples/BeytekinS Payslips.pdf';

// Exit with a non-zero status on failure: die("message") prints the message but
// still exits with status 0, which would make a failed run look successful.
if (!file_exists($testFile)) {
    echo "Error: Test file not found at $testFile\n";
    exit(1);
}

echo "========================================\n";
echo "API Table Extraction Test\n";
echo "========================================\n\n";

// Read the whole file into memory and base64-encode it for the JSON request body.
$fileData = file_get_contents($testFile);
if ($fileData === false) {
    // The file exists but could not be read (permissions, I/O error, ...).
    echo "Error: Unable to read test file at $testFile\n";
    exit(1);
}
$base64Data = base64_encode($fileData);

echo "File: " . basename($testFile) . "\n";
echo "Size: " . number_format(strlen($fileData)) . " bytes\n";
echo "Base64 Size: " . number_format(strlen($base64Data)) . " bytes\n\n";

// Step 1: Create thread
// POSTs the test metadata as JSON to thread_create.php. A successful call is
// expected to return HTTP 201 with a JSON body containing 'thread_id' and
// 'private_key'; both values are reused by the later steps.
echo "Step 1: Creating thread...\n";
$ch = curl_init('http://localhost/redact/api/v1/thread_create.php');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode([
    'metadata' => [
        'test' => 'table_extraction',
        'file' => basename($testFile)
    ]
]));

$response = curl_exec($ch);
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
// Capture the transport error before the handle is closed.
$curlError = curl_error($ch);
curl_close($ch);

// curl_exec() returns false on transport failure (connection refused, DNS, ...);
// report the cURL error instead of a misleading "HTTP 0" with an empty body.
if ($response === false) {
    echo "Failed to create thread. cURL error: $curlError\n";
    exit(1);
}

if ($httpCode !== 201) {
    echo "Failed to create thread. HTTP $httpCode\nResponse: $response\n";
    exit(1);
}

// Validate the body before using it: json_decode() returns null on malformed JSON,
// and the keys may be absent even when the status code is 201.
$threadData = json_decode($response, true);
if (!is_array($threadData) || !isset($threadData['thread_id'], $threadData['private_key'])) {
    echo "Failed to create thread. Unexpected response body: $response\n";
    exit(1);
}

$threadId = $threadData['thread_id'];
$privateKey = $threadData['private_key'];

echo "✓ Thread created: $threadId\n\n";

// Step 2: Process file
// POSTs the base64-encoded document together with the thread credentials to
// process_file.php and measures the wall-clock time of the request.
// On success $result holds the decoded JSON response as an associative array.
echo "Step 2: Processing file with PII + Table extraction...\n";
echo "This will take ~90 seconds for 3 pages...\n\n";

$startTime = microtime(true);

$ch = curl_init('http://localhost/redact/api/v1/process_file.php');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
curl_setopt($ch, CURLOPT_TIMEOUT, 300); // 5 minutes
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode([
    'thread_id' => $threadId,
    'private_key' => $privateKey,
    'file_data' => $base64Data,
    'file_name' => basename($testFile)
]));

$response = curl_exec($ch);
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
// Capture the transport error (e.g. the 300s timeout) before closing the handle.
$curlError = curl_error($ch);
curl_close($ch);

$endTime = microtime(true);
$totalTime = round($endTime - $startTime, 2);

// Only attempt to decode when the transfer produced a body.
$result = is_string($response) ? json_decode($response, true) : null;

// Classify the failure, if any: transport error, unexpected status, or invalid body.
$failure = null;
if ($response === false) {
    $failure = "Failed to process file. cURL error: $curlError\n";
} elseif ($httpCode !== 200) {
    $failure = "Failed to process file. HTTP $httpCode\nResponse: $response\n";
} elseif (!is_array($result)) {
    $failure = "Failed to process file. Response is not valid JSON:\n$response\n";
}

if ($failure !== null) {
    echo $failure;

    // Best-effort cleanup so a failed run does not leave the thread behind on
    // the server; the outcome of this request is intentionally not checked.
    $cleanupCh = curl_init('http://localhost/redact/api/v1/thread_delete.php');
    curl_setopt($cleanupCh, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($cleanupCh, CURLOPT_CUSTOMREQUEST, 'DELETE');
    curl_setopt($cleanupCh, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
    curl_setopt($cleanupCh, CURLOPT_POSTFIELDS, json_encode([
        'thread_id' => $threadId,
        'private_key' => $privateKey
    ]));
    curl_exec($cleanupCh);
    curl_close($cleanupCh);

    exit(1);
}

echo "✓ Processing completed in {$totalTime}s\n\n";

// Display results
// Prints the top-level counters, the table list, and a per-page breakdown from
// $result, then writes the full response next to this script as pretty-printed JSON.
// Missing fields are shown as "n/a" instead of raising warnings, and nothing in
// this section aborts the script, so the cleanup step that follows always runs.
echo "========================================\n";
echo "RESULTS\n";
echo "========================================\n\n";

echo "Processing Time: " . ($result['processing_time'] ?? 'n/a') . "\n";
echo "Total Pages: " . ($result['total_pages'] ?? 'n/a') . "\n";
echo "Total PII Instances: " . ($result['total_pii_instances'] ?? 'n/a') . "\n";
echo "Total Tables: " . ($result['total_tables'] ?? 'n/a') . "\n";
echo "Comprehend Calls: " . ($result['comprehend_calls'] ?? 'n/a') . "\n";
echo "Optimization Rate: " . ($result['optimization_rate'] ?? 'n/a') . "%\n\n";

// Display table summary
$allTables = is_array($result['tables'] ?? null) ? $result['tables'] : [];
if (!empty($allTables)) {
    echo "========================================\n";
    echo "TABLES EXTRACTED\n";
    echo "========================================\n\n";

    foreach ($allTables as $table) {
        echo "Page " . ($table['page'] ?? 'n/a') . ", Table " . ($table['table_index'] ?? 'n/a') . ": ";
        echo ($table['rows'] ?? 'n/a') . " rows × " . ($table['columns'] ?? 'n/a') . " columns ";
        echo "(" . number_format((float) ($table['confidence'] ?? 0), 1) . "% confidence)\n";
    }
    echo "\n";
}

// Display per-page summary
echo "========================================\n";
echo "PER-PAGE SUMMARY\n";
echo "========================================\n\n";

$pages = is_array($result['pages'] ?? null) ? $result['pages'] : [];
foreach ($pages as $page) {
    // count() throws a TypeError on PHP 8 for non-countable values, so normalise
    // a missing or non-array 'tables' entry to an empty list first.
    $pageTables = is_array($page['tables'] ?? null) ? $page['tables'] : [];

    echo "Page " . ($page['page_number'] ?? 'n/a') . ":\n";
    echo "  PII Instances: " . ($page['pii_count'] ?? 'n/a') . "\n";
    echo "  Tables: " . count($pageTables) . "\n";
    foreach ($pageTables as $table) {
        echo "    - Table " . ($table['table_index'] ?? 'n/a') . ": "
            . ($table['rows'] ?? 'n/a') . "×" . ($table['columns'] ?? 'n/a') . " ";
        echo "(" . number_format((float) ($table['confidence'] ?? 0), 1) . "%)\n";
    }
    echo "\n";
}

// Save full response
$outputFile = __DIR__ . '/test_tables_api_output.json';
$encodedResult = json_encode($result, JSON_PRETTY_PRINT);
if ($encodedResult === false || file_put_contents($outputFile, $encodedResult) === false) {
    // Only warn: the thread still has to be cleaned up afterwards.
    echo "Warning: could not save full response to: " . basename($outputFile) . "\n\n";
} else {
    echo "Full response saved to: " . basename($outputFile) . "\n\n";
}

// Step 3: Clean up
// Sends a DELETE request with the thread credentials to thread_delete.php, and
// prints the success banner and summary only if the deletion was confirmed.
echo "Step 3: Cleaning up thread...\n";
$ch = curl_init('http://localhost/redact/api/v1/thread_delete.php');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'DELETE');
curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode([
    'thread_id' => $threadId,
    'private_key' => $privateKey
]));

$response = curl_exec($ch);
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
// Capture the transport error before the handle is closed.
$curlError = curl_error($ch);
curl_close($ch);

if ($response === false) {
    echo "✗ Failed to delete thread. cURL error: $curlError\n";
    exit(1);
}

// NOTE(review): the exact success status of thread_delete.php is not visible
// here, so any 2xx response is accepted — confirm against the endpoint.
if ($httpCode < 200 || $httpCode >= 300) {
    echo "✗ Failed to delete thread. HTTP $httpCode\nResponse: $response\n";
    exit(1);
}

echo "✓ Thread deleted\n\n";

echo "========================================\n";
echo "TEST COMPLETED SUCCESSFULLY!\n";
echo "========================================\n\n";

echo "Summary:\n";
echo "- Single Textract call per page (LAYOUT + TABLES + FORMS)\n";
echo "- PII Detection: " . ($result['total_pii_instances'] ?? 'n/a') . " instances found\n";
echo "- Table Extraction: " . ($result['total_tables'] ?? 'n/a') . " tables found\n";
echo "- Total Time: {$totalTime}s\n";
echo "- Cost per page: ~\$0.015 (vs \$0.020 for separate calls)\n\n";

