"""
Lazy vs Eager RAG Benchmark
Usando sentence-transformers + FAISS
Rodar na máquina local: pip install sentence-transformers faiss-cpu
"""
import faiss
import numpy as np
import time
from sentence_transformers import SentenceTransformer
# ---------------------------
# Config
# ---------------------------
RUNS = 20  # number of runs, for a stable average
DATASET_SIZE = 1000  # number of documents in the haystack
TOP_K_VALUES = [10, 20, 50, 100]  # candidate-list sizes to test for lazy search
MODEL_NAME = 'all-MiniLM-L6-v2'  # sentence-transformers embedding model (384-dim)
print(f"Carregando modelo {MODEL_NAME}...")
model = SentenceTransformer(MODEL_NAME)
# ---------------------------
# Generate dataset
# ---------------------------
def generate_dataset(size):
    """Build `size` synthetic documents, cycling through a fixed topic list.

    Document i talks about topic ``i % len(topics)``, so the corpus is an
    even mix of the ten subjects below.
    """
    subjects = (
        "machine learning", "physics", "chemistry", "biology",
        "history", "mathematics", "economics", "philosophy",
        "engineering", "medicine",
    )
    n_subjects = len(subjects)
    return [
        f"Document {idx}: information about {subjects[idx % n_subjects]} including various concepts and applications related to {subjects[idx % n_subjects]} research"
        for idx in range(size)
    ]
# ---------------------------
# Eager Search
# ---------------------------
def eager_search(query_vec, embeddings, docs):
    """Exhaustive (eager) retrieval: score every document, return the best.

    Args:
        query_vec: query embedding, shape (dim,) — assumed L2-normalized by
            the caller so the inner product equals cosine similarity.
        embeddings: document embeddings, shape (n_docs, dim), row-aligned
            with `docs` and likewise L2-normalized.
        docs: list of document strings.

    Returns:
        Tuple of (best_doc, comparisons, seconds) where `comparisons` is the
        number of documents scored (always len(docs) for the eager scan).
    """
    # perf_counter is monotonic and high-resolution; time.time() can jump
    # with clock adjustments and is too coarse for micro-benchmarks.
    start = time.perf_counter()
    sims = embeddings @ query_vec  # one dot product per document
    idx = np.argmax(sims)
    cost = time.perf_counter() - start
    return docs[idx], len(docs), cost
# ---------------------------
# Lazy Search (FAISS ANN)
# ---------------------------
def lazy_search(query_vec, index, embeddings, docs, top_k):
    """Lazy retrieval: let the index shortlist `top_k` candidates, rescore only those.

    Args:
        query_vec: L2-normalized query embedding, shape (dim,).
        index: a FAISS-style index exposing ``search(queries, k)`` that
            returns (distances, indices) arrays of shape (n_queries, k).
        embeddings: full document embedding matrix, shape (n_docs, dim).
        docs: list of document strings, row-aligned with `embeddings`.
        top_k: size of the candidate shortlist to rescore.

    Returns:
        Tuple of (best_doc, comparisons, seconds) where `comparisons` is
        `top_k` — the number of documents actually rescored.

    NOTE(review): if top_k > len(docs), FAISS pads indices with -1, which
    would index from the end of `embeddings` here — callers keep k <= n.
    """
    # Same timing convention as eager_search: monotonic perf_counter.
    start = time.perf_counter()
    query_reshaped = query_vec.reshape(1, -1)  # FAISS expects a 2-D batch
    D, I = index.search(query_reshaped, top_k)
    candidates = embeddings[I[0]]  # gather only the shortlisted rows
    sims = candidates @ query_vec  # exact rescoring of the shortlist
    best_local = np.argmax(sims)
    best_idx = I[0][best_local]  # map shortlist position back to corpus index
    cost = time.perf_counter() - start
    return docs[best_idx], top_k, cost
# ---------------------------
# Run benchmark
# ---------------------------
print(f"\nGerando dataset de {DATASET_SIZE} documentos...")
base_docs = generate_dataset(DATASET_SIZE)
# Per-k accumulators: hit counts plus summed costs/timings over all runs.
results = {k: {"hits": 0, "eager_cost": 0, "lazy_cost": 0, "eager_time": 0, "lazy_time": 0} for k in TOP_K_VALUES}
eager_baseline = {"hits": 0, "cost": 0, "time": 0}
print(f"Rodando {RUNS} experimentos...\n")
for run in range(RUNS):
    # Build the dataset with the "needle" inserted at a random position.
    docs = base_docs.copy()
    needle = f"NEEDLE: unique quantum lazy evaluation principle document run {run}"
    needle_pos = np.random.randint(0, len(docs))
    docs.insert(needle_pos, needle)
    query = "quantum lazy evaluation unique principle"
    # Encode the full corpus (needle included) on every run.
    all_embeddings = model.encode(docs, show_progress_bar=False)
    all_embeddings = np.array(all_embeddings).astype("float32")
    # L2-normalize so inner product == cosine similarity.
    faiss.normalize_L2(all_embeddings)
    query_vec = model.encode([query])[0].astype("float32")
    query_vec = query_vec / np.linalg.norm(query_vec)
    # Build an exact inner-product FAISS index over the corpus.
    dim = all_embeddings.shape[1]
    index = faiss.IndexFlatIP(dim)
    index.add(all_embeddings)
    # Eager: full scan over every document.
    eager_result, eager_comparisons, eager_time = eager_search(query_vec, all_embeddings, docs)
    if "NEEDLE" in eager_result:
        eager_baseline["hits"] += 1
    # Cost/time accumulate every run (hit or miss) — averaged by /RUNS later.
    eager_baseline["cost"] += eager_comparisons
    eager_baseline["time"] += eager_time
    # Lazy search with each candidate-list size k.
    for k in TOP_K_VALUES:
        lazy_result, lazy_comparisons, lazy_time = lazy_search(query_vec, index, all_embeddings, docs, k)
        if "NEEDLE" in lazy_result:
            results[k]["hits"] += 1
        # Eager figures are duplicated per-k for side-by-side reporting.
        results[k]["eager_cost"] += eager_comparisons
        results[k]["lazy_cost"] += lazy_comparisons
        results[k]["eager_time"] += eager_time
        results[k]["lazy_time"] += lazy_time
# ---------------------------
# Results
# ---------------------------
print("=" * 70)
print("RESULTADOS DO BENCHMARK — Lazy vs Eager RAG")
print(f"Dataset: {DATASET_SIZE+1} docs | Runs: {RUNS} | Modelo: {MODEL_NAME}")
print("=" * 70)
print(f"\n{'Method':<20} {'Recall':>8} {'Cost (docs)':>12} {'Time (ms)':>12} {'Speedup':>10}")
print("-" * 70)
# Eager baseline, averaged over all runs.
eager_recall = eager_baseline["hits"] / RUNS
eager_avg_cost = eager_baseline["cost"] / RUNS
eager_avg_time = (eager_baseline["time"] / RUNS) * 1000  # seconds -> ms
print(f"{'Eager RAG':<20} {eager_recall:>8.4f} {eager_avg_cost:>12.1f} {eager_avg_time:>12.2f} {'1.00x':>10}")
# One table row per lazy configuration.
for k in TOP_K_VALUES:
    r = results[k]
    recall = r["hits"] / RUNS
    avg_cost = r["lazy_cost"] / RUNS
    avg_time = (r["lazy_time"] / RUNS) * 1000
    speedup = eager_avg_cost / avg_cost  # ratio of documents scored
    label = f"Lazy RAG (k={k})"
    print(f"{label:<20} {recall:>8.4f} {avg_cost:>12.1f} {avg_time:>12.2f} {speedup:>9.2f}x")
print("\n" + "=" * 70)
print("ANÁLISE DE PRECISÃO vs CUSTO")
print("=" * 70)
print(f"\nEager RAG: Recall={eager_recall:.4f} | Cost={eager_avg_cost:.0f} docs")
for k in TOP_K_VALUES:
    r = results[k]
    recall = r["hits"] / RUNS
    avg_cost = r["lazy_cost"] / RUNS
    reduction = (1 - avg_cost / eager_avg_cost) * 100  # % fewer docs scored
    # "Pareto dominant": at least eager's recall at strictly lower cost.
    status = "✓ PARETO DOMINANTE" if recall >= eager_recall else "✗ recall loss"
    print(f"Lazy k={k:>3}: Recall={recall:.4f} | Cost={avg_cost:.0f} docs | -{reduction:.0f}% custo | {status}")
print("\n" + "=" * 70)
print("CONCLUSÃO")
print("=" * 70)
# Pick the k with the highest recall (ties resolve to the first/smallest k).
best_k = max(TOP_K_VALUES, key=lambda k: results[k]["hits"] / RUNS)
best_recall = results[best_k]["hits"] / RUNS
best_cost = results[best_k]["lazy_cost"] / RUNS
cost_reduction = (1 - best_cost / eager_avg_cost) * 100
speedup = eager_avg_cost / best_cost
print(f"\nMelhor configuração lazy: k={best_k}")
print(f"Recall: {best_recall:.4f} (Eager: {eager_recall:.4f})")
print(f"Custo: {best_cost:.0f} docs vs {eager_avg_cost:.0f} (redução: {cost_reduction:.0f}%)")
print(f"Speedup: {speedup:.1f}x")