"""
Lazy vs Eager RAG Benchmark
Usando sentence-transformers + FAISS
Rodar na máquina local: pip install sentence-transformers faiss-cpu
"""
import faiss
import numpy as np
import time
from sentence_transformers import SentenceTransformer
# ---------------------------
# Config
# ---------------------------
RUNS = 20  # number of runs, for a stable average
DATASET_SIZE = 1000  # number of documents in the haystack
TOP_K_VALUES = [10, 20, 50, 100]  # candidate-list sizes to test for lazy search
MODEL_NAME = 'all-MiniLM-L6-v2'  # sentence-transformers embedding model (384-dim)
print(f"Carregando modelo {MODEL_NAME}...")
model = SentenceTransformer(MODEL_NAME)
# ---------------------------
# Generate dataset
# ---------------------------
def generate_dataset(size):
    """Build `size` synthetic documents, cycling through a fixed topic list.

    Document i talks about topic ``i % len(topics)``, so the corpus is an
    even mix of the ten subjects below.
    """
    subjects = (
        "machine learning", "physics", "chemistry", "biology",
        "history", "mathematics", "economics", "philosophy",
        "engineering", "medicine",
    )
    n_subjects = len(subjects)
    return [
        f"Document {idx}: information about {subjects[idx % n_subjects]} including various concepts and applications related to {subjects[idx % n_subjects]} research"
        for idx in range(size)
    ]
# ---------------------------
# Eager Search
# ---------------------------
def eager_search(query_vec, embeddings, docs):
    """Exhaustive (eager) retrieval: score every document, return the best.

    Args:
        query_vec: query embedding, shape (dim,) — assumed L2-normalized by
            the caller so the inner product equals cosine similarity.
        embeddings: document embeddings, shape (n_docs, dim), row-aligned
            with `docs` and likewise L2-normalized.
        docs: list of document strings.

    Returns:
        Tuple of (best_doc, comparisons, seconds) where `comparisons` is the
        number of documents scored (always len(docs) for the eager scan).
    """
    # perf_counter is monotonic and high-resolution; time.time() can jump
    # with clock adjustments and is too coarse for micro-benchmarks.
    start = time.perf_counter()
    sims = embeddings @ query_vec  # one dot product per document
    idx = np.argmax(sims)
    cost = time.perf_counter() - start
    return docs[idx], len(docs), cost
# ---------------------------
# Lazy Search (FAISS ANN)
# ---------------------------
def lazy_search(query_vec, index, embeddings, docs, top_k):
    """Lazy retrieval: let the index shortlist `top_k` candidates, rescore only those.

    Args:
        query_vec: L2-normalized query embedding, shape (dim,).
        index: a FAISS-style index exposing ``search(queries, k)`` that
            returns (distances, indices) arrays of shape (n_queries, k).
        embeddings: full document embedding matrix, shape (n_docs, dim).
        docs: list of document strings, row-aligned with `embeddings`.
        top_k: size of the candidate shortlist to rescore.

    Returns:
        Tuple of (best_doc, comparisons, seconds) where `comparisons` is
        `top_k` — the number of documents actually rescored.

    NOTE(review): if top_k > len(docs), FAISS pads indices with -1, which
    would index from the end of `embeddings` here — callers keep k <= n.
    """
    # Same timing convention as eager_search: monotonic perf_counter.
    start = time.perf_counter()
    query_reshaped = query_vec.reshape(1, -1)  # FAISS expects a 2-D batch
    D, I = index.search(query_reshaped, top_k)
    candidates = embeddings[I[0]]  # gather only the shortlisted rows
    sims = candidates @ query_vec  # exact rescoring of the shortlist
    best_local = np.argmax(sims)
    best_idx = I[0][best_local]  # map shortlist position back to corpus index
    cost = time.perf_counter() - start
    return docs[best_idx], top_k, cost
# ---------------------------
# Run benchmark
# ---------------------------
print(f"\nGerando dataset de {DATASET_SIZE} documentos...")
base_docs = generate_dataset(DATASET_SIZE)
# Per-k accumulators: hit counts plus summed costs/timings over all runs.
results = {k: {"hits": 0, "eager_cost": 0, "lazy_cost": 0, "eager_time": 0, "lazy_time": 0} for k in TOP_K_VALUES}
eager_baseline = {"hits": 0, "cost": 0, "time": 0}
print(f"Rodando {RUNS} experimentos...\n")
for run in range(RUNS):
    # Build the dataset with the "needle" inserted at a random position.
    docs = base_docs.copy()
    needle = f"NEEDLE: unique quantum lazy evaluation principle document run {run}"
    needle_pos = np.random.randint(0, len(docs))
    docs.insert(needle_pos, needle)
    query = "quantum lazy evaluation unique principle"
    # Encode the full corpus (needle included) on every run.
    all_embeddings = model.encode(docs, show_progress_bar=False)
    all_embeddings = np.array(all_embeddings).astype("float32")
    # L2-normalize so inner product == cosine similarity.
    faiss.normalize_L2(all_embeddings)
    query_vec = model.encode([query])[0].astype("float32")
    query_vec = query_vec / np.linalg.norm(query_vec)
    # Build an exact inner-product FAISS index over the corpus.
    dim = all_embeddings.shape[1]
    index = faiss.IndexFlatIP(dim)
    index.add(all_embeddings)
    # Eager: full scan over every document.
    eager_result, eager_comparisons, eager_time = eager_search(query_vec, all_embeddings, docs)
    if "NEEDLE" in eager_result:
        eager_baseline["hits"] += 1
    # Cost/time accumulate every run (hit or miss) — averaged by /RUNS later.
    eager_baseline["cost"] += eager_comparisons
    eager_baseline["time"] += eager_time
    # Lazy search with each candidate-list size k.
    for k in TOP_K_VALUES:
        lazy_result, lazy_comparisons, lazy_time = lazy_search(query_vec, index, all_embeddings, docs, k)
        if "NEEDLE" in lazy_result:
            results[k]["hits"] += 1
        # Eager figures are duplicated per-k for side-by-side reporting.
        results[k]["eager_cost"] += eager_comparisons
        results[k]["lazy_cost"] += lazy_comparisons
        results[k]["eager_time"] += eager_time
        results[k]["lazy_time"] += lazy_time
# ---------------------------
# Results
# ---------------------------
print("=" * 70)
print("RESULTADOS DO BENCHMARK — Lazy vs Eager RAG")
print(f"Dataset: {DATASET_SIZE+1} docs | Runs: {RUNS} | Modelo: {MODEL_NAME}")
print("=" * 70)
print(f"\n{'Method':<20} {'Recall':>8} {'Cost (docs)':>12} {'Time (ms)':>12} {'Speedup':>10}")
print("-" * 70)
# Eager baseline, averaged over all runs.
eager_recall = eager_baseline["hits"] / RUNS
eager_avg_cost = eager_baseline["cost"] / RUNS
eager_avg_time = (eager_baseline["time"] / RUNS) * 1000  # seconds -> ms
print(f"{'Eager RAG':<20} {eager_recall:>8.4f} {eager_avg_cost:>12.1f} {eager_avg_time:>12.2f} {'1.00x':>10}")
# One table row per lazy configuration.
for k in TOP_K_VALUES:
    r = results[k]
    recall = r["hits"] / RUNS
    avg_cost = r["lazy_cost"] / RUNS
    avg_time = (r["lazy_time"] / RUNS) * 1000
    speedup = eager_avg_cost / avg_cost  # ratio of documents scored
    label = f"Lazy RAG (k={k})"
    print(f"{label:<20} {recall:>8.4f} {avg_cost:>12.1f} {avg_time:>12.2f} {speedup:>9.2f}x")
print("\n" + "=" * 70)
print("ANÁLISE DE PRECISÃO vs CUSTO")
print("=" * 70)
print(f"\nEager RAG: Recall={eager_recall:.4f} | Cost={eager_avg_cost:.0f} docs")
for k in TOP_K_VALUES:
    r = results[k]
    recall = r["hits"] / RUNS
    avg_cost = r["lazy_cost"] / RUNS
    reduction = (1 - avg_cost / eager_avg_cost) * 100  # % fewer docs scored
    # "Pareto dominant": at least eager's recall at strictly lower cost.
    status = "✓ PARETO DOMINANTE" if recall >= eager_recall else "✗ recall loss"
    print(f"Lazy k={k:>3}: Recall={recall:.4f} | Cost={avg_cost:.0f} docs | -{reduction:.0f}% custo | {status}")
print("\n" + "=" * 70)
print("CONCLUSÃO")
print("=" * 70)
# Pick the k with the highest recall (ties resolve to the first/smallest k).
best_k = max(TOP_K_VALUES, key=lambda k: results[k]["hits"] / RUNS)
best_recall = results[best_k]["hits"] / RUNS
best_cost = results[best_k]["lazy_cost"] / RUNS
cost_reduction = (1 - best_cost / eager_avg_cost) * 100
speedup = eager_avg_cost / best_cost
print(f"\nMelhor configuração lazy: k={best_k}")
print(f"Recall: {best_recall:.4f} (Eager: {eager_recall:.4f})")
print(f"Custo: {best_cost:.0f} docs vs {eager_avg_cost:.0f} (redução: {cost_reduction:.0f}%)")
print(f"Speedup: {speedup:.1f}x")