06 — Semantic Cache: Similarity, TTL, and Invalidation¶
Problem: Exact-match caches miss paraphrases. Pure semantic caches return stale answers after the knowledge base changes.
In this notebook: Tiny in-memory cache keyed by embedding similarity plus a KB-version tag; show the hit/miss flip when the KB version bumps. (TTL works the same way: treat entries past their expiry like entries from a stale version.)
In [ ]:
import time  # NOTE(review): unused here — presumably intended for TTL timestamps; confirm or remove
import numpy as np
from sentence_transformers import SentenceTransformer, util
# Small, fast sentence-embedding model; loaded once at module level and shared by the cache.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
class SemanticCache:
    """In-memory semantic cache keyed by embedding similarity plus a KB version tag.

    Each entry records the knowledge-base version it was written under.
    ``get`` only considers entries whose version matches the caller's, so
    bumping the KB version invalidates older entries without any explicit
    deletion (call :meth:`evict_stale` to reclaim the memory).
    """

    def __init__(self, threshold: float = 0.82):
        # Minimum cosine similarity for a cached answer to count as a hit.
        self.threshold = threshold
        # (query, answer, kb_version, embedding) tuples in insertion order.
        self.entries: list[tuple[str, str, int, np.ndarray]] = []

    @staticmethod
    def _embed(text: str):
        """Encode *text* with the module-level model (shared by get/set)."""
        return model.encode(text, convert_to_tensor=True, show_progress_bar=False)

    def get(self, q: str, kb_version: int):
        """Look up *q* among entries written under *kb_version*.

        Returns ("HIT", answer, sim) when the best same-version entry meets
        the threshold, otherwise ("MISS", None, best_sim). best_sim is -1.0
        when no same-version entry exists (cosine similarity is >= -1).
        """
        qe = self._embed(q)
        best_sim, best_ans = -1.0, None
        for _, ans, ver, emb in self.entries:
            if ver != kb_version:
                continue  # stale: written under a different KB version
            sim = float(util.cos_sim(qe, emb))
            if sim > best_sim:
                best_sim, best_ans = sim, ans
        if best_sim >= self.threshold:
            return "HIT", best_ans, best_sim
        return "MISS", None, best_sim

    def set(self, q: str, answer: str, kb_version: int):
        """Store *answer* for query *q* under the given KB version."""
        self.entries.append((q, answer, kb_version, self._embed(q)))

    def evict_stale(self, kb_version: int) -> int:
        """Drop entries whose version differs from *kb_version*; return count removed.

        Stale entries can never be hits, but until evicted they still cost
        one cosine-similarity comparison's worth of scanning per ``get``.
        """
        before = len(self.entries)
        self.entries = [e for e in self.entries if e[2] == kb_version]
        return before - len(self.entries)
# Demo: seed one v1 entry, probe with a paraphrase and an exact repeat,
# then bump the KB version to show that v1 entries stop matching.
cache = SemanticCache(threshold=0.85)
cache.set("What is the refund window?", "30 days from invoice.", kb_version=1)

probes = ("How long do refunds take?", "What is the refund window?")
for query in probes:
    result, answer, score = cache.get(query, kb_version=1)
    print(repr(query), "->", result, "sim=", round(score, 3), "ans=", answer)

print("\nAfter KB bump to v2 (invalidates v1-only entries):")
result, answer, score = cache.get("What is the refund window?", kb_version=2)
print(result, answer, score)
Takeaways
- Pair semantic similarity with index/content version or ETag to avoid stale hits.
- Log false hits (user thumbs-down after cache hit) to tune thresholds.
- Consider two-tier: exact normalized key first, semantic second.