""" Tests for embeddings and retrieval functionality. """ import pytest import os from app.core.embedding_service import OpenAIEmbeddingService from app.core.static_memory import InMemoryMemoryService @pytest.fixture def embedding_service(): """Create an embedding service (mocked for tests).""" # In real tests, we would mock the OpenAI API # For now, we return the service but tests will be skipped if no API key return OpenAIEmbeddingService() @pytest.fixture def memory_service(embedding_service): """Create an in-memory memory service.""" return InMemoryMemoryService(embedding_service) @pytest.mark.asyncio async def test_embedding_generation(embedding_service, request): """Test embedding generation.""" if not request.config.getoption("--run-integration"): pytest.skip("Requires OpenAI API key and --run-integration flag") texts = ["This is a test.", "Another test message."] embeddings = await embedding_service.get_embeddings(texts) assert len(embeddings) == 2 assert all(isinstance(emb, list) for emb in embeddings) assert all(len(emb) > 0 for emb in embeddings) @pytest.mark.asyncio async def test_memory_add_and_query(memory_service, request): """Test adding chunks and querying memory.""" if not request.config.getoption("--run-integration"): pytest.skip("Requires OpenAI API key and --run-integration flag") # Add chunks chunks = [ {"chunk_text": "Python is a programming language.", "token_count": 6}, {"chunk_text": "JavaScript is used for web development.", "token_count": 7}, ] vector_ids = await memory_service.add_chunks( document_id="test-doc-1", chunks=chunks, metadata={"source": "test"} ) assert len(vector_ids) == 2 # Query results = await memory_service.query("programming language", top_k=1) assert len(results) > 0 assert "Python" in results[0]["chunk_text"] @pytest.mark.asyncio async def test_memory_delete(memory_service, request): """Test deleting document from memory.""" if not request.config.getoption("--run-integration"): pytest.skip("Requires OpenAI API key and --run-integration flag") chunks = [ {"chunk_text": "Test content.", "token_count": 2}, ] await memory_service.add_chunks( document_id="test-doc-2", chunks=chunks, metadata={} ) await memory_service.delete_document("test-doc-2") results = await memory_service.query("Test content", top_k=5) matching = [r for r in results if r.get("doc_id") == "test-doc-2"] assert len(matching) == 0