vectordb.py 1.1 KB

12345678910111213141516171819202122232425262728293031
  1. from langchain_chroma import Chroma
  2. from langchain_huggingface import HuggingFaceEmbeddings
  3. from src.config import CHROMA_HOST, CHROMA_PORT
  4. # TODO: Add logging and a heartbeat check
  5. class ChromaManager:
  6. def __init__(
  7. self,
  8. embeddings: str = "Qwen/Qwen3-Embedding-0.6B", # Qwen/Qwen3-Embedding-4B
  9. collection: str = "default",
  10. batch_size: int = 2500,
  11. host: str = CHROMA_HOST, # pyright: ignore[reportArgumentType]
  12. port: int = CHROMA_PORT, # pyright: ignore[reportArgumentType]
  13. ):
  14. self.collection = collection
  15. self.embeddings = HuggingFaceEmbeddings(model=embeddings, show_progress=True)
  16. self.vectordb = Chroma(
  17. embedding_function=self.embeddings,
  18. collection_name=self.collection,
  19. host=host,
  20. port=port,
  21. )
  22. def is_empty(self) -> bool:
  23. data = self.vectordb.get(include=["metadatas"])["metadatas"]
  24. return len(data) == 0
  25. def insert(self, documents) -> None:
  26. self.vectordb.add_documents(documents)