| 12345678910111213141516171819202122232425262728293031 |
- from langchain_chroma import Chroma
- from langchain_huggingface import HuggingFaceEmbeddings
- from src.config import CHROMA_HOST, CHROMA_PORT
- # TODO: Add logging and a heartbeat check
class ChromaManager:
    """Thin wrapper around a LangChain ``Chroma`` vector store.

    Builds a ``HuggingFaceEmbeddings`` model and a ``Chroma`` instance bound
    to one collection, and exposes helpers to check whether the collection
    holds any records and to insert documents in bounded batches.
    """

    def __init__(
        self,
        embeddings: str = "Qwen/Qwen3-Embedding-0.6B",  # Qwen/Qwen3-Embedding-4B
        collection: str = "default",
        batch_size: int = 2500,
        host: str = CHROMA_HOST,  # pyright: ignore[reportArgumentType]
        port: int = CHROMA_PORT,  # pyright: ignore[reportArgumentType]
    ):
        """Create the embedding model and the vector store handle.

        Args:
            embeddings: Model identifier passed to ``HuggingFaceEmbeddings``.
            collection: Name of the Chroma collection to operate on.
            batch_size: Maximum number of documents sent per
                ``add_documents`` call in :meth:`insert`.
            host: Host value forwarded to ``Chroma``.
            port: Port value forwarded to ``Chroma``.

        Raises:
            ValueError: If ``batch_size`` is not a positive integer.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        self.collection = collection
        # Previously accepted but silently dropped; kept so insert() can
        # split large uploads into bounded requests.
        self.batch_size = batch_size
        self.embeddings = HuggingFaceEmbeddings(model=embeddings, show_progress=True)
        self.vectordb = Chroma(
            embedding_function=self.embeddings,
            collection_name=self.collection,
            host=host,
            port=port,
        )

    def is_empty(self) -> bool:
        """Return ``True`` when the collection contains no records."""
        # One record is enough to answer the question; asking for the whole
        # collection would transfer every metadata entry just to count them.
        probe = self.vectordb.get(limit=1, include=["metadatas"])
        return not probe["ids"]

    def insert(self, documents) -> None:
        """Add ``documents`` to the collection in chunks of ``batch_size``.

        An empty input results in no calls to the vector store.
        """
        # Materialise once so any iterable can be sliced.
        pending = list(documents)
        # Fall back to the constructor default for instances created without
        # running __init__ (e.g. via __new__).
        step = getattr(self, "batch_size", 2500)
        for start in range(0, len(pending), step):
            self.vectordb.add_documents(pending[start:start + step])
|