```python
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder,
    PromptTemplate,
)
from langchain_ollama import OllamaLLM

from chroma_manager import ChromaManager
from config import OLLAMA_URL

# langfuse_handler is referenced in ask() below, so it has to be created
# here. The import path assumes the Langfuse v2 SDK; in v3 it is
# `from langfuse.langchain import CallbackHandler`.
from langfuse.callback import CallbackHandler

langfuse_handler = CallbackHandler()  # credentials come from LANGFUSE_* env vars
- print("Инициализация LLM...")
- llm = OllamaLLM(
- model="llama3.1:8b",
- base_url=f"{OLLAMA_URL}",
- temperature=0.15,
- num_predict=1024,
- reasoning=False,
- )
- print("Инициализация Chroma...")
- retriever = ChromaManager().retriever()
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, just "
    "reformulate it if needed and otherwise return it as is."
)
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
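# The history-aware retriever first has the LLM rewrite the incoming
# question into a standalone one (using the prompt above) and only then
# queries the vector store, so follow-ups like "And how to defeat it?"
# retrieve the right documents.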
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)
qa_system_prompt = (
    "You are an assistant for question-answering tasks. Use "
    "the following pieces of retrieved context to answer the "
    "question. If you don't know the answer, just say that you "
    "don't know. Use three sentences maximum and keep the answer "
    "concise."
    "\n\n"
    "{context}"
)
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
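# document_prompt controls how each retrieved Document is rendered into
# "{context}". "{answer}" is filled from the document's metadata, so the
# indexed documents are expected to carry an "answer" field (the default
# template is "{page_content}").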
question_answer_chain = create_stuff_documents_chain(
    llm, qa_prompt, document_prompt=PromptTemplate.from_template("{answer}")
)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
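# The composed chain streams dict chunks: "context" arrives once with the
# retrieved documents, and "answer" arrives token by token.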
chat_history = []


def ask(question: str):
    print("=" * 100)
    print("User question:", question)
    result = {"question": question, "answer": ""}
    print("=" * 100)
    print("Model answer:")
    for chunk in rag_chain.stream(
        {"input": question, "chat_history": chat_history},
        config={"callbacks": [langfuse_handler]},  # trace the run in Langfuse
    ):
        if "answer" in chunk:
            # Accumulate only the generated tokens.
            print(chunk["answer"], end="", flush=True)
            result["answer"] += chunk["answer"]
    print()
    # (role, content) tuples are converted to proper messages when the
    # MessagesPlaceholder is filled in.
    chat_history.append(("human", result["question"]))
    chat_history.append(("ai", result["answer"]))


def main():
    questions = [
        # Russian-language versions of these four questions were also tried.
        "What bosses are there in Terraria?",
        "What is the final boss?",
        "And how to defeat it?",
        "What armor should be used against it?",
    ]
    for question in questions:
        ask(question)


if __name__ == "__main__":
    main()
```
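
The script assumes a small `ChromaManager` wrapper whose `retriever()` method returns a standard LangChain retriever. A minimal sketch of what such a class could look like is below; the collection name, persist directory, and embedding model are assumptions, not the contents of the real `chroma_manager.py`:

```python
# Hypothetical sketch of chroma_manager.py; all names below are assumed.
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings

from config import OLLAMA_URL


class ChromaManager:
    def __init__(self) -> None:
        # Persistent Chroma collection with Ollama-served embeddings.
        self._store = Chroma(
            collection_name="terraria_wiki",  # assumed collection name
            persist_directory="./chroma_db",  # assumed storage path
            embedding_function=OllamaEmbeddings(
                model="nomic-embed-text",  # assumed embedding model
                base_url=OLLAMA_URL,
            ),
        )

    def retriever(self):
        # as_retriever() adapts the store to the retriever interface that
        # create_history_aware_retriever expects.
        return self._store.as_retriever(search_kwargs={"k": 4})
```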