I am unable to get the assessment running in the Gradio notebook. Below is my server_app.py file; please help. Also, I'm almost out of my 32 hours of server time.
@vkudlay
P.S. Sorry for the bad code.
%%writefile server_app.py
# https://python.langchain.com/docs/langserve#server
from functools import partial
from operator import itemgetter

import gradio as gr
from fastapi import FastAPI
from langchain.document_transformers import LongContextReorder
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableBranch, RunnableLambda
from langchain_core.runnables.passthrough import RunnableAssign
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
from langserve import add_routes
embedder = NVIDIAEmbeddings(
    model="nvidia/embed-qa-4", truncate="END",
    base_url="http://llm_client:9000/v1"
)
docstore = FAISS.load_local("docstore_index", embedder, allow_dangerous_deserialization=True)
docs = list(docstore.docstore._dict.values())
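## Hypothetical sanity check (my own addition, not part of the assessment):
## just confirms the index deserialized and has chunks in it
# print(f"Loaded {len(docs)} chunks from docstore_index")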
#####################################################################
# NVIDIAEmbeddings.get_available_models(base_url="http://llm_client:9000/v1")
# ChatNVIDIA.get_available_models(base_url="http://llm_client:9000/v1")
instruct_llm = ChatNVIDIA(
    model="mistralai/mixtral-8x22b-instruct-v0.1",
    base_url="http://llm_client:9000/v1"
)
llm = instruct_llm | StrOutputParser()
#####################################################################
def docs2str(docs, title="Document"):
    """Useful utility for making chunks into a context string. Optional, but useful."""
    out_str = ""
    for doc in docs:
        doc_name = getattr(doc, 'metadata', {}).get('Title', title)
        if doc_name:
            out_str += f"[Quote from {doc_name}] "
        out_str += getattr(doc, 'page_content', str(doc)) + "\n"
    return out_str
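## Example of what I believe docs2str produces (assuming each chunk carries a
## 'Title' in its metadata):
# docs2str(docs[:1])  # -> "[Quote from <Title>] <page_content>\n"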
chat_prompt = ChatPromptTemplate.from_template(
    "You are a document chatbot. Help the user as they ask questions about documents."
    " The user just asked you a question: {input}\n\n"
    " The following information may be useful for your response: "
    " Document Retrieval:\n{context}\n\n"
    " (Answer only from retrieval. Only cite sources that are used. Make your response conversational.)"
    "\n\nUser Question: {input}"
)
def output_puller(inputs):
    """Output generator. Useful if your chain returns a dictionary with key 'output'."""
    print('inputs', inputs)
    yield inputs
    ## Commented-out variant I also tried, which pulls the 'output' key from streamed chunks:
    # if isinstance(inputs, dict):
    #     inputs = [inputs]
    # for token in inputs:
    #     if token.get('output'):
    #         yield token.get('output')
    #     else:
    #         yield ""
#####################################################################
## TODO: Pull in your desired RAG Chain. Memory not necessary
## Chain 1 Specs: "Hello World" -> retrieval_chain
## -> {'input': <str>, 'context' : <str>}
long_reorder = RunnableLambda(LongContextReorder().transform_documents) ## GIVEN
## context_getter = itemgetter('input') | docstore.as_retriever() | long_reorder | docs2str  ## TODO
retrieval_chain = RunnableLambda(lambda x: docstore.as_retriever())
## retrieval_chain = {'input': (lambda x: x)} | RunnableAssign({'context': context_getter})
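## Hypothetical smoke test (my own addition): per the Chain 1 spec above,
## invoking this should yield a dict with string 'input' and 'context' keys
# print(retrieval_chain.invoke("Hello World"))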
## Chain 2 Specs: retrieval_chain -> generator_chain
## -> {"output" : <str>, ...} -> output_puller
generator_chain = chat_prompt | llm  ## TODO
# generator_chain = RunnableAssign({'output': generator_chain})  # | output_puller  ## GIVEN
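## Hypothetical smoke test (my own addition): per the Chain 2 spec above,
## streaming this should yield {'output': <str>} chunks for output_puller to unpack
# for token in generator_chain.stream({'input': 'Hello World', 'context': 'Hello World'}):
#     print(token, end='')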
## END TODO
#####################################################################
rag_chain = retrieval_chain | generator_chain
app = FastAPI(
    title="LangChain Server",
    version="1.0",
    description="A simple api server using Langchain's Runnable interfaces",
)
add_routes(
    app,
    instruct_llm,
    path="/basic_chat",
)
add_routes(
    app,
    generator_chain,
    path="/generator",
)
add_routes(
    app,
    retrieval_chain,
    path="/retriever",
)
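## (If I understand LangServe correctly, each add_routes call above should expose
## /invoke, /batch, and /stream endpoints under its path.)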
## Standard entry point, since this is run as a standalone python file...
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=9012)
The individual chains seem to work as expected when I call them through RemoteRunnable and stream the results.
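For reference, this is roughly how I've been testing the endpoints from the frontend notebook (a sketch of my client code; I'm assuming the server is reachable at port 9012, with the paths matching the add_routes calls above):

from langserve import RemoteRunnable
from langchain_core.output_parsers import StrOutputParser

## Connect to one of the routes served by server_app.py and stream from it
basic_chat = RemoteRunnable("http://0.0.0.0:9012/basic_chat/") | StrOutputParser()
for token in basic_chat.stream("Hello World"):
    print(token, end='')

Each route streams tokens fine like this on its own; it's only the assessment check in the Gradio notebook that fails.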