import streamlit as st
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub


@st.cache_resource
def load_vector_store():
    # Load the source document and split it into overlapping chunks.
    loader = TextLoader("data/sample.txt")
    documents = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(documents)

    # Embed the chunks and index them in a FAISS vector store.
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = FAISS.from_documents(chunks, embedding_model)
    return db
|
| | def main(): |
| | st.title("π Ask Your Document (RAG with LangChain + Hugging Face)") |
| | st.write("Upload a document, ask questions, and get answers powered by open-source LLMs!") |
| |
|
| | query = st.text_input("Enter your question:") |
| | if query: |
| | db = load_vector_store() |
| | qa_chain = RetrievalQA.from_chain_type( |
| | llm=HuggingFaceHub( |
| | repo_id="google/flan-t5-base", |
| | model_kwargs={"temperature": 0.5, "max_length": 256} |
| | ), |
| | retriever=db.as_retriever(), |
| | return_source_documents=True |
| | ) |
        # With return_source_documents=True the chain returns a dict with
        # multiple keys, so call it with a query dict instead of .run().
        result = qa_chain({"query": query})
        st.write("### Answer")
        st.write(result["result"])


if __name__ == "__main__":
    main()
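To try this locally, assuming the script is saved as `app.py` (the filename is just an example), make sure `data/sample.txt` exists and the `HUGGINGFACEHUB_API_TOKEN` environment variable is set for the `HuggingFaceHub` wrapper, then start the app with `streamlit run app.py`.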