saaketvarma committed
Commit 3d0d04e · 1 Parent(s): 45919b9

Update app.py

Files changed (1)
  1. app.py +5 -3
app.py CHANGED
@@ -2,6 +2,7 @@ import base64
 import os
 
 import streamlit as st
+from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.chains import RetrievalQA
 from langchain.document_loaders import PDFMinerLoader
 from langchain.embeddings import SentenceTransformerEmbeddings
@@ -36,11 +37,12 @@ def data_ingestion():
             loader = PDFMinerLoader(os.path.join(root, file))
 
     documents = loader.load()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=500)
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    splits = text_splitter.split_documents(documents)
 
-    # create embeddings here
-    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+    # create embeddings of the chunked document
+    #embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+    embeddings = OpenAIEmbeddings()
     vectordb = FAISS.from_documents(splits, embeddings)
     vectordb.save_local("faiss_index")
 
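For reference, a minimal sketch of how the data_ingestion() pipeline fits together after this commit. Only the changed lines are visible above, so the "docs" source directory name, the ".pdf" filter, and the walk/extend loop are assumptions; the pre-0.1 langchain import paths match those already used in app.py, and OpenAIEmbeddings expects OPENAI_API_KEY to be set in the environment.

# Minimal sketch, not the full app.py. The source_dir name, PDF filter, and
# walk/extend loop are assumptions; only the diffed lines above are from the repo.
import os

from langchain.document_loaders import PDFMinerLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS


def data_ingestion(source_dir: str = "docs") -> None:
    # Load every PDF found under source_dir with PDFMinerLoader.
    documents = []
    for root, _dirs, files in os.walk(source_dir):
        for file in files:
            if file.endswith(".pdf"):
                loader = PDFMinerLoader(os.path.join(root, file))
                documents.extend(loader.load())

    # Split into ~1000-character chunks with a 20-character overlap,
    # matching the new splitter settings in this commit.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    splits = text_splitter.split_documents(documents)

    # Embed the chunks with OpenAI embeddings (reads OPENAI_API_KEY from the
    # environment) and persist a local FAISS index.
    embeddings = OpenAIEmbeddings()
    vectordb = FAISS.from_documents(splits, embeddings)
    vectordb.save_local("faiss_index")

In short, the commit swaps the local all-MiniLM-L6-v2 sentence-transformer embeddings for OpenAI's hosted embeddings and changes the splitter from chunk_size=500 / chunk_overlap=500 (an overlap equal to the chunk size) to chunk_size=1000 / chunk_overlap=20.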