Spaces:

ohalkhateeb
/

Dubai_Legislation

Runtime error

ohalkhateeb committed on Mar 4

Commit

8e78866

verified ·

1 Parent(s): cc8f4aa

Update create_database.py

Files changed (1) hide show

create_database.py CHANGED Viewed

@@ -3,22 +3,16 @@ from langchain.vectorstores import Chroma
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import json
-    def create_vector_database(input_path, persist_directory):
-        # Load preprocessed data
-        with open(input_path, "r") as f:
-            docs = json.load(f)
-        # Load an embedding model
-        embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-        # Split text into smaller chunks
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
-        all_texts = [chunk for doc in docs for chunk in text_splitter.split_text(doc["content"])]
-        # Create a ChromaDB vector store
-        vector_db = Chroma.from_texts(texts=all_texts, embedding=embedding_model, persist_directory=persist_directory)
-        print("Vector database created successfully!")
-    if __name__ == "__main__":
-        create_vector_database("preprocessed_data.json", "db")  # Change paths as needed

 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import json
+def create_vector_database(input_path, persist_directory):
+    with open(input_path, "r") as f:
+        docs = json.load(f)
+    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+    all_texts = [chunk for doc in docs for chunk in text_splitter.split_text(doc["content"])]
+    vector_db = Chroma.from_texts(texts=all_texts, embedding=embedding_model, persist_directory=persist_directory)
+    print("Vector database created successfully!")
+if __name__ == "__main__":
+    create_vector_database("preprocessed_data.json", "db")