diff --git a/AgentReact/utils/VectorDatabase.py b/AgentReact/utils/VectorDatabase.py index c1d9f0e..4e944b2 100644 --- a/AgentReact/utils/VectorDatabase.py +++ b/AgentReact/utils/VectorDatabase.py @@ -3,43 +3,21 @@ from langchain_chroma import Chroma # TODO plus tard, ramplacer par PG Vector import sys from pathlib import Path -# Permet de garder ChromaDB en mémoire. -# Cette classe est un Singleton, il n'y en aura qu'une seule et unique instance à tout moment -# https://refactoring.guru/design-patterns/singleton -class VectorDatabase: - instance = None +base_dir:str = Path(sys.argv[0]).resolve().parent.as_posix() # Récupérer le chemin vers le point d'entrée du programme +bdd_path:str = base_dir + "/../chroma_db/" - def __new__(cls): # Selon https://www.geeksforgeeks.org/python/singleton-pattern-in-python-a-complete-guide/ - if cls.instance is None: - cls.instance = super().__new__(cls) - # J'initialise les attributs à None ici, permet de tester si la classe a déjà été init une première fois ou non - cls.instance.__embeddings = None - cls.instance.__chroma = None - return cls.instance - - def __init__(self): - if self.__embeddings is not None: return - - base_dir:str = Path(sys.argv[0]).resolve().parent.as_posix() # Récupérer le chemin vers le point d'entrée du programme - bdd_path:str = base_dir + "/chroma_db/" - - self.__embeddings = HuggingFaceEmbeddings(model_name="jinaai/jina-embeddings-v3", model_kwargs={"trust_remote_code": True}) - self.__chroma = Chroma( +EMBEDDINGS = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large", model_kwargs={"trust_remote_code": True}) +CHROMA = Chroma( persist_directory=bdd_path, - embedding_function=self.__embeddings + embedding_function=EMBEDDINGS ) - def getChroma(self)->Chroma: - return self.__chroma +class VectorDatabase: # Classe pour récupérer la BDD - def getEmbeddings(self)->'Embeddings Hugging Face': - return self.__embeddings + @staticmethod + def getChroma()->Chroma: + return CHROMA -if __name__ == "__main__": - - test1 = VectorDatabase() - print('TEST 1 INIT') - test2 = VectorDatabase() - - print(test1 is test2) - assert test1 is test2 \ No newline at end of file + @staticmethod + def getEmbeddings()->'Embeddings Hugging Face': + return EMBEDDINGS \ No newline at end of file diff --git a/RAG/init.py b/RAG/init.py index a328e71..40e52c6 100644 --- a/RAG/init.py +++ b/RAG/init.py @@ -43,7 +43,7 @@ print("===") # Création du modèle d'embeddings # https://docs.langchain.com/oss/python/integrations/text_embedding/huggingfacehub # https://huggingface.co/jinaai/jina-clip-v2 -embeddings = HuggingFaceEmbeddings(model_name="jinaai/jina-embeddings-v3", model_kwargs={"trust_remote_code": True}) +embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large", model_kwargs={"trust_remote_code": True}) # Stockage des embeddings dans ChromaDB dans un dossier local "chroma_db" vectorstore = Chroma.from_documents(documents=chunks,embedding=embeddings, persist_directory=base_dir.as_posix()+"/chroma_db/",) # https://docs.langchain.com/oss/python/integrations/vectorstores/chroma