Passage Jina -> intfloat/multilingual-e5-large
Jina me donne toujours RuntimeError: The size of tensor a (5) must match the size of tensor b (4) at non-singleton dimension 1
This commit is contained in:
@@ -3,43 +3,21 @@ from langchain_chroma import Chroma # TODO plus tard, remplacer par PG Vector
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Keeps the ChromaDB handle in memory for the whole process.
# The embedding model and the Chroma client are created once, at import time,
# and shared through the VectorDatabase accessors below (this replaces the
# previous __new__/__init__ singleton implementation).

# Directory containing the program entry point.
base_dir: str = Path(sys.argv[0]).resolve().parent.as_posix()
# Chroma persistence directory. Uses "/chroma_db/" to match the path the
# ingestion script persists to; the old class attribute said "/../chroma_db/"
# while its __init__ actually used "/chroma_db/" — NOTE(review): confirm the
# intended location relative to the entry point.
bdd_path: str = base_dir + "/chroma_db/"

# jinaai/jina-embeddings-v3 kept raising "RuntimeError: The size of tensor a (5)
# must match the size of tensor b (4)", hence the switch to multilingual-e5-large.
EMBEDDINGS = HuggingFaceEmbeddings(
    model_name="intfloat/multilingual-e5-large",
    model_kwargs={"trust_remote_code": True},
)
CHROMA = Chroma(
    persist_directory=bdd_path,
    embedding_function=EMBEDDINGS,
)


class VectorDatabase:
    """Facade class giving access to the shared vector database objects."""

    @staticmethod
    def getChroma() -> Chroma:
        """Return the process-wide Chroma vector store."""
        return CHROMA

    @staticmethod
    def getEmbeddings() -> "HuggingFaceEmbeddings":
        """Return the process-wide embedding model."""
        return EMBEDDINGS
|
||||
@@ -43,7 +43,7 @@ print("===")
|
||||
# Create the embedding model.
# https://docs.langchain.com/oss/python/integrations/text_embedding/huggingfacehub
# jinaai/jina-embeddings-v3 failed with a tensor-size RuntimeError, so the model
# was switched to intfloat/multilingual-e5-large (same model as the reader side).
embeddings = HuggingFaceEmbeddings(
    model_name="intfloat/multilingual-e5-large",
    model_kwargs={"trust_remote_code": True},
)

# Store the chunk embeddings in ChromaDB under the local "chroma_db" folder.
# https://docs.langchain.com/oss/python/integrations/vectorstores/chroma
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=base_dir.as_posix() + "/chroma_db/",
)
|
||||
|
||||
Reference in New Issue
Block a user