1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
class VectorStore:
    """Thin wrapper that exposes one add/search interface over several vector stores.

    Supported ``store_type`` values:

    * ``'milvus'`` -- fully wired (connect, insert, search).
    * ``'chroma'`` -- fully wired against a single collection.
    * ``'qdrant'`` -- only the client is created; ``add`` and ``search`` raise
      ``NotImplementedError`` for it.
    """

    # Backends the constructor knows how to connect to.
    _SUPPORTED_STORES = ('milvus', 'qdrant', 'chroma')
    # Backends for which add() and search() are implemented.
    _READ_WRITE_STORES = ('milvus', 'chroma')
    # Collection used for Chroma when the caller does not name one.
    _DEFAULT_CHROMA_COLLECTION = 'default'

    def __init__(self, store_type='milvus', connection_params=None):
        """Connect to the selected backend.

        Args:
            store_type: One of ``'milvus'``, ``'qdrant'`` or ``'chroma'``.
            connection_params: Backend-specific settings.
                For Milvus it must contain ``'collection_name'``; the remaining
                keys are forwarded to ``pymilvus.connections.connect``.
                For Qdrant every key is forwarded to ``QdrantClient``.
                For Chroma only the optional ``'collection_name'`` is read.

        Raises:
            ValueError: If ``store_type`` is unknown, or if Milvus is selected
                without a ``'collection_name'``.
        """
        if store_type not in self._SUPPORTED_STORES:
            raise ValueError(
                f"Unsupported store_type {store_type!r}; "
                f"expected one of {self._SUPPORTED_STORES}"
            )

        # Copy so the caller's dict is never mutated, and so the default
        # ``None`` no longer blows up on ``**`` unpacking.
        params = dict(connection_params or {})
        self.store_type = store_type

        if store_type == 'milvus':
            if 'collection_name' not in params:
                raise ValueError(
                    "connection_params must include 'collection_name' for milvus"
                )
            # Imported lazily so only the selected backend must be installed.
            from pymilvus import Collection, connections

            # The collection name selects a collection; it is not a
            # connection setting, so keep it out of connect().
            collection_name = params.pop('collection_name')
            connections.connect(**params)
            self.collection = Collection(collection_name)
        elif store_type == 'qdrant':
            from qdrant_client import QdrantClient

            self.client = QdrantClient(**params)
        else:  # 'chroma'
            import chromadb

            self.client = chromadb.Client()
            # Records are added to and queried from a collection, not the
            # client itself, so open (or create) one up front.
            self.collection = self.client.get_or_create_collection(
                name=params.get('collection_name', self._DEFAULT_CHROMA_COLLECTION)
            )

    def add(self, chunks, embeddings, ids=None):
        """Add chunks and their vectors to the store.

        Args:
            chunks: Sequence of dicts, each with ``'content'`` and
                ``'metadata'`` keys.
            embeddings: Sequence of vectors, one per chunk, in the same order.
            ids: Optional sequence of ids, one per chunk. When omitted, a
                random UUID hex string is generated per chunk so that repeated
                calls never reuse ids.

        Returns:
            The list of ids that were written (empty when ``chunks`` is empty).

        Raises:
            NotImplementedError: If the backend does not support inserts here.
            ValueError: If ``embeddings`` or ``ids`` differ in length from
                ``chunks``.
        """
        if self.store_type not in self._READ_WRITE_STORES:
            raise NotImplementedError(
                f"add() is not implemented for store_type {self.store_type!r}"
            )

        count = len(chunks)
        if len(embeddings) != count:
            raise ValueError(
                f"Got {count} chunks but {len(embeddings)} embeddings"
            )

        if ids is None:
            import uuid

            ids = [uuid.uuid4().hex for _ in range(count)]
        else:
            ids = list(ids)
            if len(ids) != count:
                raise ValueError(f"Got {count} chunks but {len(ids)} ids")

        if count == 0:
            # Nothing to write; skip the backend round trip.
            return ids

        if self.store_type == 'milvus':
            rows = [
                {
                    'id': id_,
                    'vector': embedding,
                    'content': chunk['content'],
                    'metadata': chunk['metadata'],
                }
                for id_, embedding, chunk in zip(ids, embeddings, chunks)
            ]
            self.collection.insert(rows)
        else:  # 'chroma'
            self.collection.add(
                ids=ids,
                embeddings=embeddings,
                documents=[chunk['content'] for chunk in chunks],
                metadatas=[chunk['metadata'] for chunk in chunks],
            )
        return ids

    def search(self, query_embedding, top_k=5, similarity_threshold=0.7):
        """Run a similarity search for a single query vector.

        Args:
            query_embedding: The query vector.
            top_k: Maximum number of neighbours to request from the backend.
            similarity_threshold: Minimum score a Milvus hit must reach to be
                returned. Not applied to Chroma results.

        Returns:
            Milvus: a list of hits for the query whose ``score`` is at least
            ``similarity_threshold``.
            Chroma: the unmodified result of the collection's ``query`` call.

        Raises:
            NotImplementedError: If the backend does not support search here.
        """
        if self.store_type == 'milvus':
            results = self.collection.search(
                data=[query_embedding],
                anns_field='vector',
                param={'metric_type': 'IP'},
                # pymilvus names the result-count argument ``limit``.
                limit=top_k,
            )
            # One query vector was sent, so only the first result set exists.
            return [hit for hit in results[0] if hit.score >= similarity_threshold]

        if self.store_type == 'chroma':
            return self.collection.query(
                query_embeddings=[query_embedding],
                n_results=top_k,
            )

        raise NotImplementedError(
            f"search() is not implemented for store_type {self.store_type!r}"
        )
|