VectorX LangChain Integration Demo
This doc demonstrates how to use VectorX (an encrypted vector database) with LangChain for secure vector search and retrieval.
1. Installation
First, let’s make sure we have all the required packages installed.
# Install necessary packages
!pip install vecx-langchain langchain langchain-openai
2. Setting up VectorX and OpenAI credentials
We need to set up our API credentials for VectorX and OpenAI.
import os
from langchain_openai import OpenAIEmbeddings
from vecx.vectorx import VectorX
# Set API keys.
# Both string values below look like placeholders - replace them with your
# real credentials before running this cell.
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
vecx_api_token = "vectorx-api-key"
# Initialize the VectorX client with the API token
vx = VectorX(token=vecx_api_token)
# Generate encryption key if you don't have one.
# NOTE(review): this call runs every time the cell is executed; presumably it
# returns a new key each time, so reuse a previously saved key when you need
# to read an existing encrypted index - confirm against the VectorX docs.
encryption_key = vx.generate_key()
# Make sure to save this key securely - you'll need it to access your encrypted vectors.
# The key is written to the cell output in plaintext, so avoid sharing the output.
print("Encryption key:", encryption_key)
3. Initializing the Embedding Model
We’ll use OpenAI’s embeddings for our vector search.
# Initialize the embedding model.
# The model is named explicitly so that the hard-coded dimension below stays
# correct even if the library's default embedding model changes.
embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002")
# OpenAI's text-embedding-ada-002 model produces 1536-dimensional vectors;
# this value is passed as the index dimension when the vector store is created.
dimension = 1536
4. Setting up VectorX with LangChain
Now we’ll set up the VectorX vector store integration with LangChain.
from vecx_langchain import VectorXVectorStore
import time
# Suffix the index name with the current Unix time so that repeated runs of
# the demo do not clash with an index created by an earlier run.
timestamp = int(time.time())
index_name = f"langchain_demo_{timestamp}"

# Build the LangChain vector store on top of the VectorX index, reusing the
# embedding model, credentials and dimension defined above.
vector_store = VectorXVectorStore.from_params(
    embedding=embedding_model,
    api_token=vecx_api_token,
    encryption_key=encryption_key,
    index_name=index_name,
    dimension=dimension,
    space_type="cosine",  # Can be "cosine", "l2", or "ip"
)
print(f"Initialized VectorX vector store with index: {index_name}")
5. Creating Sample Documents
Let’s create some sample texts with metadata to index.
# Six short passages to index: two on programming languages, two on AI and
# two on vector databases.
texts = [
    "Python is a high-level, interpreted programming language known for its readability and simplicity.",
    "JavaScript is a scripting language that enables interactive web pages and is an essential part of web applications.",
    "Machine learning is a subset of artificial intelligence that provides systems the ability to automatically learn and improve from experience.",
    "Deep learning is part of a broader family of machine learning methods based on artificial neural networks with representation learning.",
    "Vector databases are specialized database systems designed to store and query high-dimensional vectors for similarity search.",
    "VectorX is an encrypted vector database that provides secure and private vector search capabilities.",
]

# One metadata dict per text, in the same order as `texts`. The "category"
# and "difficulty" keys are used by the filtered searches later in the demo.
metadatas = [
    {"category": "programming", "language": "python", "difficulty": "beginner", "doc_id": "doc1"},
    {"category": "programming", "language": "javascript", "difficulty": "intermediate", "doc_id": "doc2"},
    {"category": "ai", "field": "machine_learning", "difficulty": "advanced", "doc_id": "doc3"},
    {"category": "ai", "field": "deep_learning", "difficulty": "advanced", "doc_id": "doc4"},
    {"category": "database", "type": "vector", "difficulty": "intermediate", "doc_id": "doc5"},
    {"category": "database", "type": "vector", "product": "vectorx", "difficulty": "intermediate", "doc_id": "doc6"},
]

print(f"Created {len(texts)} sample documents")
6. Adding Documents to VectorX
Let’s add our documents to the VectorX vector store.
# Add the sample texts and their metadata to the vector store. The returned
# value is kept as `ids` because a later cell deletes a document by ID.
ids = vector_store.add_texts(texts=texts, metadatas=metadatas)
print(f"Added {len(ids)} documents with the following IDs:")
# Number the documents from 1 for display.
for i, doc_id in enumerate(ids, start=1):
    print(f"Document {i}: {doc_id}")
7. Basic Similarity Search
Now let’s perform a basic similarity search.
# Perform a basic similarity search, asking for the two closest documents.
query = "What is Python?"
results = vector_store.similarity_search(query, k=2)
print(f"Query: '{query}'")
print(f"\nFound {len(results)} similar documents:")
# Show the text and metadata of each hit, numbered from 1.
for i, doc in enumerate(results, start=1):
    print(f"\nResult {i}:")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
8. Similarity Search with Scores
Let’s perform a similarity search that also returns the similarity scores.
# Search with scores: each result is a (document, score) pair.
query = "Tell me about vector databases"
results_with_scores = vector_store.similarity_search_with_score(query, k=2)
print(f"Query: '{query}'")
print(f"\nFound {len(results_with_scores)} similar documents:")
# Unpack each pair and print the score to four decimal places.
for i, (doc, score) in enumerate(results_with_scores, start=1):
    print(f"\nResult {i}:")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
    print(f"Similarity Score: {score:.4f}")
9. Filtering Search Results by Metadata
Let’s perform a search with a metadata filter.
# Search with a metadata filter: the filter dict is passed to the search so
# that only documents whose "category" is "programming" are requested.
query = "Tell me about programming languages"
filter_dict = {"category": "programming"}
filtered_results = vector_store.similarity_search(
    query=query,
    k=3,
    filter=filter_dict,
)
print(f"Query: '{query}' with filter: {filter_dict}")
print(f"\nFound {len(filtered_results)} filtered results:")
# Show the text and metadata of each hit, numbered from 1.
for i, doc in enumerate(filtered_results, start=1):
    print(f"\nResult {i}:")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
10. Multiple Metadata Filters
Let’s try more specific filtering.
# Search with multiple metadata filters: both "category" and "difficulty"
# are supplied in the same filter dict.
query = "Tell me about AI"
filter_dict = {"category": "ai", "difficulty": "advanced"}
multi_filtered_results = vector_store.similarity_search(
    query=query,
    k=3,
    filter=filter_dict,
)
print(f"Query: '{query}' with filter: {filter_dict}")
print(f"\nFound {len(multi_filtered_results)} filtered results:")
# Show the text and metadata of each hit, numbered from 1.
for i, doc in enumerate(multi_filtered_results, start=1):
    print(f"\nResult {i}:")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
11. Creating a Retriever
Let’s create a LangChain retriever from our vector store.
# Create a retriever from the vector store; search_kwargs asks for two
# documents per query. The RAG chain built later reuses this retriever.
retriever = vector_store.as_retriever(search_kwargs={"k": 2})
# Use the retriever directly with a plain-text question.
retrieved_docs = retriever.invoke("What is machine learning?")
print(f"Retrieved {len(retrieved_docs)} documents:")
# Show the text and metadata of each retrieved document, numbered from 1.
for i, doc in enumerate(retrieved_docs, start=1):
    print(f"\nDocument {i}:")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
12. Building a RAG Pipeline with LangChain
Let’s build a complete Retrieval-Augmented Generation (RAG) pipeline with LangChain.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
# Chat model that generates the final answer.
llm = ChatOpenAI(model="gpt-3.5-turbo")

# Prompt with two placeholders: {context} receives the retrieved text and
# {question} the user's question. The template is assembled from adjacent
# string literals; the resulting text begins and ends with a newline.
prompt = ChatPromptTemplate.from_template(
    "\n"
    "Answer the following question based only on the provided context:\n"
    "Context: {context}\n"
    "Question: {question}\n"
)
def format_docs(docs):
    """Join the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string
            attribute, such as the documents returned by the retriever.

    Returns:
        The ``page_content`` values separated by a blank line, or an empty
        string when ``docs`` is empty.
    """
    # A generator is enough here; str.join consumes it without an extra list.
    return "\n\n".join(doc.page_content for doc in docs)
# Assemble the RAG chain. The input question is sent to the retriever, whose
# documents are flattened by format_docs into {context}; the same question is
# passed through unchanged as {question}. The filled prompt goes to the LLM
# and the reply is parsed into a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Run the chain on a sample question and show the answer.
question = "What are vector databases and how do they work?"
response = rag_chain.invoke(question)
print(f"Question: {question}")
print(f"\nResponse: {response}")
13. Deleting Documents
Let’s demonstrate how to delete documents from the vector store.
# Delete a document by ID, using the first ID returned when the texts were added.
id_to_delete = ids[0]  # Delete the first document
print(f"Deleting document with ID: {id_to_delete}")
vector_store.delete(ids=[id_to_delete])
# Verify deletion by searching for the topic of the removed document and
# printing the single closest remaining match.
query = "Python programming language"
results_after_delete = vector_store.similarity_search(query, k=1)
print(f"\nResults after deletion for query '{query}':")
for i, doc in enumerate(results_after_delete, start=1):
    print(f"Result {i}: {doc.page_content}")
14. Deleting by Filter
Let’s demonstrate how to delete documents using a filter.
# Delete documents by filter: the filter dict is passed to delete() instead
# of a list of IDs.
filter_to_delete = {"category": "programming"}
print(f"Deleting documents with filter: {filter_to_delete}")
vector_store.delete(filter=filter_to_delete)
# Verify deletion by searching for a programming topic and printing the
# metadata of whatever is returned.
programming_query = "JavaScript programming"
results_after_filter_delete = vector_store.similarity_search(programming_query, k=2)
print(f"\nResults after filter deletion for query '{programming_query}':")
for i, doc in enumerate(results_after_filter_delete, start=1):
    print(f"Result {i}: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
15. Persistence and Reconnection
VectorX stores your vectors in the cloud. Here’s how to reconnect to an existing index.
# To reconnect to an existing index, use the same parameters
def reconnect_to_index(api_token, encryption_key, index_name):
    """Build a vector store bound to an already-created VectorX index.

    Args:
        api_token: VectorX API token.
        encryption_key: Encryption key to use for the index.
        index_name: Name of the existing index.

    Returns:
        The ``VectorXVectorStore`` produced by ``from_params``.
    """
    # The index dimension is read from the module-level ``dimension`` variable,
    # so that variable must be defined before this function is called.
    # NOTE(review): ``space_type`` is not passed here although the index was
    # created with "cosine" - confirm that the library default matches.
    return VectorXVectorStore.from_params(
        embedding=OpenAIEmbeddings(),
        api_token=api_token,
        encryption_key=encryption_key,
        index_name=index_name,
        dimension=dimension,
    )
# Example usage (commented out as we already have our vector store)
# reconnected_store = reconnect_to_index(vecx_api_token, encryption_key, index_name)
# results = reconnected_store.similarity_search("What are vector databases?", k=1)

# Print the three values needed to reconnect later.
# NOTE: the API token and encryption key are written to the output in
# plaintext - clear the output before sharing this notebook.
print("To reconnect to this index in the future, use:\n")
print(f"API Token: {vecx_api_token}")
print(f"Encryption Key: {encryption_key}")
print(f"Index Name: {index_name}")
16. Cleanup
If you want to delete the index when you’re done, you can do so with the VectorX client.
# Comment this out if you want to keep your index
# vx.delete_index(index_name)
# print(f"Index {index_name} deleted")