VectorX LangChain Integration Demo

This doc demonstrates how to use VectorX (an encrypted vector database) with LangChain for secure vector search and retrieval.

1. Installation

First, let’s make sure we have all the required packages installed.


# Install the packages this walkthrough imports: the VectorX LangChain
# integration, LangChain itself, and LangChain's OpenAI bindings.
# The leading "!" runs the command in a shell from a notebook cell; when
# installing from a terminal, drop the "!".
!pip install vecx-langchain langchain langchain-openai

2. Setting up VectorX and OpenAI credentials

We need to set up our API credentials for VectorX and OpenAI.


import os
from langchain_openai import OpenAIEmbeddings
from vecx.vectorx import VectorX
 
# Set API keys.
# Credentials are taken from the environment when they are already there, so
# real secrets never have to be pasted into the notebook. The literal strings
# below are placeholders used only as fallbacks - replace them (or export
# OPENAI_API_KEY / VECTORX_API_TOKEN) before running the rest of the demo.
os.environ.setdefault("OPENAI_API_KEY", "your-openai-api-key")
vecx_api_token = os.environ.get("VECTORX_API_TOKEN", "vectorx-api-key")

# Initialize VectorX client
vx = VectorX(token=vecx_api_token)

# Reuse the encryption key from VECTORX_ENCRYPTION_KEY when one is supplied;
# generate a new one only if you don't have one yet.
encryption_key = os.environ.get("VECTORX_ENCRYPTION_KEY")
if not encryption_key:
    encryption_key = vx.generate_key()
    # Make sure to save this key securely - you'll need it to access your encrypted vectors
    print("Encryption key:", encryption_key)

3. Initializing the Embedding Model

We’ll use OpenAI’s embeddings for our vector search.


# Initialize the embedding model.
# The model is named explicitly so the hard-coded dimension below cannot
# silently fall out of sync if the library's default model ever changes.
embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002")

# Embedding dimension of the model above (OpenAI's text-embedding-ada-002
# model uses 1536 dimensions). Change this together with the model name.
dimension = 1536

4. Setting up VectorX with LangChain

Now we’ll set up the VectorX vector store integration with LangChain.


from vecx_langchain import VectorXVectorStore
import time
 
# Build the index name from the current Unix time (whole seconds) so that
# re-running this cell does not clash with an index created by an earlier run.
timestamp = int(time.time())
index_name = f"langchain_demo_{timestamp}"

# Gather the store settings in one place, then create the VectorX vector store.
store_params = {
    "embedding": embedding_model,
    "api_token": vecx_api_token,
    "encryption_key": encryption_key,
    "index_name": index_name,
    "dimension": dimension,
    "space_type": "cosine",  # Can be "cosine", "l2", or "ip"
}
vector_store = VectorXVectorStore.from_params(**store_params)

print(f"Initialized VectorX vector store with index: {index_name}")

5. Creating Sample Documents

Let’s create some sample texts with metadata to index.


# Each sample pairs a text with the metadata that is indexed alongside it.
samples = [
    (
        "Python is a high-level, interpreted programming language known for its readability and simplicity.",
        {"category": "programming", "language": "python", "difficulty": "beginner", "doc_id": "doc1"},
    ),
    (
        "JavaScript is a scripting language that enables interactive web pages and is an essential part of web applications.",
        {"category": "programming", "language": "javascript", "difficulty": "intermediate", "doc_id": "doc2"},
    ),
    (
        "Machine learning is a subset of artificial intelligence that provides systems the ability to automatically learn and improve from experience.",
        {"category": "ai", "field": "machine_learning", "difficulty": "advanced", "doc_id": "doc3"},
    ),
    (
        "Deep learning is part of a broader family of machine learning methods based on artificial neural networks with representation learning.",
        {"category": "ai", "field": "deep_learning", "difficulty": "advanced", "doc_id": "doc4"},
    ),
    (
        "Vector databases are specialized database systems designed to store and query high-dimensional vectors for similarity search.",
        {"category": "database", "type": "vector", "difficulty": "intermediate", "doc_id": "doc5"},
    ),
    (
        "VectorX is an encrypted vector database that provides secure and private vector search capabilities.",
        {"category": "database", "type": "vector", "product": "vectorx", "difficulty": "intermediate", "doc_id": "doc6"},
    ),
]

# Split the pairs into the two parallel lists used by the following cells.
texts = [text for text, _ in samples]
metadatas = [metadata for _, metadata in samples]

print(f"Created {len(texts)} sample documents")

6. Adding Documents to VectorX

Let’s add our documents to the VectorX vector store.


# Store the sample texts together with their metadata; the value returned by
# add_texts is kept as `ids` because the deletion example later uses it.
ids = vector_store.add_texts(texts=texts, metadatas=metadatas)

print(f"Added {len(ids)} documents with the following IDs:")
for position, stored_id in enumerate(ids, start=1):
    print(f"Document {position}: {stored_id}")

7. Basic Similarity Search

Now let’s perform a basic similarity search.


# Run a plain similarity search, asking for k=2 results
query = "What is Python?"
results = vector_store.similarity_search(query, k=2)

# Show the query followed by each returned document, numbered from 1
print(f"Query: '{query}'")
print(f"\nFound {len(results)} similar documents:")
for rank, hit in enumerate(results, start=1):
    print(f"\nResult {rank}:")
    print(f"Content: {hit.page_content}")
    print(f"Metadata: {hit.metadata}")

8. Similarity Search with Scores

Let’s perform a similarity search that also returns the similarity scores.


# Same kind of search, but each result comes back as a (document, score) pair
query = "Tell me about vector databases"
results_with_scores = vector_store.similarity_search_with_score(query, k=2)

print(f"Query: '{query}'")
print(f"\nFound {len(results_with_scores)} similar documents:")
for rank, (hit, hit_score) in enumerate(results_with_scores, start=1):
    print(f"\nResult {rank}:")
    print(f"Content: {hit.page_content}")
    print(f"Metadata: {hit.metadata}")
    # Score is shown rounded to four decimal places
    print(f"Similarity Score: {hit_score:.4f}")

9. Filtering Search Results by Metadata

Let’s perform a search with a metadata filter.


# Restrict the search to documents whose metadata has category == "programming"
query = "Tell me about programming languages"
filter_dict = dict(category="programming")

filtered_results = vector_store.similarity_search(query=query, k=3, filter=filter_dict)

print(f"Query: '{query}' with filter: {filter_dict}")
print(f"\nFound {len(filtered_results)} filtered results:")
for rank, hit in enumerate(filtered_results, start=1):
    print(f"\nResult {rank}:")
    print(f"Content: {hit.page_content}")
    print(f"Metadata: {hit.metadata}")

10. Multiple Metadata Filters

Let’s try more specific filtering.


# Filter on two metadata fields at once: category and difficulty
query = "Tell me about AI"
filter_dict = dict(category="ai", difficulty="advanced")

multi_filtered_results = vector_store.similarity_search(query=query, k=3, filter=filter_dict)

print(f"Query: '{query}' with filter: {filter_dict}")
print(f"\nFound {len(multi_filtered_results)} filtered results:")
for rank, hit in enumerate(multi_filtered_results, start=1):
    print(f"\nResult {rank}:")
    print(f"Content: {hit.page_content}")
    print(f"Metadata: {hit.metadata}")

11. Creating a Retriever

Let’s create a LangChain retriever from our vector store.


# Wrap the vector store as a LangChain retriever, passing k=2 as a search setting
retriever = vector_store.as_retriever(search_kwargs=dict(k=2))

# Query through the retriever interface instead of calling the store directly
retrieved_docs = retriever.invoke("What is machine learning?")

print(f"Retrieved {len(retrieved_docs)} documents:")
for position, retrieved in enumerate(retrieved_docs, start=1):
    print(f"\nDocument {position}:")
    print(f"Content: {retrieved.page_content}")
    print(f"Metadata: {retrieved.metadata}")

12. Building a RAG Pipeline with LangChain

Let’s build a complete Retrieval-Augmented Generation (RAG) pipeline with LangChain.


from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
 
# Chat model that writes the final answer
llm = ChatOpenAI(model="gpt-3.5-turbo")

# Prompt text; the {context} and {question} placeholders are filled in when
# the chain runs.
rag_template = """
    Answer the following question based only on the provided context:
    
    Context: {context}
    
    Question: {question}
    """
prompt = ChatPromptTemplate.from_template(rag_template)
 
# Turn a list of retrieved documents into a single context string
def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line."""
    passages = (entry.page_content for entry in docs)
    return "\n\n".join(passages)
 
# Assemble the RAG chain step by step.
# "context" is produced by running the retriever and formatting its documents;
# "question" is the chain input handed on as-is.
context_source = retriever | format_docs
chain_inputs = {"context": context_source, "question": RunnablePassthrough()}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

# Ask a question through the full pipeline
question = "What are vector databases and how do they work?"
response = rag_chain.invoke(question)

print(f"Question: {question}")
print(f"\nResponse: {response}")

13. Deleting Documents

Let’s demonstrate how to delete documents from the vector store.


# Remove one document, identified by the first ID obtained when adding texts
id_to_delete = ids[0]
print(f"Deleting document with ID: {id_to_delete}")

vector_store.delete(ids=[id_to_delete])

# Search again for the topic of the deleted document to see what is returned now
query = "Python programming language"
results_after_delete = vector_store.similarity_search(query, k=1)

print(f"\nResults after deletion for query '{query}':")
for rank, remaining in enumerate(results_after_delete, start=1):
    print(f"Result {rank}: {remaining.page_content}")

14. Deleting by Filter

Let’s demonstrate how to delete documents using a filter.


# Remove documents by metadata filter instead of by ID
filter_to_delete = dict(category="programming")
print(f"Deleting documents with filter: {filter_to_delete}")

vector_store.delete(filter=filter_to_delete)

# Search for a programming topic again to see what is returned after the delete
programming_query = "JavaScript programming"
results_after_filter_delete = vector_store.similarity_search(programming_query, k=2)

print(f"\nResults after filter deletion for query '{programming_query}':")
for rank, remaining in enumerate(results_after_filter_delete, start=1):
    print(f"Result {rank}: {remaining.page_content}")
    print(f"Metadata: {remaining.metadata}")
    print(f"Metadata: {doc.metadata}")

15. Persistence and Reconnection

VectorX stores your vectors in the cloud. Here’s how to reconnect to an existing index.


# To reconnect to an existing index, use the same parameters
def reconnect_to_index(
    api_token,
    encryption_key,
    index_name,
    index_dimension=None,
    space_type="cosine",
    embedding=None,
):
    """Open an existing VectorX index as a LangChain vector store.

    An index should be reopened with the same settings it was created with,
    so every setting can be passed explicitly instead of being read from
    notebook state.

    Args:
        api_token: VectorX API token.
        encryption_key: Encryption key that was used when the index was
            created.
        index_name: Name of the existing index.
        index_dimension: Embedding dimension of the index. When omitted, the
            notebook-level ``dimension`` value is used.
        space_type: Space type the index was created with. Defaults to
            "cosine", which is what this demo uses when creating its index.
        embedding: Embedding model to use. When omitted, a new
            ``OpenAIEmbeddings()`` is created.

    Returns:
        The store returned by ``VectorXVectorStore.from_params``.
    """
    if index_dimension is None:
        # Fall back to the notebook-level setting for existing callers.
        index_dimension = dimension
    if embedding is None:
        embedding = OpenAIEmbeddings()

    return VectorXVectorStore.from_params(
        embedding=embedding,
        api_token=api_token,
        encryption_key=encryption_key,
        index_name=index_name,
        dimension=index_dimension,
        space_type=space_type,
    )
 
# Example usage (commented out as we already have our vector store)
# reconnected_store = reconnect_to_index(vecx_api_token, encryption_key, index_name)
# results = reconnected_store.similarity_search("What are vector databases?", k=1)

# The secret values themselves are deliberately not echoed: notebook output is
# often saved, committed, or shared, and would leak the credentials with it.
# Only the index name, which is not a secret, is printed.
print("To reconnect to this index in the future, use:\n")
print("API Token: <your VectorX API token>")
print("Encryption Key: <the encryption key you saved for this index>")
print(f"Index Name: {index_name}")

16. Cleanup

If you want to delete the index when you’re done, you can do so with the VectorX client.


# Uncomment the two lines below to delete the index; leave them commented out to keep it
# vx.delete_index(index_name)
# print(f"Index {index_name} deleted")