Skip to Content
IntegrationsLangChain

VectorX LangChain Integration Demo

This doc demonstrates how to use VectorX (an encrypted vector database) with LangChain for secure vector search and retrieval.

1. Installation

First, let’s make sure we have all the required packages installed.

# Install necessary packages
!pip install vecx-langchain langchain langchain-openai

2. Setting up VectorX and OpenAI credentials

We need to set up our API credentials for VectorX and OpenAI.

import os

from langchain_openai import OpenAIEmbeddings
from vecx.vectorx import VectorX

# Set API keys.
# Values already present in the environment take precedence, so real secrets
# never have to be pasted into this file; the placeholders below are used only
# when nothing is set and must be replaced before running.
# NOTE: VECTORX_API_TOKEN and VECTORX_ENCRYPTION_KEY are names chosen for this
# demo; they are read here, not by the VectorX client itself.
os.environ.setdefault("OPENAI_API_KEY", "your-openai-api-key")
vecx_api_token = os.environ.get("VECTORX_API_TOKEN", "vectorx-api-key")

# Initialize VectorX client
vx = VectorX(token=vecx_api_token)

# Generate an encryption key only if you don't already have one. Supplying an
# existing key keeps previously stored vectors readable across runs.
encryption_key = os.environ.get("VECTORX_ENCRYPTION_KEY") or vx.generate_key()

# Make sure to save this key securely - you'll need it to access your encrypted vectors
print("Encryption key:", encryption_key)

3. Initializing the Embedding Model

We’ll use OpenAI’s embeddings for our vector search.

# Initialize the embedding model.
# The model is named explicitly so that the hard-coded dimension below stays
# in sync with it instead of depending on the library's default model.
embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002")

# Embedding dimension (OpenAI's text-embedding-ada-002 model uses 1536 dimensions)
dimension = 1536

4. Setting up VectorX with LangChain

Now we’ll set up the VectorX vector store integration with LangChain.

import time

from vecx_langchain import VectorXVectorStore

# Create a unique index name with timestamp to avoid conflicts
timestamp = int(time.time())
index_name = f"langchain_demo_{timestamp}"

# Initialize the VectorX vector store
vector_store = VectorXVectorStore.from_params(
    embedding=embedding_model,
    api_token=vecx_api_token,
    encryption_key=encryption_key,
    index_name=index_name,
    dimension=dimension,
    space_type="cosine",  # Can be "cosine", "l2", or "ip"
)

print(f"Initialized VectorX vector store with index: {index_name}")

5. Creating Sample Documents

Let’s create some sample texts with metadata to index.

# Sample texts
texts = [
    "Python is a high-level, interpreted programming language known for its readability and simplicity.",
    "JavaScript is a scripting language that enables interactive web pages and is an essential part of web applications.",
    "Machine learning is a subset of artificial intelligence that provides systems the ability to automatically learn and improve from experience.",
    "Deep learning is part of a broader family of machine learning methods based on artificial neural networks with representation learning.",
    "Vector databases are specialized database systems designed to store and query high-dimensional vectors for similarity search.",
    "VectorX is an encrypted vector database that provides secure and private vector search capabilities.",
]

# Add metadata: one dict per entry in `texts`, in the same order.
metadatas = [
    {"category": "programming", "language": "python", "difficulty": "beginner", "doc_id": "doc1"},
    {"category": "programming", "language": "javascript", "difficulty": "intermediate", "doc_id": "doc2"},
    {"category": "ai", "field": "machine_learning", "difficulty": "advanced", "doc_id": "doc3"},
    {"category": "ai", "field": "deep_learning", "difficulty": "advanced", "doc_id": "doc4"},
    {"category": "database", "type": "vector", "difficulty": "intermediate", "doc_id": "doc5"},
    {"category": "database", "type": "vector", "product": "vectorx", "difficulty": "intermediate", "doc_id": "doc6"},
]

print(f"Created {len(texts)} sample documents")

6. Adding Documents to VectorX

Let’s add our documents to the VectorX vector store.

# Add texts to the vector store; the returned IDs are kept for later deletion.
ids = vector_store.add_texts(texts=texts, metadatas=metadatas)

print(f"Added {len(ids)} documents with the following IDs:")
for position, doc_id in enumerate(ids, start=1):
    print(f"Document {position}: {doc_id}")

7. Basic Similarity Search

Now let’s perform a basic similarity search.

# Perform a basic similarity search
query = "What is Python?"
results = vector_store.similarity_search(query, k=2)

print(f"Query: '{query}'")
print(f"\nFound {len(results)} similar documents:")
for rank, doc in enumerate(results, start=1):
    print(f"\nResult {rank}:")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")

8. Similarity Search with Scores

Let’s perform a similarity search that also returns the similarity scores.

# Search with scores
query = "Tell me about vector databases"
results_with_scores = vector_store.similarity_search_with_score(query, k=2)

print(f"Query: '{query}'")
print(f"\nFound {len(results_with_scores)} similar documents:")
# Each result is a (document, score) pair.
for rank, (doc, score) in enumerate(results_with_scores, start=1):
    print(f"\nResult {rank}:")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
    print(f"Similarity Score: {score:.4f}")

9. Filtering Search Results by Metadata

Let’s perform a search with a metadata filter.

# Search with a filter
query = "Tell me about programming languages"
filter_dict = {"category": "programming"}

filtered_results = vector_store.similarity_search(
    query=query,
    k=3,
    filter=filter_dict,
)

print(f"Query: '{query}' with filter: {filter_dict}")
print(f"\nFound {len(filtered_results)} filtered results:")
for rank, doc in enumerate(filtered_results, start=1):
    print(f"\nResult {rank}:")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")

10. Multiple Metadata Filters

Let’s try more specific filtering.

# Search with multiple filters
query = "Tell me about AI"
filter_dict = {"category": "ai", "difficulty": "advanced"}

multi_filtered_results = vector_store.similarity_search(
    query=query,
    k=3,
    filter=filter_dict,
)

print(f"Query: '{query}' with filter: {filter_dict}")
print(f"\nFound {len(multi_filtered_results)} filtered results:")
for rank, doc in enumerate(multi_filtered_results, start=1):
    print(f"\nResult {rank}:")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")

11. Creating a Retriever

Let’s create a LangChain retriever from our vector store.

# Create a retriever from the vector store
retriever = vector_store.as_retriever(search_kwargs={"k": 2})

# Use the retriever
retrieved_docs = retriever.invoke("What is machine learning?")

print(f"Retrieved {len(retrieved_docs)} documents:")
for rank, doc in enumerate(retrieved_docs, start=1):
    print(f"\nDocument {rank}:")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")

12. Building a RAG Pipeline with LangChain

Let’s build a complete Retrieval-Augmented Generation (RAG) pipeline with LangChain.

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

# Initialize LLM
llm = ChatOpenAI(model="gpt-3.5-turbo")

# Create a prompt template with {context} and {question} placeholders.
prompt = ChatPromptTemplate.from_template(
    """
    Answer the following question based only on the provided context:

    Context: {context}

    Question: {question}
    """
)


def format_docs(docs):
    """Return the page_content of every document in docs, joined by blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


# Create the RAG chain: the retriever's output is piped through format_docs to
# fill "context", while the input is passed through unchanged as "question".
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Use the RAG chain
question = "What are vector databases and how do they work?"
response = rag_chain.invoke(question)

print(f"Question: {question}")
print(f"\nResponse: {response}")

13. Deleting Documents

Let’s demonstrate how to delete documents from the vector store.

# Delete a document by ID
id_to_delete = ids[0]  # Delete the first document
print(f"Deleting document with ID: {id_to_delete}")
vector_store.delete(ids=[id_to_delete])

# Verify deletion by searching
query = "Python programming language"
results_after_delete = vector_store.similarity_search(query, k=1)

print(f"\nResults after deletion for query '{query}':")
for rank, doc in enumerate(results_after_delete, start=1):
    print(f"Result {rank}: {doc.page_content}")

14. Deleting by Filter

Let’s demonstrate how to delete documents using a filter.

# Delete documents by filter
filter_to_delete = {"category": "programming"}
print(f"Deleting documents with filter: {filter_to_delete}")
vector_store.delete(filter=filter_to_delete)

# Verify deletion by searching
programming_query = "JavaScript programming"
results_after_filter_delete = vector_store.similarity_search(programming_query, k=2)

print(f"\nResults after filter deletion for query '{programming_query}':")
for rank, doc in enumerate(results_after_filter_delete, start=1):
    print(f"Result {rank}: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")

15. Persistence and Reconnection

VectorX stores your vectors in the cloud. Here’s how to reconnect to an existing index.

# To reconnect to an existing index, use the same parameters
def reconnect_to_index(
    api_token,
    encryption_key,
    index_name,
    dimension=1536,
    space_type="cosine",
):
    """Return a VectorXVectorStore built with the given index parameters.

    Args:
        api_token: VectorX API token.
        encryption_key: Encryption key that was used when the index was created.
        index_name: Name of the existing index.
        dimension: Embedding dimension of the index. Defaults to 1536, the
            value this demo uses when creating the index.
        space_type: Distance metric passed to ``from_params``. Defaults to
            "cosine", the value this demo uses when creating the index.

    Returns:
        The vector store returned by ``VectorXVectorStore.from_params``.
    """
    # Initialize the vector store with existing index
    return VectorXVectorStore.from_params(
        embedding=OpenAIEmbeddings(),
        api_token=api_token,
        encryption_key=encryption_key,
        index_name=index_name,
        dimension=dimension,
        space_type=space_type,
    )


# Example usage (commented out as we already have our vector store)
# reconnected_store = reconnect_to_index(vecx_api_token, encryption_key, index_name)
# results = reconnected_store.similarity_search("What are vector databases?", k=1)

# NOTE: the lines below echo the API token and encryption key in plain text;
# clear the output before sharing it.
print("To reconnect to this index in the future, use:\n")
print(f"API Token: {vecx_api_token}")
print(f"Encryption Key: {encryption_key}")
print(f"Index Name: {index_name}")

16. Cleanup

If you want to delete the index when you’re done, you can do so with the VectorX client.

# Uncomment the two lines below to delete the index when you are done;
# leave them commented out if you want to keep your index.
# vx.delete_index(index_name)
# print(f"Index {index_name} deleted")
Last updated on