
VectorX LlamaIndex Integration Demo

This doc demonstrates how to use VectorX (an encrypted vector database) with LlamaIndex for secure vector search and retrieval.

1. Installation

First, let’s make sure we have all the required packages installed.

# Install necessary packages (the demo below also assumes llama-index and
# llama-index-embeddings-openai are available in your environment)
!pip install vecx-llamaindex

2. Setting up VectorX and OpenAI credentials

We need to set up our API credentials for VectorX and OpenAI.

import os
from llama_index.embeddings.openai import OpenAIEmbedding
from vecx.vectorx import VectorX

# Set API keys
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
vecx_api_token = "vectorx-api-key"

# Initialize VectorX client
vx = VectorX(token=vecx_api_token)

# Generate encryption key if you don't have one
encryption_key = vx.generate_key()
# Make sure to save this key securely - you'll need it to access your encrypted vectors
print("Encryption key:", encryption_key)
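In practice, avoid hardcoding secrets in a notebook. Below is a minimal sketch of loading them from the environment instead; the VECX_API_TOKEN and VECX_ENCRYPTION_KEY variable names are hypothetical, so adjust them to your own setup.

import os
from vecx.vectorx import VectorX

# Hypothetical environment variable names - set these before running
vecx_api_token = os.environ["VECX_API_TOKEN"]
vx = VectorX(token=vecx_api_token)

# Reuse a previously saved encryption key if one is set; otherwise generate one
encryption_key = os.environ.get("VECX_ENCRYPTION_KEY") or vx.generate_key()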

3. Creating Sample Documents

Let’s create some sample documents to index.

from llama_index.core import Document

# Create sample documents with different categories and metadata
documents = [
    Document(
        text="Python is a high-level, interpreted programming language known for its readability and simplicity.",
        metadata={"category": "programming", "language": "python", "difficulty": "beginner"}
    ),
    Document(
        text="JavaScript is a scripting language that enables interactive web pages and is an essential part of web applications.",
        metadata={"category": "programming", "language": "javascript", "difficulty": "intermediate"}
    ),
    Document(
        text="Machine learning is a subset of artificial intelligence that provides systems the ability to automatically learn and improve from experience.",
        metadata={"category": "ai", "field": "machine_learning", "difficulty": "advanced"}
    ),
    Document(
        text="Deep learning is part of a broader family of machine learning methods based on artificial neural networks with representation learning.",
        metadata={"category": "ai", "field": "deep_learning", "difficulty": "advanced"}
    ),
    Document(
        text="Vector databases are specialized database systems designed to store and query high-dimensional vectors for similarity search.",
        metadata={"category": "database", "type": "vector", "difficulty": "intermediate"}
    ),
    Document(
        text="VectorX is an encrypted vector database that provides secure and private vector search capabilities.",
        metadata={"category": "database", "type": "vector", "product": "vectorx", "difficulty": "intermediate"}
    )
]

print(f"Created {len(documents)} sample documents")

4. Setting up VectorX with LlamaIndex

Now we’ll set up the VectorX vector store integration with LlamaIndex.

from vecx_llamaindex import VectorXVectorStore
from llama_index.core import StorageContext
import time

# Create a unique index name with timestamp to avoid conflicts
timestamp = int(time.time())
index_name = f"llamaindex_demo_{timestamp}"

# Set up the embedding model
embed_model = OpenAIEmbedding()

# Get the embedding dimension
dimension = 1536  # OpenAI's default embedding dimension

# Initialize the VectorX vector store
vector_store = VectorXVectorStore.from_params(
    api_token=vecx_api_token,
    encryption_key=encryption_key,
    index_name=index_name,
    dimension=dimension,
    space_type="cosine"  # Can be "cosine", "l2", or "ip"
)

# Create storage context with our vector store
storage_context = StorageContext.from_defaults(vector_store=vector_store)

print(f"Initialized VectorX vector store with index: {index_name}")

5. Creating a Vector Index from Documents

Let’s create a vector index from our documents.

from llama_index.core import VectorStoreIndex

# Create a vector index
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    embed_model=embed_model
)

print("Vector index created successfully")
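Once the index exists, you can keep adding documents to it. The sketch below uses LlamaIndex's standard index.insert() method, which embeds the new document and writes it through the configured vector store (assuming the integration supports incremental inserts); the document itself is a made-up example.

from llama_index.core import Document

# Hypothetical extra document - insert() embeds it and stores it in VectorX
new_doc = Document(
    text="Retrieval-augmented generation combines vector search with language models.",
    metadata={"category": "ai", "field": "rag", "difficulty": "intermediate"},
)
index.insert(new_doc)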

6. Basic Retrieval with Query Engine

Now we can create a query engine and perform a basic search.

# Create a query engine
query_engine = index.as_query_engine()

# Ask a question
response = query_engine.query("What is Python?")
print("Query: What is Python?")
print("Response:")
print(response)
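The response object also exposes the retrieved source nodes via LlamaIndex's standard source_nodes attribute, which is useful for checking which documents grounded the answer:

# Inspect the source nodes that grounded the answer
for source_node in response.source_nodes:
    print(f"Score: {source_node.score:.4f} | Metadata: {source_node.node.metadata}")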

7. Using Metadata Filters

Let’s demonstrate how to use metadata filters to narrow down our search.

from llama_index.core.vector_stores.types import MetadataFilters, MetadataFilter, FilterOperator

# Create a filtered retriever to only search within AI-related documents
ai_filter = MetadataFilter(key="category", value="ai", operator=FilterOperator.EQ)
ai_filters = MetadataFilters(filters=[ai_filter])

# Create a filtered query engine
filtered_query_engine = index.as_query_engine(filters=ai_filters)

# Ask a general question but only using AI documents
response = filtered_query_engine.query("What is learning from data?")
print("Filtered Query (AI category only): What is learning from data?")
print("Response:")
print(response)
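Besides exact equality, LlamaIndex's FilterOperator supports other comparisons. As a sketch, FilterOperator.IN matches any of several values (assuming the VectorX store supports this operator):

# Match documents whose language is any of the listed values
lang_filter = MetadataFilter(
    key="language",
    value=["python", "javascript"],
    operator=FilterOperator.IN,
)
lang_engine = index.as_query_engine(filters=MetadataFilters(filters=[lang_filter]))
print(lang_engine.query("Which programming languages are covered?"))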

8. Advanced Filtering with Multiple Conditions

We can also combine multiple metadata filters.

# Create a more complex filter: database category AND intermediate difficulty
category_filter = MetadataFilter(key="category", value="database", operator=FilterOperator.EQ)
difficulty_filter = MetadataFilter(key="difficulty", value="intermediate", operator=FilterOperator.EQ)
complex_filters = MetadataFilters(filters=[category_filter, difficulty_filter])

# Create a query engine with the complex filters
complex_filtered_engine = index.as_query_engine(filters=complex_filters)

# Query with the complex filters
response = complex_filtered_engine.query("Tell me about databases")
print("Complex Filtered Query (database category AND intermediate difficulty): Tell me about databases")
print("Response:")
print(response)
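By default, multiple filters are combined with AND. LlamaIndex's FilterCondition lets you switch the combination to OR instead, as in this short sketch:

from llama_index.core.vector_stores.types import FilterCondition

# Match documents in the "database" category OR with "intermediate" difficulty
or_filters = MetadataFilters(
    filters=[category_filter, difficulty_filter],
    condition=FilterCondition.OR,
)
or_engine = index.as_query_engine(filters=or_filters)
print(or_engine.query("Tell me about databases"))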

9. Custom Retriever Setup

We can also create a custom retriever with more control over the retrieval process.

from llama_index.core.retrievers import VectorIndexRetriever

# Create a retriever with custom parameters
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=3,  # Return top 3 most similar results
    filters=ai_filters   # Use our AI category filter from before
)

# Retrieve nodes for a query
nodes = retriever.retrieve("What is deep learning?")
print(f"Retrieved {len(nodes)} nodes for query: 'What is deep learning?' (with AI category filter)")

print("\nRetrieved content:")
for i, node in enumerate(nodes):
    print(f"\nNode {i+1}:")
    print(f"Text: {node.node.text}")
    print(f"Metadata: {node.node.metadata}")
    print(f"Score: {node.score:.4f}")

10. Using a Custom Retriever with a Query Engine

We can use our custom retriever with a query engine for more control.

from llama_index.core.query_engine import RetrieverQueryEngine

# Create a query engine with our custom retriever
custom_query_engine = RetrieverQueryEngine.from_args(
    retriever=retriever,
    verbose=True  # Enable verbose mode to see the retrieved nodes
)

# Query using the custom retriever query engine
response = custom_query_engine.query("Explain the difference between machine learning and deep learning")
print("\nFinal Response:")
print(response)
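RetrieverQueryEngine.from_args also accepts node postprocessors. Here is a small sketch using LlamaIndex's SimilarityPostprocessor to drop weakly matching nodes before answer synthesis; the 0.75 cutoff is an arbitrary example value:

from llama_index.core.postprocessor import SimilarityPostprocessor

# Drop retrieved nodes whose similarity score falls below the cutoff
strict_query_engine = RetrieverQueryEngine.from_args(
    retriever=retriever,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.75)],
)
print(strict_query_engine.query("What is deep learning?"))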

11. Direct VectorStore Querying

We can also query the VectorX vector store directly, bypassing the LlamaIndex query engine.

from llama_index.core.vector_stores.types import VectorStoreQuery

# Generate an embedding for our query
query_text = "What are vector databases?"
query_embedding = embed_model.get_text_embedding(query_text)

# Create a VectorStoreQuery
vector_store_query = VectorStoreQuery(
    query_embedding=query_embedding,
    similarity_top_k=2,
    filters=MetadataFilters(filters=[MetadataFilter(key="category", value="database", operator=FilterOperator.EQ)])
)

# Execute the query directly on the vector store
query_result = vector_store.query(vector_store_query)

print(f"Direct VectorStore query: '{query_text}'")
print(f"Retrieved {len(query_result.nodes)} results with database category filter:")
for i, (node, score) in enumerate(zip(query_result.nodes, query_result.similarities)):
    print(f"\nResult {i+1}:")
    print(f"Text: {node.text}")
    print(f"Metadata: {node.metadata}")
    print(f"Similarity score: {score:.4f}")
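LlamaIndex vector stores also define a standard delete() method keyed by the source document's ref_doc_id. Assuming VectorXVectorStore implements this part of the interface, removing one document's vectors would look like this sketch:

# Hypothetical sketch: remove all vectors derived from one source document,
# assuming VectorXVectorStore implements the standard delete() interface
doc_id = documents[0].doc_id
vector_store.delete(ref_doc_id=doc_id)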

12. Saving and Loading Indexes

With VectorX, your vectors and metadata are stored securely in the cloud. You can reconnect to your index in future sessions.

# To reconnect to an existing index in a future session, you would use:
def reconnect_to_index(api_token, encryption_key, index_name):
    # Initialize the vector store with the existing index
    vector_store = VectorXVectorStore.from_params(
        api_token=api_token,
        encryption_key=encryption_key,
        index_name=index_name
    )

    # Create storage context
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Load the index
    index = VectorStoreIndex.from_vector_store(
        vector_store,
        embed_model=OpenAIEmbedding()
    )
    return index

# Example usage (commented out as we already have our index)
# reconnected_index = reconnect_to_index(vecx_api_token, encryption_key, index_name)
# query_engine = reconnected_index.as_query_engine()
# response = query_engine.query("What is VectorX?")
# print(response)

print("To reconnect to this index in the future, use:\n")
print(f"API Token: {vecx_api_token}")
print(f"Encryption Key: {encryption_key}")
print(f"Index Name: {index_name}")

13. Cleanup

If you want to delete the index when you’re done, you can do so with the VectorX client.

# Uncomment the lines below to delete the index when you're done
# vx.delete_index(index_name)
# print(f"Index {index_name} deleted")
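To confirm what remains on the server afterwards, the VectorX client should let you enumerate your indexes; the method name below is an assumption, so check the vecx documentation for the exact call:

# Assumed client method - verify the exact name in the vecx documentation
for idx in vx.list_indexes():
    print(idx)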