New update
As you could see in my last example, we were able to run the vector search in Couchbase, but we did not retrieve the rest of the document's fields. Those fields are necessary to obtain, for example, the chunks returned by a vector-store retriever, so that those chunks can be fed to the LLM.
In this update of the example, I want to show how to obtain the fields we need from the documents returned by the vector search.
from couchbase.cluster import Cluster
from couchbase.auth import PasswordAuthenticator
from couchbase.exceptions import CouchbaseException
import couchbase.search as search
from couchbase.options import SearchOptions
from couchbase.vector_search import VectorQuery, VectorSearch
from couchbase.options import QueryOptions
from langchain_core.documents.base import Document
# Connect to the local cluster. 'xxxxxx' is a placeholder: use your own password.
authenticator = PasswordAuthenticator('Administrator', 'xxxxxx')
cluster = Cluster('couchbase://localhost?username=Administrator&password=xxxxxx', authenticator=authenticator, timeout=30)
# Open the bucket and collection
bucket = cluster.bucket('chatui')
collection = bucket.default_collection()
scope = bucket.default_scope()
# Query vector with 5 components, matching "dims": 5 of the colorvect_l2
# field in the color_index definition.
query_vector = [0.255, 0.239, 0.213, 0.218, 0.197]
search_index = "color_index"
try:
    # MatchNoneQuery is the text part of the request; the hits are expected
    # to come from the vector query on the 'colorvect_l2' field.
    search_req = search.SearchRequest.create(search.MatchNoneQuery()).with_vector_search(
        VectorSearch.from_vector_query(VectorQuery('colorvect_l2', query_vector, num_candidates=10))
    )
    # `fields` asks the search service to return these document fields with
    # each hit; they must be stored in the index ("store": true) to appear.
    result = scope.search(search_index, search_req, SearchOptions(limit=10, fields=["id", "color", "brightness", "description"]))
    documents = []
    for row in result.rows():
        document_data = row.fields
        # Copy the returned fields, defaulting to "" when a field is absent.
        data = {
            "id": document_data.get("id", ""),
            "color": document_data.get("color", ""),
            "brightness": document_data.get("brightness", ""),
            "description": document_data.get("description", "")
        }
        # Wrap each hit in a LangChain Document: the id is the page content,
        # and all returned fields travel along as metadata.
        documents.append(Document(page_content=document_data.get("id", ""), metadata=data))
    if not documents:
        # Not a CouchbaseException, so this propagates past the handler below.
        raise ValueError("No relevant documents found")
    for document in documents:
        print(document.metadata)
    # Metadata is read only after the rows have been iterated.
    print("Reported total rows: {}".format(result.metadata().metrics().total_rows()))
    search_meta_data = result.metadata()
    print(search_meta_data)
except CouchbaseException as ex:
    # Print the full traceback for any SDK error instead of failing silently.
    import traceback
    traceback.print_exc()
What is new in this code?
We have imported
from langchain_core.documents.base import Document
with the objective of constructing a LangChain Document from the vector search response.
We have removed the N1QL query, which previously served only as a guide to verify that the search results were as expected; leaving it out this time makes the example clearer.
The search call has been modified so that it includes the “id”, “color”, “brightness”, and “description” fields of the document in the search result:
result = scope.search(search_index, search_req, SearchOptions(limit=10, fields=["id", "color", "brightness", "description"]))
Now we can extract each field from the search result:
documents = []
for row in result.rows():
    # Fields requested through SearchOptions(fields=[...]) arrive on each row.
    document_data = row.fields
    data = {
        "id": document_data.get("id", ""),
        "color": document_data.get("color", ""),
        "brightness": document_data.get("brightness", ""),
        "description": document_data.get("description", "")
    }
and thus build a LangChain Document that we can pass to our LLM:
documents.append(Document(page_content=document_data.get("id", ""), metadata=data))
However, this is not all: we must also modify the color_index definition so that these fields can be attached to the search result:
{
"type": "fulltext-index",
"name": "chatui._default.color_index",
"uuid": "d948b8effe3de3d7",
"sourceType": "gocbcore",
"sourceName": "chatui",
"sourceUUID": "03324e97fac08d21b52d3354c3508270",
"planParams": {
"maxPartitionsPerPIndex": 1024,
"indexPartitions": 1
},
"params": {
"doc_config": {
"docid_prefix_delim": "",
"docid_regexp": "",
"mode": "scope.collection.type_field",
"type_field": "type"
},
"mapping": {
"analysis": {},
"default_analyzer": "standard",
"default_datetime_parser": "dateTimeOptional",
"default_field": "_all",
"default_mapping": {
"dynamic": true,
"enabled": true,
"properties": {
"brightness": {
"dynamic": false,
"enabled": true,
"fields": [
{
"index": true,
"name": "brightness",
"store": true,
"type": "number"
}
]
},
"color": {
"dynamic": false,
"enabled": true,
"fields": [
{
"index": true,
"name": "color",
"store": true,
"type": "text"
}
]
},
"colorvect_l2": {
"dynamic": false,
"enabled": true,
"fields": [
{
"dims": 5,
"index": true,
"name": "colorvect_l2",
"similarity": "l2_norm",
"type": "vector",
"vector_index_optimized_for": "recall"
}
]
},
"description": {
"dynamic": false,
"enabled": true,
"fields": [
{
"index": true,
"name": "description",
"store": true,
"type": "text"
}
]
},
"id": {
"dynamic": false,
"enabled": true,
"fields": [
{
"index": true,
"name": "id",
"store": true,
"type": "text"
}
]
}
}
},
"default_type": "_default",
"docvalues_dynamic": false,
"index_dynamic": true,
"store_dynamic": false,
"type_field": "_type"
},
"store": {
"indexType": "scorch",
"segmentVersion": 16
}
},
"sourceParams": {}
}
We must pay special attention to the “store”: true attribute, since otherwise the field referenced in the index will not be included in the search response.
With this we obtain a complete response from the vector search, with everything necessary to build a langchain_core.documents.base.Document:
{'id': '#FFEFD5', 'color': 'papaya whip', 'brightness': 240.82, 'description': 'Papaya whip is a soft and mellow color that can be described as a light shade of peach or coral. It has a calming and soothing effect, similar to the tropical fruit it is named after. This color is perfect for creating a warm and inviting atmosphere, and it pairs well with other pastel shades or neutral tones. Papaya whip is a versatile color that can be used in both fashion and interior design, adding a touch of elegance and sophistication to any space.'}
{'id': '#33C4FF', 'color': 'blue sky', 'brightness': 240.82, 'description': 'color similar to the blue sky in a sunny day.'}
Reported total rows: 2
SearchMetaData:{'client_context_id': '907f46-f7b8-594d-c6f1-5808906dbeebfe', 'metrics': {'took': 818764, 'total_rows': 2, 'max_score': 4.759954384762136e-06, 'success_partition_count': 1, 'error_partition_count': 0}, 'errors': {}}
Thanks for your attention. I hope I haven’t been too verbose, but I think this can be useful to many people; I would have liked to find a complete example of Couchbase vector search like this myself.
Cheers!
