Slow Query Using "IN"

Hi, I have a performance optimization question. My query below takes about 40 seconds to run when the IN clause contains 1000 assetId’s.

SELECT RAW MAX([v.version, { 'displayName': t.displayName, 'name': f.name, 'value': v.`value`, m.assetId }])[1]
FROM assets m
UNNEST m.fields f
UNNEST f.`values` v
 LEFT JOIN assets s
   ON KEYS 'metadata-schema:' || m.schemaId
 UNNEST s.tags t
WHERE m.type = 'asset-metadata'
  AND m.assetId IN ["8db88fd8-391f-4344-88ef-e24145b22246", "48d95a51-5fa6-484a-b50a-9fbfcc63400d"]
  AND m.schemaId IS NOT MISSING
  AND f.name = t.name
  AND s.status = 1
GROUP BY t.displayName, m.assetId

When I look at the query plan, I see that the UNNEST on s.tags expands to 400k+ documents and the applies the IN filter on that result set which seems to take 30+ seconds. Is there any optimization that I do to improve the performance?

Query Plan:

{
#operator”: “Sequence”,
#stats”: {
#phaseSwitches”: 1
},
“~children”: [
{
#operator”: “Authorize”,
#stats”: {
#phaseSwitches”: 3,
“servTime”: “1.9986ms”
},
“privileges”: {
“List”: [
{
“Target”: “default:assets”,
“Priv”: 7
}
]
},
“~child”: {
#operator”: “Sequence”,
#stats”: {
#phaseSwitches”: 1
},
“~children”: [
{
#operator”: “UnionScan”,
#stats”: {
#itemsIn”: 48,
#itemsOut”: 24,
#phaseSwitches”: 151,
“kernTime”: “8.0031ms”
},
“scans”: [
{
#operator”: “IntersectScan”,
#stats”: {
#itemsIn”: 24,
#itemsOut”: 24,
#phaseSwitches”: 105,
“kernTime”: “8.0031ms”
},
“scans”: [
{
#operator”: “IndexScan3”,
#stats”: {
#phaseSwitches”: 8,
“execTime”: “2.0007ms”,
“servTime”: “6.0024ms”
},
“as”: “m”,
“index”: “asset-metadata-schemaId”,
“index_id”: “2282e5d006c383cf”,
“index_projection”: {
“primary_key”: true
},
“keyspace”: “assets”,
“namespace”: “default”,
“spans”: [
{
“exact”: true,
“range”: [
{
“inclusion”: 1,
“low”: “null”
}
]
}
],
“using”: “gsi”,
#time_normal”: “00:00.0080”,
#time_absolute”: 0.008003099999999999
},
{
#operator”: “IndexScan3”,
#stats”: {
#itemsOut”: 24,
#phaseSwitches”: 101,
“servTime”: “6.0024ms”
},
“as”: “m”,
“index”: “asset-metadata-assetId”,
“index_id”: “42b8884cd86e7b3b”,
“index_projection”: {
“primary_key”: true
},
“keyspace”: “assets”,
“namespace”: “default”,
“spans”: [
{
“exact”: true,
“range”: [
{
“high”: “"48d95a51-5fa6-484a-b50a-9fbfcc63400d"”,
“inclusion”: 3,
“low”: “"48d95a51-5fa6-484a-b50a-9fbfcc63400d"”
}
]
},
{
“exact”: true,
“range”: [
{
“high”: “"8db88fd8-391f-4344-88ef-e24145b22246"”,
“inclusion”: 3,
“low”: “"8db88fd8-391f-4344-88ef-e24145b22246"”
}
]
}
],
“using”: “gsi”,
#time_normal”: “00:00.0060”,
#time_absolute”: 0.0060024
}
]
},
{
#operator”: “IntersectScan”,
#stats”: {
#itemsIn”: 24,
#itemsOut”: 24,
#phaseSwitches”: 105,
“kernTime”: “6.0024ms”
},
“scans”: [
{
#operator”: “IndexScan3”,
#stats”: {
#itemsOut”: 24,
#phaseSwitches”: 101,
“servTime”: “6.0024ms”
},
“as”: “m”,
“index”: “asset-metadata-assetId”,
“index_id”: “42b8884cd86e7b3b”,
“index_projection”: {
“primary_key”: true
},
“keyspace”: “assets”,
“namespace”: “default”,
“spans”: [
{
“exact”: true,
“range”: [
{
“high”: “"48d95a51-5fa6-484a-b50a-9fbfcc63400d"”,
“inclusion”: 3,
“low”: “"48d95a51-5fa6-484a-b50a-9fbfcc63400d"”
}
]
},
{
“exact”: true,
“range”: [
{
“high”: “"8db88fd8-391f-4344-88ef-e24145b22246"”,
“inclusion”: 3,
“low”: “"8db88fd8-391f-4344-88ef-e24145b22246"”
}
]
}
],
“using”: “gsi”,
#time_normal”: “00:00.0060”,
#time_absolute”: 0.0060024
},
{
#operator”: “IndexScan3”,
#stats”: {
#phaseSwitches”: 8,
“servTime”: “6.0024ms”
},
“as”: “m”,
“index”: “asset-metadata-schemaId”,
“index_id”: “2282e5d006c383cf”,
“index_projection”: {
“primary_key”: true
},
“keyspace”: “assets”,
“namespace”: “default”,
“spans”: [
{
“exact”: true,
“range”: [
{
“inclusion”: 1,
“low”: “null”
}
]
}
],
“using”: “gsi”,
#time_normal”: “00:00.0060”,
#time_absolute”: 0.0060024
}
]
}
]
},
{
#operator”: “Fetch”,
#stats”: {
#itemsIn”: 24,
#itemsOut”: 24,
#phaseSwitches”: 103,
“kernTime”: “6.0024ms”,
“servTime”: “4.9992ms”
},
“as”: “m”,
“keyspace”: “assets”,
“namespace”: “default”,
#time_normal”: “00:00.0049”,
#time_absolute”: 0.0049992000000000005
},
{
#operator”: “Unnest”,
#stats”: {
#itemsIn”: 24,
#itemsOut”: 70,
#phaseSwitches”: 191,
“kernTime”: “11.0016ms”
},
“as”: “f”,
“expr”: “(m.fields)”
},
{
#operator”: “Unnest”,
#stats”: {
#itemsIn”: 70,
#itemsOut”: 70,
#phaseSwitches”: 283,
“execTime”: “1.0003ms”,
“kernTime”: “10.0013ms”
},
“as”: “v”,
“expr”: “(f.values)”,
#time_normal”: “00:00.0010”,
#time_absolute”: 0.0010003
},
{
#operator”: “Join”,
#stats”: {
#itemsIn”: 70,
#itemsOut”: 70,
#phaseSwitches”: 293,
“execTime”: “1.0004ms”,
“kernTime”: “10.0027ms”,
“servTime”: “15.0005ms”
},
“as”: “s”,
“keyspace”: “assets”,
“namespace”: “default”,
“on_keys”: “("metadata-schema:" || (m.schemaId))”,
“outer”: true,
#time_normal”: “00:00.0160”,
#time_absolute”: 0.0160009
},
{
#operator”: “Unnest”,
#stats”: {
#itemsIn”: 70,
#itemsOut”: 844,
#phaseSwitches”: 1831,
“execTime”: “3.0016ms”,
“kernTime”: “23.002ms”
},
“as”: “t”,
“expr”: “(s.tags)”,
#time_normal”: “00:00.0030”,
#time_absolute”: 0.0030015999999999997
},
{
#operator”: “Sequence”,
#stats”: {
#phaseSwitches”: 1
},
“~children”: [
{
#operator”: “Filter”,
#stats”: {
#itemsIn”: 844,
#itemsOut”: 42,
#phaseSwitches”: 1775,
“execTime”: “13.0044ms”,
“kernTime”: “18.0044ms”
},
“condition”: “(((((((m.type) = "asset-metadata") and ((m.assetId) in ["8db88fd8-391f-4344-88ef-e24145b22246", "48d95a51-5fa6-484a-b50a-9fbfcc63400d"])) and (not (ifnull((v.value), "") = ""))) and ((m.schemaId) is not missing)) and ((f.name) = (t.name))) and ((s.status) = 1))”,
#time_normal”: “00:00.0130”,
#time_absolute”: 0.013004400000000001
},
{
#operator”: “InitialGroup”,
#stats”: {
#itemsIn”: 42,
#itemsOut”: 38,
#phaseSwitches”: 163,
“execTime”: “2.9992ms”,
“kernTime”: “28.0096ms”
},
“aggregates”: [
“max([(v.version), {"assetId": (m.assetId), "displayName": (t.displayName), "name": (f.name), "value": (v.value)}])”
],
“group_keys”: [
“(t.displayName)”,
“(m.assetId)”
],
#time_normal”: “00:00.0029”,
#time_absolute”: 0.0029992
}
]
},
{
#operator”: “IntermediateGroup”,
#stats”: {
#itemsIn”: 38,
#itemsOut”: 38,
#phaseSwitches”: 155,
“execTime”: “997.9µs”,
“kernTime”: “31.0088ms”
},
“aggregates”: [
“max([(v.version), {"assetId": (m.assetId), "displayName": (t.displayName), "name": (f.name), "value": (v.value)}])”
],
“group_keys”: [
“(t.displayName)”,
“(m.assetId)”
],
#time_normal”: “00:00.0009”,
#time_absolute”: 0.0009979
},
{
#operator”: “FinalGroup”,
#stats”: {
#itemsIn”: 38,
#itemsOut”: 38,
#phaseSwitches”: 155,
“execTime”: “999.3µs”,
“kernTime”: “32.0067ms”
},
“aggregates”: [
“max([(v.version), {"assetId": (m.assetId), "displayName": (t.displayName), "name": (f.name), "value": (v.value)}])”
],
“group_keys”: [
“(t.displayName)”,
“(m.assetId)”
],
#time_normal”: “00:00.0009”,
#time_absolute”: 0.0009993
},
{
#operator”: “Sequence”,
#stats”: {
#phaseSwitches”: 1
},
“~children”: [
{
#operator”: “InitialProject”,
#stats”: {
#itemsIn”: 38,
#itemsOut”: 38,
#phaseSwitches”: 119,
“kernTime”: “33.006ms”
},
“raw”: true,
“result_terms”: [
{
“expr”: “(max([(v.version), {"assetId": (m.assetId), "displayName": (t.displayName), "name": (f.name), "value": (v.value)}])[1])”
}
]
},
{
#operator”: “FinalProject”,
#stats”: {
#itemsIn”: 38,
#itemsOut”: 38,
#phaseSwitches”: 115
}
}
]
}
]
},
#time_normal”: “00:00.0019”,
#time_absolute”: 0.0019986
},
{
#operator”: “Stream”,
#stats”: {
#itemsIn”: 38,
#itemsOut”: 38,
#phaseSwitches”: 155,
“kernTime”: “35.0046ms”
}
}
],
“~versions”: [
“2.0.0-N1QL”,
“6.0.0-1693-enterprise”
]
}

CREATE INDEX ix1 ON assets (assetId, schemaId) WHERE type = "asset-metadata"

SELECT RAW MAX([v.version, { 'displayName': t.displayName, 'name': f.name, 'value': v.`value`, m.assetId }])[1]
FROM (SELECT m1.fields, m1.schemaId, m1.assetId
      FROM assets AS m1 USE INDEX(ix1)
      WHERE m1.type = "asset-metadata"
            AND m1.assetId IN ["8db88fd8-391f-4344-88ef-e24145b22246", "48d95a51-5fa6-484a-b50a-9fbfcc63400d"]
            AND m1.schemaId IS NOT NULL) AS m
UNNEST m.fields f
UNNEST f.`values` v
LEFT JOIN assets s ON KEYS 'metadata-schema:' || m.schemaId
UNNEST s.tags t
WHERE f.name = t.name AND s.status = 1
GROUP BY t.displayName, m.assetId;

Thanks vsr1, that query reduced the query time down to 6 seconds. Is there anything else that can be done to further reduce the time? Seems like the UNNEST s.tags t record expansion to 400k docs and filter are consuming most of the query time.

It is complex. You can try this

SELECT RAW MAX([v.version, { 'displayName': u.displayName, 'name': u.name, 'value': v.`value`, m.assetId }])[1]
FROM (SELECT m1.fields, m1.schemaId, m1.assetId
      FROM assets AS m1 USE INDEX(ix1)
      WHERE m1.type = "asset-metadata"
                  AND m1.assetId IN ["8db88fd8-391f-4344-88ef-e24145b22246", "48d95a51-5fa6-484a-b50a-9fbfcc63400d"]
                  AND m1.schemaId IS NOT NULL) AS m
UNNEST (SELECT t.displayName, f.values, f.name
        FROM assets AS st USE KEYS "metadata-schema:" || m.schemaId
        UNNEST m.fields AS f
        UNNEST s.tags AS t
        WHERE st.status = 1 AND f.name = t.name) AS u
UNNEST u.`values` AS v
GROUP BY u.displayName, m.assetId;