N1QL Slow Query results when joining

dipen_patel · October 13, 2017, 4:05pm

I want to be able to find all tasks with a location of ‘blah’ with a user’s name who created it. Using 4.6.3.
Task
{
base: ‘task’,
locationRoot: ‘blah’,
createdBy: 'user1’
parent: null
}

User - ID: - user1
{
	id: user1,
	name: 'bob',
	type: user
}

INDEXES
CREATE INDEX tasks_idx ON test(base,locationRoot,createdBy) WHERE (((base = “task”) and (locationRoot is not null)) and (parent is null))
CREATE INDEX createdby_idx ON test(createdBy,name)

No Join

Query
TIME - 62 ms

Select t.base, t.type, t.locationRoot, meta(t).id
From test t
Where t.base=“task” AND t.locationRoot = ‘blah’ and t.parent IS NULL

EXPLAIN
[
  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "IndexScan",
              "covers": [
                "cover ((`t`.`base`))",
                "cover ((`t`.`locationRoot`))",
                "cover ((`t`.`createdBy`))",
                "cover ((meta(`t`).`id`))"
              ],
              "filter_covers": {
                "cover (((`t`.`locationRoot`) is not null))": true,
                "cover (((`t`.`parent`) is null))": true,
                "cover ((`t`.`base`))": "task",
                "cover ((`t`.`parent`))": null
              },
              "index": "tasks_idx",
              "index_id": "ae1445162a704134",
              "keyspace": "test",
              "limit": "1000",
              "namespace": "default",
              "spans": [
                {
                  "Exact": true,
                  "Range": {
                    "High": [
                      "\"task\"",
                      "successor(\"blah\")"
                    ],
                    "Inclusion": 1,
                    "Low": [
                      "\"task\"",
                      "\"blah\""
                    ]
                  }
                }
              ],
              "using": "gsi"
            },
            {
              "#operator": "Parallel",
              "~child": {
                "#operator": "Sequence",
                "~children": [
                  {
                    "#operator": "Filter",
                    "condition": "(((cover ((`t`.`base`)) = \"task\") and (cover ((`t`.`locationRoot`)) = \"blah\")) and cover (((`t`.`parent`) is null)))"
                  },
                  {
                    "#operator": "InitialProject",
                    "result_terms": [
                      {
                        "expr": "cover ((`t`.`base`))"
                      },
                      {
                        "expr": "cover ((`t`.`locationRoot`))"
                      },
                      {
                        "expr": "cover ((meta(`t`).`id`))"
                      }
                    ]
                  },
                  {
                    "#operator": "FinalProject"
                  }
                ]
              }
            }
          ]
        },
        {
          "#operator": "Limit",
          "expr": "1000"
        }
      ]
    },
    "text": "Select t.base, t.locationRoot, meta(`t`).id\nFrom `test` t\nWhere t.base=\"task\" AND t.locationRoot = 'blah' and t.parent IS NULL Limit 1000"
  }
]

JOINING

Query
TIME - 2 seconds

Select t.base, t.type, t.locationRoot, meta(t).id, u.name
From test t LEFT JOIN test u ON KEYS t.createdBy
Where t.base=“task” AND t.locationRoot = ‘blah’ and t.parent IS NULL

EXPLAIN

[
  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "IndexScan",
          "index": "tasks_idx",
          "index_id": "ae1445162a704134",
          "keyspace": "test",
          "namespace": "default",
          "spans": [
            {
              "Exact": true,
              "Range": {
                "High": [
                  "\"task\"",
                  "successor(\"blah\")"
                ],
                "Inclusion": 1,
                "Low": [
                  "\"task\"",
                  "\"blah\""
                ]
              }
            }
          ],
          "using": "gsi"
        },
        {
          "#operator": "Fetch",
          "as": "t",
          "keyspace": "test",
          "namespace": "default"
        },
        {
          "#operator": "Join",
          "as": "u",
          "keyspace": "test",
          "namespace": "default",
          "on_keys": "(`t`.`createdBy`)",
          "outer": true
        },
        {
          "#operator": "Parallel",
          "~child": {
            "#operator": "Sequence",
            "~children": [
              {
                "#operator": "Filter",
                "condition": "((((`t`.`base`) = \"task\") and ((`t`.`locationRoot`) = \"blah\")) and ((`t`.`parent`) is null))"
              },
              {
                "#operator": "InitialProject",
                "result_terms": [
                  {
                    "expr": "(`t`.`base`)"
                  },
                  {
                    "expr": "(`t`.`type`)"
                  },
                  {
                    "expr": "(`t`.`locationRoot`)"
                  },
                  {
                    "expr": "(meta(`t`).`id`)"
                  },
                  {
                    "expr": "(`u`.`name`)"
                  }
                ]
              },
              {
                "#operator": "FinalProject"
              }
            ]
          }
        }
      ]
    },
    "text": "Select t.base, t.type, t.locationRoot, meta(`t`).id, u.name\nFrom `test` t  LEFT JOIN `test` u ON KEYS t.createdBy\nWhere t.base=\"task\" AND t.locationRoot = 'blah' and t.parent IS NULL"
  }
]

vsr1 · October 13, 2017, 4:32pm

Are you need t.type (Task.type) ?

CREATE INDEX tasks_idx ON test(locationRoot,createdBy, type) WHERE base = "task" AND parent  IS NULL;

SELECT  t.base, t.type, t.locationRoot, meta(t).id, u.name
FROM test t LEFT JOIN test u ON KEYS t.createdBy
WHERE t.base=“task” AND t.locationRoot = ‘blah’ AND t.parent IS NULL;

This is LOOK UP JOIN and each outer document we need to fetch inner document.

dipen_patel · October 13, 2017, 6:28pm

I need type and once I improve this query I will add more to the index

That seems to be worse.

CREATE INDEX tasks_idx ON sync_gateway_sw1(locationRoot,createdBy,type) WHERE ((base = task) and (parent is null))

[
{
“plan”: {
"#operator": “Sequence”,
"~children": [
{
"#operator": “PrimaryScan”,
“index”: “#primary”,
“keyspace”: “test”,
“namespace”: “default”,
“using”: “gsi”
},
{
"#operator": “Fetch”,
“as”: “t”,
“keyspace”: “test”,
“namespace”: “default”
},
{
"#operator": “Join”,
“as”: “u”,
“keyspace”: “test”,
“namespace”: “default”,
“on_keys”: “(t.createdBy)”,
“outer”: true
},
{
"#operator": “Parallel”,
"~child": {
"#operator": “Sequence”,
"~children": [
{
"#operator": “Filter”,
“condition”: “((((t.base) = “task”) and ((t.locationRoot) = “blah”)) and ((t.parent) is null))”
},
{
"#operator": “InitialProject”,
“result_terms”: [
{
“expr”: “(t.base)”
},
{
“expr”: “(t.type)”
},
{
“expr”: “(t.locationRoot)”
},
{
“expr”: “(meta(t).id)”
},
{
“expr”: “(u.name)”
}
]
},
{
"#operator": “FinalProject”
}
]
}
}
]
},
“text”: “SELECT t.base, t.type, t.locationRoot, meta(t).id, u.name\nFROM test t LEFT JOIN test u ON KEYS t.createdBy\nWHERE t.base=“task” AND t.locationRoot = ‘blah’ AND t.parent IS NULL;”
}
]

What the best way to get all tasks and just the username? The user may not exist.

vsr1 · October 13, 2017, 8:57pm

In CREATE INDEX there is quotes missing around task in where clause

dipen_patel · October 14, 2017, 2:18pm

Thanks, that did sped up the query.

Testing different indexes

CREATE INDEX tasks_idx ON test(type,locationRoot,createdBy) WHERE ((base = “task”) and (parent is null))
CREATE INDEX tasks_idx ON test(locationRoot,createdBy, type) WHERE ((base = “task”) and (parent is null))

#2 Index is much faster then #1, but the only different is the covering values, and you explain why?

vsr1 · October 14, 2017, 5:25pm

#2 Index is optimal. Because predicates are leading index keys in index and push the predicates to indexer start and stop range.
#1 Index will not qualify for the query because leading index key is not part of the predicate.
The following link explains how to design the index https://dzone.com/articles/designing-index-for-query-in-couchbase-n1ql
https://dzone.com/articles/n1ql-a-practicle-guide

dipen_patel · October 17, 2017, 4:53pm

Thanks

Is there a way to cover or index the joins? Using on keys spikes the OPS

vsr1 · October 17, 2017, 5:13pm

Based on the query, relation ship and predicates LOOK UP JOIN is right one. LEFT side of JOIN can be covered and not right side.

You can also set Query Service seetings
pretty=false and higher pipeline-cap, pipeline-batch

Example:
curl -u Administrator:pass http://localhost:8093/admin/settings -XPOST -d '{"pretty":false,"pipeline-batch":512,"pipeline-cap":1024}'

Topic		Replies	Views
N1ql query is slow in join even indexes are created SQL++ query , n1ql	3	579	August 14, 2022
Slow N1QL query Performance with a join SQL++	2	1560	January 14, 2022
N1Ql execution is particularly slow when searching for data accurately using array indexes SQL++ query	3	747	July 31, 2018
Optimize query with JOIN SQL++ query , n1ql , index	22	3241	March 21, 2017
Improve query performance SQL++ query , n1ql	3	655	August 15, 2022

N1QL Slow Query results when joining

Related topics