N1QL Slow Query results when joining

I want to be able to find all tasks with a location of ‘blah’ with a user’s name who created it. Using 4.6.3.
Task
{
base: ‘task’,
locationRoot: ‘blah’,
createdBy: 'user1’
parent: null
}

User - ID: - user1
{
	id: user1,
	name: 'bob',
	type: user
}

INDEXES
CREATE INDEX tasks_idx ON test(base,locationRoot,createdBy) WHERE (((base = “task”) and (locationRoot is not null)) and (parent is null))
CREATE INDEX createdby_idx ON test(createdBy,name)


No Join

Query
TIME - 62 ms

Select t.base, t.type, t.locationRoot, meta(t).id
From test t
Where t.base=“task” AND t.locationRoot = ‘blah’ and t.parent IS NULL

EXPLAIN
[
  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "Sequence",
          "~children": [
            {
              "#operator": "IndexScan",
              "covers": [
                "cover ((`t`.`base`))",
                "cover ((`t`.`locationRoot`))",
                "cover ((`t`.`createdBy`))",
                "cover ((meta(`t`).`id`))"
              ],
              "filter_covers": {
                "cover (((`t`.`locationRoot`) is not null))": true,
                "cover (((`t`.`parent`) is null))": true,
                "cover ((`t`.`base`))": "task",
                "cover ((`t`.`parent`))": null
              },
              "index": "tasks_idx",
              "index_id": "ae1445162a704134",
              "keyspace": "test",
              "limit": "1000",
              "namespace": "default",
              "spans": [
                {
                  "Exact": true,
                  "Range": {
                    "High": [
                      "\"task\"",
                      "successor(\"blah\")"
                    ],
                    "Inclusion": 1,
                    "Low": [
                      "\"task\"",
                      "\"blah\""
                    ]
                  }
                }
              ],
              "using": "gsi"
            },
            {
              "#operator": "Parallel",
              "~child": {
                "#operator": "Sequence",
                "~children": [
                  {
                    "#operator": "Filter",
                    "condition": "(((cover ((`t`.`base`)) = \"task\") and (cover ((`t`.`locationRoot`)) = \"blah\")) and cover (((`t`.`parent`) is null)))"
                  },
                  {
                    "#operator": "InitialProject",
                    "result_terms": [
                      {
                        "expr": "cover ((`t`.`base`))"
                      },
                      {
                        "expr": "cover ((`t`.`locationRoot`))"
                      },
                      {
                        "expr": "cover ((meta(`t`).`id`))"
                      }
                    ]
                  },
                  {
                    "#operator": "FinalProject"
                  }
                ]
              }
            }
          ]
        },
        {
          "#operator": "Limit",
          "expr": "1000"
        }
      ]
    },
    "text": "Select t.base, t.locationRoot, meta(`t`).id\nFrom `test` t\nWhere t.base=\"task\" AND t.locationRoot = 'blah' and t.parent IS NULL Limit 1000"
  }
]

JOINING

Query
TIME - 2 seconds

Select t.base, t.type, t.locationRoot, meta(t).id, u.name
From test t LEFT JOIN test u ON KEYS t.createdBy
Where t.base=“task” AND t.locationRoot = ‘blah’ and t.parent IS NULL

EXPLAIN

[
  {
    "plan": {
      "#operator": "Sequence",
      "~children": [
        {
          "#operator": "IndexScan",
          "index": "tasks_idx",
          "index_id": "ae1445162a704134",
          "keyspace": "test",
          "namespace": "default",
          "spans": [
            {
              "Exact": true,
              "Range": {
                "High": [
                  "\"task\"",
                  "successor(\"blah\")"
                ],
                "Inclusion": 1,
                "Low": [
                  "\"task\"",
                  "\"blah\""
                ]
              }
            }
          ],
          "using": "gsi"
        },
        {
          "#operator": "Fetch",
          "as": "t",
          "keyspace": "test",
          "namespace": "default"
        },
        {
          "#operator": "Join",
          "as": "u",
          "keyspace": "test",
          "namespace": "default",
          "on_keys": "(`t`.`createdBy`)",
          "outer": true
        },
        {
          "#operator": "Parallel",
          "~child": {
            "#operator": "Sequence",
            "~children": [
              {
                "#operator": "Filter",
                "condition": "((((`t`.`base`) = \"task\") and ((`t`.`locationRoot`) = \"blah\")) and ((`t`.`parent`) is null))"
              },
              {
                "#operator": "InitialProject",
                "result_terms": [
                  {
                    "expr": "(`t`.`base`)"
                  },
                  {
                    "expr": "(`t`.`type`)"
                  },
                  {
                    "expr": "(`t`.`locationRoot`)"
                  },
                  {
                    "expr": "(meta(`t`).`id`)"
                  },
                  {
                    "expr": "(`u`.`name`)"
                  }
                ]
              },
              {
                "#operator": "FinalProject"
              }
            ]
          }
        }
      ]
    },
    "text": "Select t.base, t.type, t.locationRoot, meta(`t`).id, u.name\nFrom `test` t  LEFT JOIN `test` u ON KEYS t.createdBy\nWhere t.base=\"task\" AND t.locationRoot = 'blah' and t.parent IS NULL"
  }
]

Are you need t.type (Task.type) ?

CREATE INDEX tasks_idx ON test(locationRoot,createdBy, type) WHERE base = "task" AND parent  IS NULL;

SELECT  t.base, t.type, t.locationRoot, meta(t).id, u.name
FROM test t LEFT JOIN test u ON KEYS t.createdBy
WHERE t.base=“task” AND t.locationRoot = ‘blah’ AND t.parent IS NULL;

This is LOOK UP JOIN and each outer document we need to fetch inner document.

I need type and once I improve this query I will add more to the index

That seems to be worse.

CREATE INDEX tasks_idx ON sync_gateway_sw1(locationRoot,createdBy,type) WHERE ((base = task) and (parent is null))

[
{
“plan”: {
"#operator": “Sequence”,
"~children": [
{
"#operator": “PrimaryScan”,
“index”: “#primary”,
“keyspace”: “test”,
“namespace”: “default”,
“using”: “gsi”
},
{
"#operator": “Fetch”,
“as”: “t”,
“keyspace”: “test”,
“namespace”: “default”
},
{
"#operator": “Join”,
“as”: “u”,
“keyspace”: “test”,
“namespace”: “default”,
“on_keys”: “(t.createdBy)”,
“outer”: true
},
{
"#operator": “Parallel”,
"~child": {
"#operator": “Sequence”,
"~children": [
{
"#operator": “Filter”,
“condition”: “((((t.base) = “task”) and ((t.locationRoot) = “blah”)) and ((t.parent) is null))”
},
{
"#operator": “InitialProject”,
“result_terms”: [
{
“expr”: “(t.base)”
},
{
“expr”: “(t.type)”
},
{
“expr”: “(t.locationRoot)”
},
{
“expr”: “(meta(t).id)”
},
{
“expr”: “(u.name)”
}
]
},
{
"#operator": “FinalProject”
}
]
}
}
]
},
“text”: “SELECT t.base, t.type, t.locationRoot, meta(t).id, u.name\nFROM test t LEFT JOIN test u ON KEYS t.createdBy\nWHERE t.base=“task” AND t.locationRoot = ‘blah’ AND t.parent IS NULL;”
}
]

What the best way to get all tasks and just the username? The user may not exist.

In CREATE INDEX there is quotes missing around task in where clause

Thanks, that did sped up the query.

Testing different indexes

  1. CREATE INDEX tasks_idx ON test(type,locationRoot,createdBy) WHERE ((base = “task”) and (parent is null))
  2. CREATE INDEX tasks_idx ON test(locationRoot,createdBy, type) WHERE ((base = “task”) and (parent is null))

#2 Index is much faster then #1, but the only different is the covering values, and you explain why?

#2 Index is optimal. Because predicates are leading index keys in index and push the predicates to indexer start and stop range.
#1 Index will not qualify for the query because leading index key is not part of the predicate.
The following link explains how to design the index https://dzone.com/articles/designing-index-for-query-in-couchbase-n1ql
https://dzone.com/articles/n1ql-a-practicle-guide

Thanks

Is there a way to cover or index the joins? Using on keys spikes the OPS

Based on the query, relation ship and predicates LOOK UP JOIN is right one. LEFT side of JOIN can be covered and not right side.

You can also set Query Service seetings
pretty=false and higher pipeline-cap, pipeline-batch

Example:
curl -u Administrator:pass http://localhost:8093/admin/settings -XPOST -d '{"pretty":false,"pipeline-batch":512,"pipeline-cap":1024}'