Hello,
I’m new to FTS and I’m trying to check if this solution is doable. I created an Index with all the fields that I need for a type of document. I also used the type identifier to limit the index to only those type of documents. I put the checks on index, store and docvalues and I used the keyword analyzer.
Now I’m running some queries and I’m getting some problems. I’m trying to use both SEARCH with the jsonobject and USE INDEX (USING FTS) with standard queries. I put here the query and the plan for both cases. The problems is that if I try to use the skip of 0, 10, 100, 1000 I get the results very fast. If I try to use the skip with 10000 or more the result is a lot slower and I also get more documents than the limit that I put (50).
Query with SEARCH and Object
SELECT *
FROM collection AS d USE INDEX (USING FTS)
WHERE ObjectType = 'Item' AND SEARCH(d, {
"query": {
        "must": {
            "conjuncts":[
                {
                    "field": "IsDeleted",
                    "bool": false
                },{
                    "field": "Project",
                    "match": "value"
                },{
                    "field": "IsTechnical",
                    "bool": false
                },{
                    "field": "Execution",
                    "match": "Y"
                }
            ]
        }
    },
    "sort": [
            {
                "by": "field",
                "field": "ModifiedOn",
                "desc": true,
                "mode": "default",
                "missing": "last",
                "type": "string"
            }
    ],
    "size":50,
    "from":1000})
This query runs in 180ms with no problems. This is the planner
{
  "#operator": "Authorize",
  "#stats": {
    "#phaseSwitches": 4,
    "execTime": "931ns",
    "servTime": "382.072µs"
  },
  "privileges": {
    "List": [
      {
        "Target": "collection",
        "Priv": 7,
        "Props": 0
      }
    ]
  },
  "~child": {
    "#operator": "Sequence",
    "#stats": {
      "#phaseSwitches": 2,
      "execTime": "1.613µs"
    },
    "~children": [
      {
        "#operator": "IndexFtsSearch",
        "#stats": {
          "#heartbeatYields": 1,
          "#itemsOut": 50,
          "#phaseSwitches": 203,
          "execTime": "53.269µs",
          "kernTime": "103.643µs",
          "servTime": "134.745816ms"
        },
        "as": "d",
        "bucket": "bucketname",
        "index": "Item_General",
        "index_id": "39a880613af2ed8e",
        "keyspace": "_default",
        "namespace": "default",
        "scope": "_default",
        "search_info": {
          "field": "\"\"",
          "outname": "out",
          "query": "{\"from\": 1000, \"query\": {\"must\": {\"conjuncts\": [{\"bool\": false, \"field\": \"IsDeleted\"}, {\"field\": \"Project\", \"match\": \"project\"}, {\"bool\": false, \"field\": \"IsTechnical\"}, {\"field\": \"Execution\", \"match\": \"Y\"}]}}, \"size\": 50, \"sort\": [{\"by\": \"field\", \"desc\": true, \"field\": \"ModifiedOn\", \"missing\": \"last\", \"mode\": \"default\", \"type\": \"string\"}]}"
        },
        "using": "fts",
        "#time_normal": "00:00.134",
        "#time_absolute": 0.13479908499999999
      },
      {
        "#operator": "Fetch",
        "#stats": {
          "#heartbeatYields": 1,
          "#itemsIn": 50,
          "#itemsOut": 50,
          "#phaseSwitches": 210,
          "execTime": "156.421µs",
          "kernTime": "134.872425ms",
          "servTime": "10.028027ms"
        },
        "as": "d",
        "bucket": "bucketname",
        "keyspace": "_default",
        "namespace": "default",
        "scope": "_default",
        "#time_normal": "00:00.010",
        "#time_absolute": 0.010184448
      },
      {
        "#operator": "Sequence",
        "#stats": {
          "#phaseSwitches": 2,
          "execTime": "155.711µs",
          "kernTime": "70ns",
          "state": "running"
        },
        "~children": [
          {
            "#operator": "Filter",
            "#stats": {
              "#itemsIn": 50,
              "#itemsOut": 50,
              "#phaseSwitches": 204,
              "execTime": "7.798589ms",
              "kernTime": "139.749025ms"
            },
            "condition": "(((`d`.`ObjectType`) = \"Item\") and search(`d`, {\"from\": 1000, \"query\": {\"must\": {\"conjuncts\": [{\"bool\": false, \"field\": \"IsDeleted\"}, {\"field\": \"Project\", \"match\": \"project\"}, {\"bool\": false, \"field\": \"IsTechnical\"}, {\"field\": \"Execution\", \"match\": \"Y\"}]}}, \"size\": 50, \"sort\": [{\"by\": \"field\", \"desc\": true, \"field\": \"ModifiedOn\", \"missing\": \"last\", \"mode\": \"default\", \"type\": \"string\"}]}))",
            "#time_normal": "00:00.007",
            "#time_absolute": 0.007798589
          },
          {
            "#operator": "InitialProject",
            "#stats": {
              "#itemsIn": 50,
              "#itemsOut": 50,
              "#phaseSwitches": 156,
              "execTime": "47.378µs",
              "kernTime": "142.827314ms"
            },
            "discard_original": true,
            "result_terms": [
              {
                "expr": "self",
                "star": true
              }
            ],
            "#time_normal": "00:00.000",
            "#time_absolute": 0.000047378
          }
        ],
        "#time_normal": "00:00.000",
        "#time_absolute": 0.000155711
      },
      {
        "#operator": "Stream",
        "#stats": {
          "#itemsIn": 50,
          "#itemsOut": 50,
          "#phaseSwitches": 52,
          "execTime": "4.786682ms"
        },
        "#time_normal": "00:00.004",
        "#time_absolute": 0.004786682
      }
    ],
    "#time_normal": "00:00.000",
    "#time_absolute": 0.000001613
  },
  "~versions": [
    "7.2.4-N1QL",
    "7.2.4-7070-enterprise"
  ],
  "#time_normal": "00:00.000",
  "#time_absolute": 0.000382072
}
If I put the from to 10000 or more I get 25625+ results instead of the normal size of 50.
I saw now that there is a default limit of 10k elements of (size + from).
When I create the same query with standard syntax, but using the FlexIndex, I get the results without this limitation, but I need to wait a lot of time.
The solutions that I saw is to use the search_after, but in our application we have a ‘go to page’ feature that isn’t viable with this type of logic (we can still use it, but if I am at page 0 and I want to go to page 10000 it isn’t very useful). Are there any solutions? Increasing the default value to a higher number can cause problems and/or improve the results response time? Is it better to use the search with object query or the standard query with use index (using FTS) or is it the same?