Hi all,
I have count query that I want to improve (currently the response time is ~27s with 3M of records):
SELECT COUNT(DISTINCT `subject`) AS count FROM block WHERE `subject` IS NOT MISSING AND `_class` = "com.xxx.yyy.zzz.model.Block" AND `type` IS NOT MISSING AND `type` IN ['http://my.host.com/xxx/yyy/core']
The explain show a covering index:
[
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "IndexScan",
"covers": [
"cover ((`block`.`subject`))",
"cover ((`block`.`type`))",
"cover ((`block`.`lastModification`))",
"cover ((meta(`block`).`id`))"
],
"filter_covers": {
"cover ((`block`.`_class`))": "com.xxx.yyy.zzz.model.Block"
},
"index": "i_subject_type_lastModification_class",
"index_id": "36a2933006736836",
"keyspace": "block",
"namespace": "default",
"spans": [
{
"Range": {
"Inclusion": 0,
"Low": [
"null",
"\"http://my.host.com/xxx/yyy/core\""
]
}
}
],
"using": "gsi"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "((((cover ((`block`.`subject`)) is not missing) and (cover ((`block`.`_class`)) = \"com.xxx.yyy.zzz.model.Block\")) and (cover ((`block`.`type`)) is not missing)) and (cover ((`block`.`type`)) in [\"http://my.host.com/xxx/yyy/core\"]))"
},
{
"#operator": "InitialGroup",
"aggregates": [
"count(distinct cover ((`block`.`subject`)))"
],
"group_keys": []
}
]
}
},
{
"#operator": "IntermediateGroup",
"aggregates": [
"count(distinct cover ((`block`.`subject`)))"
],
"group_keys": []
},
{
"#operator": "FinalGroup",
"aggregates": [
"count(distinct cover ((`block`.`subject`)))"
],
"group_keys": []
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "InitialProject",
"result_terms": [
{
"as": "count",
"expr": "count(distinct cover ((`block`.`subject`)))"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
"text": "\nSELECT \nCOUNT(DISTINCT `subject`) AS count \nFROM block WHERE `subject` IS NOT MISSING AND `_class` = \"com.xxx.yyy.zzz.model.Block\" \nAND `type` IS NOT MISSING AND `type` IN ['http://my.host.com/xxx/yyy/core']"
}
]
I have the following index:
CREATE INDEX
i_subject_type_lastModification_classON
block(
subject,
type,
lastModification) WHERE (
_class= "com.xxx.yyy.zzz.model.Block")
The stored documents have the following structure:
{
"creator": "system",
"subject": "http://my.host.com/data/subjects/259",
"_class": "com.xxx.yyy.zzz.model.Block",
"type": "http://my.host.com/xxx/yyy/core",
"body": {
<body-can-have-a-random-json-object>
},
"creationDate": 1474400777951
}
I wonder if its possible to use a IndexCountScan in this query. How can I do that?
Thanks!