// Copyright (c) 2018 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package searcher import ( "bytes" "container/heap" "math" "reflect" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/scorer" "github.com/blevesearch/bleve/size" ) var reflectStaticSizeDisjunctionHeapSearcher int var reflectStaticSizeSearcherCurr int func init() { var dhs DisjunctionHeapSearcher reflectStaticSizeDisjunctionHeapSearcher = int(reflect.TypeOf(dhs).Size()) var sc SearcherCurr reflectStaticSizeSearcherCurr = int(reflect.TypeOf(sc).Size()) } type SearcherCurr struct { searcher search.Searcher curr *search.DocumentMatch } type DisjunctionHeapSearcher struct { indexReader index.IndexReader numSearchers int scorer *scorer.DisjunctionQueryScorer min int queryNorm float64 initialized bool searchers []search.Searcher heap []*SearcherCurr matching []*search.DocumentMatch matchingCurrs []*SearcherCurr } func newDisjunctionHeapSearcher(indexReader index.IndexReader, searchers []search.Searcher, min float64, options search.SearcherOptions, limit bool) ( *DisjunctionHeapSearcher, error) { if limit && tooManyClauses(len(searchers)) { return nil, tooManyClausesErr("", len(searchers)) } // build our searcher rv := DisjunctionHeapSearcher{ indexReader: indexReader, searchers: searchers, numSearchers: len(searchers), scorer: scorer.NewDisjunctionQueryScorer(options), min: int(min), matching: make([]*search.DocumentMatch, len(searchers)), matchingCurrs: make([]*SearcherCurr, len(searchers)), heap: make([]*SearcherCurr, 0, len(searchers)), } rv.computeQueryNorm() return &rv, nil } func (s *DisjunctionHeapSearcher) Size() int { sizeInBytes := reflectStaticSizeDisjunctionHeapSearcher + size.SizeOfPtr + s.scorer.Size() for _, entry := range s.searchers { sizeInBytes += entry.Size() } for _, entry := range s.matching { if entry != nil { sizeInBytes += entry.Size() } } // for matchingCurrs and heap, just use static size * len // since searchers and document matches already counted above sizeInBytes += len(s.matchingCurrs) * reflectStaticSizeSearcherCurr sizeInBytes += len(s.heap) * reflectStaticSizeSearcherCurr return sizeInBytes } func (s *DisjunctionHeapSearcher) computeQueryNorm() { // first calculate sum of squared weights sumOfSquaredWeights := 0.0 for _, searcher := range s.searchers { sumOfSquaredWeights += searcher.Weight() } // now compute query norm from this s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights) // finally tell all the downstream searchers the norm for _, searcher := range s.searchers { searcher.SetQueryNorm(s.queryNorm) } } func (s *DisjunctionHeapSearcher) initSearchers(ctx *search.SearchContext) error { // alloc a single block of SearcherCurrs block := make([]SearcherCurr, len(s.searchers)) // get all searchers pointing at their first match for i, searcher := range s.searchers { curr, err := searcher.Next(ctx) if err != nil { return err } if curr != nil { block[i].searcher = searcher block[i].curr = curr heap.Push(s, &block[i]) } } err := s.updateMatches() if err != nil { return err } s.initialized = true return nil } func (s *DisjunctionHeapSearcher) updateMatches() error { matching := s.matching[:0] matchingCurrs := s.matchingCurrs[:0] if len(s.heap) > 0 { // top of the heap is our next hit next := heap.Pop(s).(*SearcherCurr) matching = append(matching, next.curr) matchingCurrs = append(matchingCurrs, next) // now as long as top of heap matches, keep popping for len(s.heap) > 0 && bytes.Compare(next.curr.IndexInternalID, s.heap[0].curr.IndexInternalID) == 0 { next = heap.Pop(s).(*SearcherCurr) matching = append(matching, next.curr) matchingCurrs = append(matchingCurrs, next) } } s.matching = matching s.matchingCurrs = matchingCurrs return nil } func (s *DisjunctionHeapSearcher) Weight() float64 { var rv float64 for _, searcher := range s.searchers { rv += searcher.Weight() } return rv } func (s *DisjunctionHeapSearcher) SetQueryNorm(qnorm float64) { for _, searcher := range s.searchers { searcher.SetQueryNorm(qnorm) } } func (s *DisjunctionHeapSearcher) Next(ctx *search.SearchContext) ( *search.DocumentMatch, error) { if !s.initialized { err := s.initSearchers(ctx) if err != nil { return nil, err } } var rv *search.DocumentMatch found := false for !found && len(s.matching) > 0 { if len(s.matching) >= s.min { found = true // score this match rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers) } // invoke next on all the matching searchers for _, matchingCurr := range s.matchingCurrs { if matchingCurr.curr != rv { ctx.DocumentMatchPool.Put(matchingCurr.curr) } curr, err := matchingCurr.searcher.Next(ctx) if err != nil { return nil, err } if curr != nil { matchingCurr.curr = curr heap.Push(s, matchingCurr) } } err := s.updateMatches() if err != nil { return nil, err } } return rv, nil } func (s *DisjunctionHeapSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { if !s.initialized { err := s.initSearchers(ctx) if err != nil { return nil, err } } // if there is anything in matching, toss it back onto the heap for _, matchingCurr := range s.matchingCurrs { heap.Push(s, matchingCurr) } s.matching = s.matching[:0] s.matchingCurrs = s.matchingCurrs[:0] // find all searchers that actually need to be advanced // advance them, using s.matchingCurrs as temp storage for len(s.heap) > 0 && bytes.Compare(s.heap[0].curr.IndexInternalID, ID) < 0 { searcherCurr := heap.Pop(s).(*SearcherCurr) ctx.DocumentMatchPool.Put(searcherCurr.curr) curr, err := searcherCurr.searcher.Advance(ctx, ID) if err != nil { return nil, err } if curr != nil { searcherCurr.curr = curr s.matchingCurrs = append(s.matchingCurrs, searcherCurr) } } // now all of the searchers that we advanced have to be pushed back for _, matchingCurr := range s.matchingCurrs { heap.Push(s, matchingCurr) } // reset our temp space s.matchingCurrs = s.matchingCurrs[:0] err := s.updateMatches() if err != nil { return nil, err } return s.Next(ctx) } func (s *DisjunctionHeapSearcher) Count() uint64 { // for now return a worst case var sum uint64 for _, searcher := range s.searchers { sum += searcher.Count() } return sum } func (s *DisjunctionHeapSearcher) Close() (rv error) { for _, searcher := range s.searchers { err := searcher.Close() if err != nil && rv == nil { rv = err } } return rv } func (s *DisjunctionHeapSearcher) Min() int { return s.min } func (s *DisjunctionHeapSearcher) DocumentMatchPoolSize() int { rv := len(s.searchers) for _, s := range s.searchers { rv += s.DocumentMatchPoolSize() } return rv } // a disjunction searcher implements the index.Optimizable interface // but only activates on an edge case where the disjunction is a // wrapper around a single Optimizable child searcher func (s *DisjunctionHeapSearcher) Optimize(kind string, octx index.OptimizableContext) ( index.OptimizableContext, error) { if len(s.searchers) == 1 { o, ok := s.searchers[0].(index.Optimizable) if ok { return o.Optimize(kind, octx) } } return nil, nil } // heap impl func (s *DisjunctionHeapSearcher) Len() int { return len(s.heap) } func (s *DisjunctionHeapSearcher) Less(i, j int) bool { if s.heap[i].curr == nil { return true } else if s.heap[j].curr == nil { return false } return bytes.Compare(s.heap[i].curr.IndexInternalID, s.heap[j].curr.IndexInternalID) < 0 } func (s *DisjunctionHeapSearcher) Swap(i, j int) { s.heap[i], s.heap[j] = s.heap[j], s.heap[i] } func (s *DisjunctionHeapSearcher) Push(x interface{}) { s.heap = append(s.heap, x.(*SearcherCurr)) } func (s *DisjunctionHeapSearcher) Pop() interface{} { old := s.heap n := len(old) x := old[n-1] s.heap = old[0 : n-1] return x }