// Inverted index built on roaring bitmaps (使用 roaring bitmap 创建倒排索引).
package main
import (
"fmt"
"github.com/RoaringBitmap/roaring"
"sync"
"testing"
)
// InvertedIndex maps each term to the set of document IDs that contain it.
//
// NOTE(review): sync.Map synchronizes only the term -> bitmap map itself; the
// stored bitmaps are mutated in place by AddDocument, so confirm whether
// concurrent callers need additional locking around the bitmaps.
type InvertedIndex struct {
// index holds term (string) -> *roaring.Bitmap of document IDs.
index sync.Map
}
// NewInvertedIndex returns a pointer to an empty, ready-to-use InvertedIndex.
func NewInvertedIndex() *InvertedIndex {
	return new(InvertedIndex)
}
// AddDocument records docID under every term in terms, creating a term's
// bitmap the first time that term is seen.
//
// NOTE(review): the bitmap is mutated in place after being fetched from the
// sync.Map; roaring bitmaps are documented as unsynchronized, so concurrent
// calls touching the same term presumably need external locking — confirm.
func (idx *InvertedIndex) AddDocument(docID uint32, terms []string) {
	for _, term := range terms {
		// Fast path: reuse the existing bitmap so that a fresh one is only
		// allocated when the term is not indexed yet.
		entry, ok := idx.index.Load(term)
		if !ok {
			// LoadOrStore returns whichever bitmap ended up in the map, so a
			// racing insert of the same term is not overwritten.
			entry, _ = idx.index.LoadOrStore(term, roaring.New())
		}
		entry.(*roaring.Bitmap).Add(docID)
	}
}
// Search returns the IDs of the documents indexed under term.
//
// The result is always a bitmap the caller may modify freely: a clone of the
// stored bitmap when the term is known, or a new empty bitmap otherwise.
func (idx *InvertedIndex) Search(term string) *roaring.Bitmap {
	stored, found := idx.index.Load(term)
	if !found {
		return roaring.New()
	}
	postings := stored.(*roaring.Bitmap)
	return postings.Clone()
}
// SearchAND returns the IDs of the documents that contain every one of terms.
//
// With no terms, or when any term is not indexed, the result is an empty
// bitmap. The returned bitmap is owned by the caller.
func (idx *InvertedIndex) SearchAND(terms ...string) *roaring.Bitmap {
	if len(terms) == 0 {
		return roaring.New()
	}
	// Only the first term is cloned (via Search); it becomes the accumulator
	// that the remaining terms are intersected into.
	result := idx.Search(terms[0])
	for _, term := range terms[1:] {
		// Once the intersection is empty no further term can change it.
		if result.IsEmpty() {
			break
		}
		stored, ok := idx.index.Load(term)
		if !ok {
			// An unknown term matches no document.
			return roaring.New()
		}
		// The stored bitmap is only the read operand of And, so it does not
		// need to be cloned first.
		result.And(stored.(*roaring.Bitmap))
	}
	return result
}
// SearchOR returns the IDs of the documents that contain at least one of
// terms.
//
// Terms that are not indexed contribute nothing; with no terms the result is
// an empty bitmap. The returned bitmap is owned by the caller.
func (idx *InvertedIndex) SearchOR(terms ...string) *roaring.Bitmap {
	result := roaring.New()
	for _, term := range terms {
		// The stored bitmap is only the read operand of Or, so it is merged
		// directly instead of being cloned first.
		if stored, ok := idx.index.Load(term); ok {
			result.Or(stored.(*roaring.Bitmap))
		}
	}
	return result
}
// TestBitmap indexes three small documents and verifies single-term, AND and
// OR lookups, printing each result as it goes.
func TestBitmap(t *testing.T) {
	idx := NewInvertedIndex()
	idx.AddDocument(1, []string{"apple", "fruit", "red"})
	idx.AddDocument(2, []string{"banana", "fruit", "yellow"})
	idx.AddDocument(3, []string{"apple", "pie", "sweet"})

	// check prints the result and fails the test when the returned document
	// IDs differ from the expected ones (ToArray yields IDs in sorted order).
	check := func(label string, got []uint32, want ...uint32) {
		t.Helper()
		fmt.Println(label, got)
		match := len(got) == len(want)
		for i := 0; match && i < len(want); i++ {
			match = got[i] == want[i]
		}
		if !match {
			t.Errorf("%s got %v, want %v", label, got, want)
		}
	}

	check("包含 'apple' 的文档:", idx.Search("apple").ToArray(), 1, 3)
	check("同时包含 'apple' 和 'fruit' 的文档:", idx.SearchAND("apple", "fruit").ToArray(), 1)
	check("包含 'banana' 或 'pie' 的文档:", idx.SearchOR("banana", "pie").ToArray(), 2, 3)
}