Skip to content

Commit

Permalink
fix monotonicity of doc ids
Browse files Browse the repository at this point in the history
The concurrent shatter had left open the
possibility of non-monotonically increasing
document ids. Fix with a sync.Cond.
  • Loading branch information
scott-cotton committed Sep 16, 2021
1 parent 9979f3b commit 447033b
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 20 deletions.
10 changes: 0 additions & 10 deletions cmd/dupi/extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,6 @@ func (x *extractCmd) Run(args []string) error {
defer x.index.Close()
query := x.index.StartQuery(dupi.QueryMaxBlot)
shape := []dupi.Blot{
{Blot: 0, Docs: make([]dupi.Doc, 0, 32)},
{Blot: 0, Docs: make([]dupi.Doc, 0, 32)},
{Blot: 0, Docs: make([]dupi.Doc, 0, 32)},
{Blot: 0, Docs: make([]dupi.Doc, 0, 32)},
{Blot: 0, Docs: make([]dupi.Doc, 0, 32)},
{Blot: 0, Docs: make([]dupi.Doc, 0, 32)},
{Blot: 0, Docs: make([]dupi.Doc, 0, 32)},
{Blot: 0, Docs: make([]dupi.Doc, 0, 32)},
{Blot: 0, Docs: make([]dupi.Doc, 0, 32)},
{Blot: 0, Docs: make([]dupi.Doc, 0, 32)},
{Blot: 0, Docs: make([]dupi.Doc, 0, 32)},
{Blot: 0, Docs: make([]dupi.Doc, 0, 32)}}
for {
Expand Down
1 change: 1 addition & 0 deletions dmd/adder.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ func (t *Adder) Add(fid, start, end uint32) (uint32, error) {
return 0, err
}
}
n = uint32(len(t.buf))
t.buf = append(t.buf, fields{fid, start, end})
return n + t.flushed, nil
}
Expand Down
6 changes: 6 additions & 0 deletions post/t.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,19 @@
// dupi blots with dupi internal document ids.
package post

import "fmt"

// T is a post: a (document id, blot) pair packed into a single 64-bit
// word, with the document id in the upper 32 bits and the blot in the
// lower 32 bits.
type T uint64

// Docid returns the document id held in the upper 32 bits of p.
func (p T) Docid() uint32 {
	upper := uint64(p) >> 32
	return uint32(upper)
}

// String formats p as "<docid,blot>", printing the document id in
// decimal and only the low 16 bits of the blot in hexadecimal.
func (p T) String() string {
	const low16 = 0xffff
	id, blot := p.Docid(), p.Blot()
	return fmt.Sprintf("<%d,%x>", id, blot&low16)
}

// Blot returns the blot held in the lower 32 bits of p.
func (p T) Blot() uint32 {
	const lowerMask = 1<<32 - 1
	return uint32(uint64(p) & lowerMask)
}
Expand Down
38 changes: 28 additions & 10 deletions shatter.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ import (
"github.com/go-air/dupi/token"
)

// shardMsg wraps a batch of posts.
// NOTE(review): no use of this type is visible in this view, and the
// shard channels here carry []post.T directly — it may be dead code.
type shardMsg struct {
posts []post.T
}

type shatterReq struct {
docid uint32
offset uint32
Expand All @@ -37,12 +33,13 @@ func startShatter(ns, n, s int,
tf token.TokenizerFunc, blotcfg *blotter.Config,
chns []chan []post.T) (chan *shatterReq, error) {
rch := make(chan *shatterReq)
mono := newMono()
for i := 0; i < ns; i++ {
bler, err := blotter.FromConfig(blotcfg)
if err != nil {
return nil, err
}
sh := newShatter(n, s, tf, bler)
sh := newShatter(n, s, tf, bler, mono)
copy(sh.shardChns, chns)
go func(sh *shatter) {
for {
Expand All @@ -60,22 +57,34 @@ func startShatter(ns, n, s int,
return rch, nil
}

// mono records the id of the most recently sent document together with
// the condition variable that shatters wait on, so that concurrent
// shatters can take turns in document id order.
type mono struct {
	docid uint32     // id of the last document sent; zero initially
	cond  *sync.Cond // its lock guards docid; signalled when docid changes
}

// newMono returns a mono whose docid is zero and whose condition
// variable is backed by its own freshly allocated mutex.
func newMono() *mono {
	m := new(mono)
	m.cond = sync.NewCond(new(sync.Mutex))
	return m
}

// shatter holds the per-worker state used to turn a document into posts
// and to hand those posts to the shards.
type shatter struct {
// tokfn is the tokenizer function given to newShatter.
tokfn token.TokenizerFunc
// tokb is a token buffer; its use is outside this view (presumably
// scratch space reused across documents — confirm).
tokb []token.T
// bler is the blotter given to newShatter.
bler blotter.T
// seqlen is set from the s argument of newShatter (presumably the
// token sequence length — confirm).
seqlen int
// d[i] accumulates the posts destined for shard i until send hands
// them off and resets the entry to nil.
d [][]post.T
// shardChns[i] is the channel on which d[i] is sent to shard i.
shardChns []chan []post.T
// mono is the ordering state that send waits on so that documents
// are sent in increasing document id order.
mono *mono
}

func newShatter(n, s int, tf token.TokenizerFunc, bler blotter.T) *shatter {
func newShatter(n, s int, tf token.TokenizerFunc, bler blotter.T, mono *mono) *shatter {
res := &shatter{
tokfn: tf,
bler: bler,
seqlen: s,
shardChns: make([]chan []post.T, n),
d: make([][]post.T, n)}
d: make([][]post.T, n),
mono: mono}
for i := range res.shardChns {
res.shardChns[i] = make(chan []post.T)
}
Expand All @@ -100,26 +109,35 @@ func (s *shatter) do(did, offset uint32, msg []byte) {
default:
}
}
s.send()
s.send(did)
}

func (s *shatter) send() {
// send hands the posts accumulated in s.d for document did to the shard
// channels, using one goroutine per shard, and returns after every
// goroutine has finished.
//
// It first blocks until s.mono.docid == did-1, i.e. until the document
// with the immediately preceding id has been sent, so that posts are
// sent in increasing document id order. The mono lock is held for the
// whole call, so only one shatter is sending at any time.
//
// NOTE(review): did is a uint32, so did-1 wraps around for did == 0, and
// newMono leaves mono.docid at zero. This appears to require that ids
// start at 1 and arrive without gaps, otherwise the wait never ends —
// confirm against the caller.
func (s *shatter) send(did uint32) {
s.mono.cond.L.Lock()
// Re-check the condition after every wake-up: Broadcast wakes all
// waiters, but only the one holding the next id may proceed.
for s.mono.docid != did-1 {
s.mono.cond.Wait()
}

var wg sync.WaitGroup
for i, ps := range s.d {
wg.Add(1)
go func(i int, ps []post.T) {
defer wg.Done()
// Send the slice to shard i, then wait for a value back on the same
// channel (presumably the shard's acknowledgement that it is done
// with ps — confirm) before dropping our reference to it.
s.shardChns[i] <- ps
<-s.shardChns[i]
s.d[i] = nil //ps[:0]
s.d[i] = nil //ps[:0] (was racy)

}(i, ps)
}
wg.Wait()
// Publish did as the latest sent document and wake every waiter so the
// one holding did+1 can proceed.
s.mono.docid = did
s.mono.cond.Broadcast()
s.mono.cond.L.Unlock()
}

// blot queues a post for document docid and blot value b. The shard is
// chosen as b modulo the number of shards, and the post stored there
// carries b divided by the number of shards.
func (s *shatter) blot(docid, b uint32) {
	shards := uint32(len(s.d))
	shard, rest := b%shards, b/shards
	s.d[shard] = append(s.d[shard], post.Make(docid, rest))
}

0 comments on commit 447033b

Please sign in to comment.