fix(knowledge): Fix the issue of ineffective pagination parameters in the image-based knowledge base (#831)

This commit is contained in:
liuyunchao-1998 2025-08-20 17:49:38 +08:00 committed by GitHub
parent f956c18a09
commit e7070b419c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 168 additions and 59 deletions

View File

@ -27,8 +27,15 @@ type WhereSliceOpt struct {
DocumentID int64 DocumentID int64
DocumentIDs []int64 DocumentIDs []int64
Keyword *string Keyword *string
Sequence int64
PageSize int64 PageSize int64
Offset int64 Offset int64
NotEmpty *bool NotEmpty *bool
} }
// WherePhotoSliceOpt carries the filter and pagination options used when
// listing slices of an image knowledge base.
type WherePhotoSliceOpt struct {
	// KnowledgeID restricts results to one knowledge base; zero means no filter.
	KnowledgeID int64
	// DocumentIDs restricts results to the given documents; empty means no filter.
	DocumentIDs []int64
	// Limit is the maximum number of rows to return; nil or zero means no limit.
	Limit *int
	// Offset is the number of rows to skip; nil or zero means no offset.
	Offset *int
	// HasCaption filters on slice content: true keeps slices with non-empty
	// content, false keeps slices with empty content, nil applies no filter.
	HasCaption *bool
}

View File

@ -236,8 +236,11 @@ func (dao *KnowledgeDocumentSliceDAO) FindSliceByCondition(ctx context.Context,
if opts.PageSize != 0 { if opts.PageSize != 0 {
do = do.Limit(int(opts.PageSize)) do = do.Limit(int(opts.PageSize))
do = do.Offset(int(opts.Sequence)).Order(s.Sequence.Asc())
} }
if opts.Offset != 0 {
do = do.Offset(int(opts.Offset))
}
do = do.Order(s.Sequence.Asc())
if opts.NotEmpty != nil { if opts.NotEmpty != nil {
if ptr.From(opts.NotEmpty) { if ptr.From(opts.NotEmpty) {
do = do.Where(s.Content.Neq("")) do = do.Where(s.Content.Neq(""))
@ -319,3 +322,44 @@ func (dao *KnowledgeDocumentSliceDAO) GetLastSequence(ctx context.Context, docum
} }
return resp.Sequence, nil return resp.Sequence, nil
} }
// ListPhotoSlice returns the slices matching opts together with the total
// number of rows matching the same filters with Limit and Offset cancelled.
//
// Filters are applied only when set: a zero KnowledgeID, an empty DocumentIDs
// list and a nil HasCaption each leave the corresponding column unfiltered.
// HasCaption=true selects rows with non-empty content, HasCaption=false
// selects rows with empty content. A nil or zero Limit/Offset applies no
// limit/offset. A nil opts is treated as an empty option set.
//
// Rows are ordered by UpdatedAt descending, with ID descending as a
// tiebreaker so that pages stay stable when several rows share a timestamp.
func (dao *KnowledgeDocumentSliceDAO) ListPhotoSlice(ctx context.Context, opts *entity.WherePhotoSliceOpt) ([]*model.KnowledgeDocumentSlice, int64, error) {
	// Guard against a nil option set instead of panicking on dereference.
	if opts == nil {
		opts = &entity.WherePhotoSliceOpt{}
	}

	s := dao.Query.KnowledgeDocumentSlice
	do := s.WithContext(ctx)

	if opts.KnowledgeID != 0 {
		do = do.Where(s.KnowledgeID.Eq(opts.KnowledgeID))
	}
	if len(opts.DocumentIDs) != 0 {
		do = do.Where(s.DocumentID.In(opts.DocumentIDs...))
	}
	if opts.HasCaption != nil {
		if ptr.From(opts.HasCaption) {
			do = do.Where(s.Content.Neq(""))
		} else {
			do = do.Where(s.Content.Eq(""))
		}
	}

	if limit := ptr.From(opts.Limit); limit != 0 {
		do = do.Limit(limit)
	}
	if offset := ptr.From(opts.Offset); offset != 0 {
		do = do.Offset(offset)
	}

	// UpdatedAt alone is not unique (slices created in one batch can share a
	// millisecond timestamp), so add ID to make paginated ordering deterministic.
	do = do.Order(s.UpdatedAt.Desc(), s.ID.Desc())

	pos, err := do.Find()
	if err != nil {
		return nil, 0, err
	}

	// Cancel pagination so the count covers every row matching the filters.
	total, err := do.Limit(-1).Offset(-1).Count()
	if err != nil {
		return nil, 0, err
	}
	return pos, total, nil
}
// BatchCreateWithTX inserts slices through the caller-supplied handle tx,
// writing them in batches of batchSize rows. It returns nil without touching
// the database when slices is empty.
func (dao *KnowledgeDocumentSliceDAO) BatchCreateWithTX(ctx context.Context, tx *gorm.DB, slices []*model.KnowledgeDocumentSlice) error {
	if len(slices) == 0 {
		return nil
	}

	// Number of rows written per INSERT statement.
	const batchSize = 100

	// No Debug() here: it would force verbose SQL logging for every insert,
	// regardless of the logger level configured for the application.
	return tx.WithContext(ctx).Model(&model.KnowledgeDocumentSlice{}).CreateInBatches(slices, batchSize).Error
}

View File

@ -49,8 +49,9 @@ type baseDocProcessor struct {
documentSource *entity.DocumentSource documentSource *entity.DocumentSource
// Drop DB model // Drop DB model
TableName string TableName string
docModels []*model.KnowledgeDocument docModels []*model.KnowledgeDocument
imageSlices []*model.KnowledgeDocumentSlice
storage storage.Storage storage storage.Storage
knowledgeRepo repository.KnowledgeRepo knowledgeRepo repository.KnowledgeRepo
@ -69,14 +70,14 @@ func (p *baseDocProcessor) BeforeCreate() error {
func (p *baseDocProcessor) BuildDBModel() error { func (p *baseDocProcessor) BuildDBModel() error {
p.docModels = make([]*model.KnowledgeDocument, 0, len(p.Documents)) p.docModels = make([]*model.KnowledgeDocument, 0, len(p.Documents))
ids, err := p.idgen.GenMultiIDs(p.ctx, len(p.Documents))
if err != nil {
logs.CtxErrorf(p.ctx, "gen ids failed, err: %v", err)
return errorx.New(errno.ErrKnowledgeIDGenCode)
}
for i := range p.Documents { for i := range p.Documents {
id, err := p.idgen.GenID(p.ctx)
if err != nil {
logs.CtxErrorf(p.ctx, "gen id failed, err: %v", err)
return errorx.New(errno.ErrKnowledgeIDGenCode)
}
docModel := &model.KnowledgeDocument{ docModel := &model.KnowledgeDocument{
ID: ids[i], ID: id,
KnowledgeID: p.Documents[i].KnowledgeID, KnowledgeID: p.Documents[i].KnowledgeID,
Name: p.Documents[i].Name, Name: p.Documents[i].Name,
FileExtension: string(p.Documents[i].FileExtension), FileExtension: string(p.Documents[i].FileExtension),
@ -95,6 +96,23 @@ func (p *baseDocProcessor) BuildDBModel() error {
} }
p.Documents[i].ID = docModel.ID p.Documents[i].ID = docModel.ID
p.docModels = append(p.docModels, docModel) p.docModels = append(p.docModels, docModel)
if p.Documents[i].Type == knowledge.DocumentTypeImage {
id, err := p.idgen.GenID(p.ctx)
if err != nil {
logs.CtxErrorf(p.ctx, "gen id failed, err: %v", err)
return errorx.New(errno.ErrKnowledgeIDGenCode)
}
p.imageSlices = append(p.imageSlices, &model.KnowledgeDocumentSlice{
ID: id,
KnowledgeID: p.Documents[i].KnowledgeID,
DocumentID: p.Documents[i].ID,
CreatedAt: time.Now().UnixMilli(),
UpdatedAt: time.Now().UnixMilli(),
CreatorID: p.UserID,
SpaceID: p.SpaceID,
Status: int32(knowledge.SliceStatusInit),
})
}
} }
return nil return nil
@ -142,6 +160,11 @@ func (p *baseDocProcessor) InsertDBModel() (err error) {
logs.CtxErrorf(ctx, "create document failed, err: %v", err) logs.CtxErrorf(ctx, "create document failed, err: %v", err)
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error())) return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
} }
err = p.sliceRepo.BatchCreateWithTX(ctx, tx, p.imageSlices)
if err != nil {
logs.CtxErrorf(ctx, "update knowledge failed, err: %v", err)
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))
}
err = p.knowledgeRepo.UpdateWithTx(ctx, tx, p.Documents[0].KnowledgeID, map[string]interface{}{ err = p.knowledgeRepo.UpdateWithTx(ctx, tx, p.Documents[0].KnowledgeID, map[string]interface{}{
"updated_at": time.Now().UnixMilli(), "updated_at": time.Now().UnixMilli(),
}) })

View File

@ -84,12 +84,12 @@ type KnowledgeDocumentSliceRepo interface {
Create(ctx context.Context, slice *model.KnowledgeDocumentSlice) error Create(ctx context.Context, slice *model.KnowledgeDocumentSlice) error
Update(ctx context.Context, slice *model.KnowledgeDocumentSlice) error Update(ctx context.Context, slice *model.KnowledgeDocumentSlice) error
Delete(ctx context.Context, slice *model.KnowledgeDocumentSlice) error Delete(ctx context.Context, slice *model.KnowledgeDocumentSlice) error
BatchCreateWithTX(ctx context.Context, tx *gorm.DB, slices []*model.KnowledgeDocumentSlice) error
BatchCreate(ctx context.Context, slices []*model.KnowledgeDocumentSlice) error BatchCreate(ctx context.Context, slices []*model.KnowledgeDocumentSlice) error
BatchSetStatus(ctx context.Context, ids []int64, status int32, reason string) error BatchSetStatus(ctx context.Context, ids []int64, status int32, reason string) error
DeleteByDocument(ctx context.Context, documentID int64) error DeleteByDocument(ctx context.Context, documentID int64) error
MGetSlices(ctx context.Context, sliceIDs []int64) ([]*model.KnowledgeDocumentSlice, error) MGetSlices(ctx context.Context, sliceIDs []int64) ([]*model.KnowledgeDocumentSlice, error)
ListPhotoSlice(ctx context.Context, opts *entity.WherePhotoSliceOpt) ([]*model.KnowledgeDocumentSlice, int64, error)
FindSliceByCondition(ctx context.Context, opts *entity.WhereSliceOpt) ( FindSliceByCondition(ctx context.Context, opts *entity.WhereSliceOpt) (
[]*model.KnowledgeDocumentSlice, int64, error) []*model.KnowledgeDocumentSlice, int64, error)
GetDocumentSliceIDs(ctx context.Context, docIDs []int64) (sliceIDs []int64, err error) GetDocumentSliceIDs(ctx context.Context, docIDs []int64) (sliceIDs []int64, err error)

View File

@ -190,24 +190,26 @@ func (k *knowledgeSVC) indexDocument(ctx context.Context, event *entity.Event) (
collectionName := getCollectionName(doc.KnowledgeID) collectionName := getCollectionName(doc.KnowledgeID)
if !doc.IsAppend { if !doc.IsAppend {
ids, err := k.sliceRepo.GetDocumentSliceIDs(ctx, []int64{doc.ID}) if doc.Type != knowledge.DocumentTypeImage {
if err != nil { ids, err := k.sliceRepo.GetDocumentSliceIDs(ctx, []int64{doc.ID})
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", fmt.Sprintf("get document slice ids failed, err: %v", err))) if err != nil {
} return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", fmt.Sprintf("get document slice ids failed, err: %v", err)))
if len(ids) > 0 {
if err = k.sliceRepo.DeleteByDocument(ctx, doc.ID); err != nil {
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", fmt.Sprintf("delete document slice failed, err: %v", err)))
} }
for _, manager := range k.searchStoreManagers { if len(ids) > 0 {
s, err := manager.GetSearchStore(ctx, collectionName) if err = k.sliceRepo.DeleteByDocument(ctx, doc.ID); err != nil {
if err != nil { return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", fmt.Sprintf("delete document slice failed, err: %v", err)))
return errorx.New(errno.ErrKnowledgeSearchStoreCode, errorx.KV("msg", fmt.Sprintf("get search store failed, err: %v", err)))
} }
if err := s.Delete(ctx, slices.Transform(event.SliceIDs, func(id int64) string { for _, manager := range k.searchStoreManagers {
return strconv.FormatInt(id, 10) s, err := manager.GetSearchStore(ctx, collectionName)
})); err != nil { if err != nil {
logs.Errorf("[indexDocument] delete knowledge failed, err: %v", err) return errorx.New(errno.ErrKnowledgeSearchStoreCode, errorx.KV("msg", fmt.Sprintf("get search store failed, err: %v", err)))
return errorx.New(errno.ErrKnowledgeSearchStoreCode, errorx.KV("msg", fmt.Sprintf("delete search store failed, err: %v", err))) }
if err := s.Delete(ctx, slices.Transform(event.SliceIDs, func(id int64) string {
return strconv.FormatInt(id, 10)
})); err != nil {
logs.Errorf("[indexDocument] delete knowledge failed, err: %v", err)
return errorx.New(errno.ErrKnowledgeSearchStoreCode, errorx.KV("msg", fmt.Sprintf("delete search store failed, err: %v", err)))
}
} }
} }
} }
@ -298,34 +300,68 @@ func (k *knowledgeSVC) indexDocument(ctx context.Context, event *entity.Event) (
seqOffset += 1 seqOffset += 1
} }
sliceModels := make([]*model.KnowledgeDocumentSlice, 0, len(parseResult)) if doc.Type == knowledge.DocumentTypeImage {
for i, src := range parseResult { if len(parseResult) != 0 {
now := time.Now().UnixMilli() slices, _, err := k.sliceRepo.FindSliceByCondition(ctx, &entity.WhereSliceOpt{DocumentID: doc.ID})
sliceModel := &model.KnowledgeDocumentSlice{
ID: allIDs[i],
KnowledgeID: doc.KnowledgeID,
DocumentID: doc.ID,
Content: parseResult[i].Content,
Sequence: seqOffset + float64(i),
CreatedAt: now,
UpdatedAt: now,
CreatorID: doc.CreatorID,
SpaceID: doc.SpaceID,
Status: int32(model.SliceStatusProcessing),
FailReason: "",
}
if doc.Type == knowledge.DocumentTypeTable {
sliceEntity, err := convertFn(src, doc.KnowledgeID, doc.ID, doc.CreatorID)
if err != nil { if err != nil {
logs.CtxErrorf(ctx, "[indexDocument] convert document failed, err: %v", err) return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", fmt.Sprintf("find slice failed, err: %v", err)))
return errorx.New(errno.ErrKnowledgeSystemCode, errorx.KV("msg", fmt.Sprintf("convert document failed, err: %v", err))) }
var slice *model.KnowledgeDocumentSlice
if len(slices) > 0 {
slice = slices[0]
slice.Content = parseResult[0].Content
} else {
id, err := k.idgen.GenID(ctx)
if err != nil {
return errorx.New(errno.ErrKnowledgeIDGenCode, errorx.KV("msg", fmt.Sprintf("GenID failed, err: %v", err)))
}
slice = &model.KnowledgeDocumentSlice{
ID: id,
KnowledgeID: doc.KnowledgeID,
DocumentID: doc.ID,
Content: parseResult[0].Content,
CreatedAt: time.Now().UnixMilli(),
UpdatedAt: time.Now().UnixMilli(),
CreatorID: doc.CreatorID,
SpaceID: doc.SpaceID,
Status: int32(model.SliceStatusProcessing),
FailReason: "",
}
}
if err = k.sliceRepo.Update(ctx, slice); err != nil {
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", fmt.Sprintf("update slice failed, err: %v", err)))
} }
sliceModel.Content = sliceEntity.GetSliceContent()
} }
sliceModels = append(sliceModels, sliceModel) } else {
} sliceModels := make([]*model.KnowledgeDocumentSlice, 0, len(parseResult))
if err = k.sliceRepo.BatchCreate(ctx, sliceModels); err != nil { for i, src := range parseResult {
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", fmt.Sprintf("batch create slice failed, err: %v", err))) now := time.Now().UnixMilli()
sliceModel := &model.KnowledgeDocumentSlice{
ID: allIDs[i],
KnowledgeID: doc.KnowledgeID,
DocumentID: doc.ID,
Content: parseResult[i].Content,
Sequence: seqOffset + float64(i),
CreatedAt: now,
UpdatedAt: now,
CreatorID: doc.CreatorID,
SpaceID: doc.SpaceID,
Status: int32(model.SliceStatusProcessing),
FailReason: "",
}
if doc.Type == knowledge.DocumentTypeTable {
sliceEntity, err := convertFn(src, doc.KnowledgeID, doc.ID, doc.CreatorID)
if err != nil {
logs.CtxErrorf(ctx, "[indexDocument] convert document failed, err: %v", err)
return errorx.New(errno.ErrKnowledgeSystemCode, errorx.KV("msg", fmt.Sprintf("convert document failed, err: %v", err)))
}
sliceModel.Content = sliceEntity.GetSliceContent()
}
sliceModels = append(sliceModels, sliceModel)
}
if err = k.sliceRepo.BatchCreate(ctx, sliceModels); err != nil {
return errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", fmt.Sprintf("batch create slice failed, err: %v", err)))
}
} }
defer func() { defer func() {

View File

@ -876,9 +876,8 @@ func (k *knowledgeSVC) ListSlice(ctx context.Context, request *ListSliceRequest)
KnowledgeID: ptr.From(request.KnowledgeID), KnowledgeID: ptr.From(request.KnowledgeID),
DocumentID: ptr.From(request.DocumentID), DocumentID: ptr.From(request.DocumentID),
Keyword: request.Keyword, Keyword: request.Keyword,
Sequence: request.Sequence, Offset: request.Sequence,
PageSize: request.Limit, PageSize: request.Limit,
Offset: request.Offset,
}) })
if err != nil { if err != nil {
logs.CtxErrorf(ctx, "list slice failed, err: %v", err) logs.CtxErrorf(ctx, "list slice failed, err: %v", err)
@ -1375,12 +1374,12 @@ func (k *knowledgeSVC) ListPhotoSlice(ctx context.Context, request *ListPhotoSli
if request == nil { if request == nil {
return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty")) return nil, errorx.New(errno.ErrKnowledgeInvalidParamCode, errorx.KV("msg", "request is empty"))
} }
sliceArr, total, err := k.sliceRepo.FindSliceByCondition(ctx, &entity.WhereSliceOpt{ sliceArr, total, err := k.sliceRepo.ListPhotoSlice(ctx, &entity.WherePhotoSliceOpt{
KnowledgeID: request.KnowledgeID, KnowledgeID: request.KnowledgeID,
DocumentIDs: request.DocumentIDs, DocumentIDs: request.DocumentIDs,
Offset: int64(ptr.From(request.Offset)), Offset: request.Offset,
PageSize: int64(ptr.From(request.Limit)), Limit: request.Limit,
NotEmpty: request.HasCaption, HasCaption: request.HasCaption,
}) })
if err != nil { if err != nil {
return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error())) return nil, errorx.New(errno.ErrKnowledgeDBCode, errorx.KV("msg", err.Error()))