
Tantivy是用Rust语言编写的高性能全文检索引擎,检索效率比Java编写的Lucene更高。在用Tantivy编写更新文档的功能时,需要先删除原有文档,再新增文档。然而,调用如下代码删除文档时发现无法删除:
let id=schema.get_field("id").unwrap();
let mut index_writer: IndexWriter=get_writer();
let term: Term = Term::from_field_u64(id, id_value); 经过一天多的排查,发现原因是id字段仅进行了存储,而未对其设置索引。现贴出正确代码:
// Schema definition: the `id` field must be BOTH indexed and stored —
// set_indexed() is what makes delete-by-term work; set_stored() alone is not enough.
let mut schema_builder = Schema::builder();
schema_builder.add_u64_field("id", NumericOptions::default().set_indexed().set_stored());
// Full-text fields: tokenized with the "jieba" Chinese tokenizer, term
// frequencies and positions recorded, and stored so the original text can be retrieved.
schema_builder.add_text_field("title", TextOptions::default()
.set_indexing_options(
TextFieldIndexing::default()
.set_tokenizer("jieba")
.set_index_option(IndexRecordOption::WithFreqsAndPositions),
)
.set_stored());
schema_builder.add_text_field("content", TextOptions::default()
.set_indexing_options(
TextFieldIndexing::default()
.set_tokenizer("jieba")
.set_index_option(IndexRecordOption::WithFreqsAndPositions),
)
.set_stored());
let schema = schema_builder.build();
let index=Index::create_in_dir(p, schema.clone()).unwrap();
// Register the Chinese tokenizer under the name the schema fields reference ("jieba").
let tokenizer = tantivy_jieba::JiebaTokenizer {};
index.tokenizers().register("jieba", tokenizer);
return index;上边代码第二行中的set_indexed()是关键:只有将字段设置为可索引,才能通过该字段删除文档,否则删除操作不起任何作用。
以下是新建、更新、删除文档的正确代码:
// Build the index document(s) for a single article, loaded from the database by id.
//
// `article_id` — primary key of the article to (re-)index.
// Commits once after all rows are added; on a DB error the index is left untouched.
pub async fn create_article_index(article_id: u64){
    let schema=INDEX.schema();
    let id = schema.get_field("id").unwrap();
    let title = schema.get_field("title").unwrap();
    let content = schema.get_field("content").unwrap();
    let mut index_writer: IndexWriter=get_writer();
    // Fetch the article row(s) from the database.
    let res=orm::get_article(article_id.try_into().unwrap()).await;
    if let Ok(list) = res {
        for item in list {
            // Read the id before moving `title`/`content` out of `item`,
            // so no clones are needed.
            let ids:u64 = item.id.try_into().unwrap();
            // `title`/`content` are Option<String>; index an empty string when absent.
            let t_title = item.title.unwrap_or_default();
            let t_content = item.content.unwrap_or_default();
            index_writer.add_document(doc!(
                id => ids,
                title => t_title,
                content => t_content
            )).unwrap();
        }
        // Single commit after the loop makes all added documents searchable at once.
        index_writer.commit().unwrap();
    }
}
// Update the index for a single article.
// Tantivy has no in-place update: delete the stale document first
// (del_article_index commits the deletion), then rebuild it from the database.
pub async fn update_article_index(id: u64){
del_article_index(id);
create_article_index(id).await;
}
// Delete the indexed document whose `id` field equals `idu`.
//
// NOTE: this only works because the `id` field was created with set_indexed();
// delete_term() matches against the inverted index, not against stored values.
pub fn del_article_index(idu: u64){
    let schema=INDEX.schema();
    let id=schema.get_field("id").unwrap();
    let mut index_writer: IndexWriter=get_writer();
    // delete_term takes the Term by value — no clone needed.
    let term: Term = Term::from_field_u64(id, idu);
    // delete_term only stages the deletion; it takes effect on commit.
    let del_res=index_writer.delete_term(term);
    println!("删除结果:{}",del_res);
    let commit_stamp = index_writer.commit().unwrap();
    println!("删除索引ID成功:{},{}",idu,commit_stamp);
}
// Process-wide Tantivy Index, initialized lazily on first access via get_index().
lazy_static!{
static ref INDEX:Index = get_index();
}
// Open the on-disk index at ./article_index, creating it (schema + jieba
// tokenizer) on first run. The tokenizer is registered on every path, since
// registrations are not persisted with the index files.
fn get_index()->Index{
    // Best-effort: ignore the error when the directory already exists.
    let _ = fs::create_dir("article_index");
    let p = Path::new("article_index");
    let index = match Index::open_in_dir(p) {
        Ok(index) => index,
        Err(e) => {
            // First run (or unreadable meta): build the schema and create the index.
            println!("错误----:{}",e);
            let mut schema_builder = Schema::builder();
            // `id` must be indexed (not just stored) so delete_term can match it.
            schema_builder.add_u64_field("id", NumericOptions::default().set_indexed().set_stored());
            // Shared options for both text fields: jieba-tokenized, with
            // frequencies and positions, and stored for retrieval.
            let text_opts = || TextOptions::default()
                .set_indexing_options(
                    TextFieldIndexing::default()
                        .set_tokenizer("jieba")
                        .set_index_option(IndexRecordOption::WithFreqsAndPositions),
                )
                .set_stored();
            schema_builder.add_text_field("title", text_opts());
            schema_builder.add_text_field("content", text_opts());
            let schema = schema_builder.build();
            // Schema is not used afterwards — pass it by value, no clone.
            Index::create_in_dir(p, schema).unwrap()
        }
    };
    // Register the Chinese tokenizer on both paths; schema fields reference "jieba".
    index.tokenizers().register("jieba", tantivy_jieba::JiebaTokenizer {});
    index
}
2013-2024 LifeAdd生活方式 www.lifeadd.cn 版权所有 | 御融(北京)科技有限公司 All Rights
Reserved
增值电信业务经营许可证:京B2-20200664 | 京ICP备14004911号-7