【代码】Lucene快速入门

前言

用 Lucene API 生成索引并进行搜索

代码
package test;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Minimal Lucene walkthrough: {@link #test1()} writes a small product index to disk and
 * {@link #test2()} runs a term query against it and prints the hits.
 *
 * <p>Run {@code test1} before {@code test2}; the search test expects the index directory
 * to exist already.
 */
public class Test1 {

    /** Sample product rows, each in the form {@code "id, title, sellPoint"}. */
    String[] a = {
        "3, 华为 - 电脑, 爆款",
        "4, 华为手机, 旗舰",
        "5, 联想 - Thinkpad, 商务本",
        "6, 联想手机, 自拍神器"
    };

    /**
     * Resolves the on-disk index directory under the current user's home directory.
     *
     * <p>Java does not expand {@code ~} in path strings, so the home directory is read from
     * the {@code user.home} system property instead.
     *
     * @return the path {@code <user.home>/Downloads/File}
     */
    private static Path indexPath() {
        return Paths.get(System.getProperty("user.home"), "Downloads", "File");
    }

    /**
     * Builds the index: one {@link Document} per entry of {@link #a}.
     *
     * @throws IOException if the index directory cannot be opened or written
     */
    @Test
    public void test1() throws IOException {
        // try-with-resources closes writer, directory and analyzer (in that order)
        // even when indexing fails part-way through.
        try (SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
             FSDirectory dir = FSDirectory.open(indexPath());
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {

            for (String s : a) {
                String[] arr = s.split(",");
                // Rows are written as "id, title, sellPoint"; trim the padding after commas.
                long id = Long.parseLong(arr[0].trim());

                Document doc = new Document();
                // LongPoint only indexes the value for point/range queries; it is not stored.
                doc.add(new LongPoint("id", id));
                // Store the id as well so that doc.get("id") returns it at search time.
                doc.add(new StoredField("id", id));
                doc.add(new TextField("title", arr[1].trim(), Field.Store.YES));
                doc.add(new TextField("sellPoint", arr[2].trim(), Field.Store.YES));

                writer.addDocument(doc);
            }
            // No explicit flush needed: closing the writer commits pending changes
            // under the default IndexWriterConfig.
        }
    }

    /**
     * Searches the {@code title} field for the term {@code 华为} and prints, for each hit,
     * the internal doc number, the score and the stored fields.
     *
     * @throws IOException if the index directory cannot be opened or read
     */
    @Test
    public void test2() throws IOException {
        try (FSDirectory dir = FSDirectory.open(indexPath());
             DirectoryReader reader = DirectoryReader.open(dir)) {

            IndexSearcher searcher = new IndexSearcher(reader);
            // TermQuery matches the exact indexed token; the text is not analyzed here.
            TermQuery query = new TermQuery(new Term("title", "华为"));
            // Fetch at most 20 hits, e.g. [{doc:3, score:0.679}, {doc:1, score:0.515}]
            TopDocs topDocs = searcher.search(query, 20);

            for (ScoreDoc sd : topDocs.scoreDocs) {
                int id = sd.doc;
                float score = sd.score;
                Document doc = searcher.doc(id);
                System.out.println(id);
                System.out.println(score);
                System.out.println(doc.get("id"));
                System.out.println(doc.get("title"));
                System.out.println(doc.get("sellPoint"));
                System.out.println("------------------");
            }
        }
    }

}

完成