Lucene5.x索引操作工具类封装

Lucene5.x索引操作工具类封装

  •  2019 年 3 月 26 日
  •  759
  •  Java Lucene 

该工具类基于 Lucene 5.x 版本封装,分词器使用的是 IKAnalyzer。如果不了解如何在 5.x 版本中整合 IK,请先阅读《Lucene 5.x 集成中文分词库 IKAnalyzer》。

高亮参数封装

/**
 * 高亮参数封装
 */
public class HighlightParam {

    /**
     * 是否需要设置高亮
     */
    private boolean highlight;
    /**
     * 需要设置高亮的属性名
     */
    private String[] highlightFields;
    /**
     * 高亮前缀,添加默认值
     */
    private String prefix = "<font color=\"red\">";
    /**
     * 高亮后缀,添加默认值
     */
    private String suffix = "</font>";
    /**
     * 显示摘要最大长度,默认100
     */
    private int summaryLength = 100;


    public HighlightParam(String... highlightFields) {
        this.highlight = true;
        this.highlightFields = highlightFields;
    }

    public HighlightParam(String[] highlightFields, String prefix, String suffix, int summaryLength) {
        this.highlight = true;
        this.highlightFields = highlightFields;
        this.prefix = prefix;
        this.suffix = suffix;
        this.summaryLength = summaryLength;
    }

    public boolean isHighlight() {
        return highlight;
    }

    public void setHighlight(boolean highlight) {
        this.highlight = highlight;
    }

    public String[] getHighlightFields() {
        return highlightFields;
    }

    public void setHighlightFields(String[] highlightFields) {
        this.highlightFields = highlightFields;
    }

    public String getPrefix() {
        return prefix;
    }

    public void setPrefix(String prefix) {
        this.prefix = prefix;
    }

    public String getSuffix() {
        return suffix;
    }

    public void setSuffix(String suffix) {
        this.suffix = suffix;
    }

    public int getSummaryLength() {
        return summaryLength;
    }

    public void setSummaryLength(int summaryLength) {
        this.summaryLength = summaryLength;
    }
}

简单分页工具类

import java.util.List;

/**
 * Minimal pagination holder: it stores the requested page number and page
 * size and, once the total record count is known, derives the page count and
 * the start/end offsets of the current page.
 *
 * <p>The derived values ({@link #getTotalPage()}, {@link #getStartPos()},
 * {@link #getEndPos()}) and the normalized current page are only computed
 * when {@link #setTotalRecord(int)} is called.
 *
 * @param <T> type of the elements carried by the page
 */
public class Page<T> {

    /** Page size used when the caller supplies a non-positive one. */
    private static final int DEFAULT_PAGE_SIZE = 10;

    /** Total number of records matching the query. */
    private int totalRecord;

    /** Number of pages needed to show {@link #totalRecord} records. */
    private int totalPage;

    /** 1-based number of the current page. */
    private int currPage = 1;

    /** Number of records per page; always positive. */
    private int pageSize = DEFAULT_PAGE_SIZE;

    /** 0-based offset of the first record of the current page. */
    private int startPos;

    /** {@code currPage * pageSize}; not limited to {@link #totalRecord}. */
    private int endPos;

    /** Records of the current page. */
    private List<T> list;

    /** Recomputes the page count, the normalized current page and the start/end offsets. */
    private void calculatePage() {
        this.totalPage = (int) Math.ceil((double) this.totalRecord / this.pageSize);
        if (this.currPage <= 0) {
            this.currPage = 1;
        }
        // With no results at all the requested page number is left as is,
        // rather than being forced down to page 0.
        if (this.currPage > this.totalPage && this.totalPage != 0) {
            this.currPage = this.totalPage;
        }
        // Multiply in long and clamp so a huge page number cannot wrap the
        // offsets around to negative values.
        long start = this.currPage > 1 ? ((long) this.currPage - 1) * this.pageSize : 0L;
        long end = (long) this.currPage * this.pageSize;
        this.startPos = (int) Math.min(start, Integer.MAX_VALUE);
        this.endPos = (int) Math.min(end, Integer.MAX_VALUE);
    }

    private Page() {
    }

    /**
     * Creates a page request.
     *
     * @param currPage 1-based page number; values below 1 are corrected to 1
     *                 when the total record count is set
     * @param pageSize records per page; a non-positive value is replaced by
     *                 the default of 10 so the page arithmetic stays well defined
     */
    public Page(int currPage, int pageSize) {
        this.currPage = currPage;
        this.pageSize = pageSize > 0 ? pageSize : DEFAULT_PAGE_SIZE;
    }

    /** @return total number of matching records */
    public int getTotalRecord() {
        return totalRecord;
    }

    /**
     * Stores the total record count and recomputes every derived value.
     *
     * @param totalRecord total number of matching records
     */
    public void setTotalRecord(int totalRecord) {
        this.totalRecord = totalRecord;
        calculatePage();
    }

    /** @return number of pages, or 0 until the total record count is set */
    public int getTotalPage() {
        return totalPage;
    }

    /** @return 1-based current page number */
    public int getCurrPage() {
        return currPage;
    }

    /** @return number of records per page */
    public int getPageSize() {
        return pageSize;
    }

    /** @return 0-based offset of the first record of the current page */
    public int getStartPos() {
        return startPos;
    }

    /** @return {@code currPage * pageSize}, which may exceed the total record count */
    public int getEndPos() {
        return endPos;
    }

    /** @return records of the current page, or {@code null} if none were set */
    public List<T> getList() {
        return list;
    }

    /** @param list records of the current page */
    public void setList(List<T> list) {
        this.list = list;
    }

    @Override
    public String toString() {
        return "Page{totalRecord=" + totalRecord
                + ", totalPage=" + totalPage
                + ", currPage=" + currPage
                + ", pageSize=" + pageSize
                + ", startPos=" + startPos
                + ", endPos=" + endPos
                + ", list=" + list
                + '}';
    }

}

Lucene工具类

import com.bmg.common.util.PropertiesUtil;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.store.FSDirectory;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.*;

/**
 * Static helpers around one shared Lucene 5.x index: document add / update /
 * delete through a single {@link IndexWriter}, plus lookup, counting, paged
 * search and optional result highlighting.
 *
 * <p>The index location is read from the {@code lucene.dir} entry of
 * {@code application.properties} when the class is loaded. If that
 * initialization fails the error is logged and the class stays loaded; the
 * write operations then fail with {@link IllegalStateException}.
 *
 * <p>Readers are opened from the writer (near-real-time), so searches see
 * documents added or deleted through this class without an explicit commit.
 * Pending changes are written out when the writer is closed by the JVM
 * shutdown hook.
 */
public class LuceneUtil {

    private static final java.util.logging.Logger LOG =
            java.util.logging.Logger.getLogger(LuceneUtil.class.getName());

    public static final Analyzer analyzer = new IKAnalyzer5x();

    private static IndexWriter writer;

    private static FSDirectory directory;

    static {
        try {
            String luceneDir = PropertiesUtil.getProperty("application.properties", "lucene.dir");
            IndexWriterConfig config = new IndexWriterConfig(analyzer);
            directory = FSDirectory.open(Paths.get(luceneDir));
            writer = new IndexWriter(directory, config);
        } catch (Exception e) {
            // Best effort: keep the class loadable even when the index cannot be opened.
            LOG.log(java.util.logging.Level.SEVERE, "Failed to open the Lucene index", e);
        }

        // Close the writer, then the directory, when the JVM shuts down.
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (Exception e) {
                LOG.log(java.util.logging.Level.SEVERE, "Failed to close the IndexWriter", e);
            }
            try {
                if (directory != null) {
                    directory.close();
                }
            } catch (Exception e) {
                LOG.log(java.util.logging.Level.SEVERE, "Failed to close the index directory", e);
            }
        }));
    }

    /**
     * Opens a new reader on the index. When the shared writer is available
     * the reader is opened from it, so it also sees changes that have not
     * been committed yet; otherwise it is opened from the directory.
     *
     * <p>The caller owns the returned reader and must close it.
     *
     * @return a newly opened reader
     * @throws IOException if the index cannot be read
     */
    public static IndexReader getIndexReader() throws IOException {
        if (writer != null) {
            return DirectoryReader.open(writer, true);
        }
        return DirectoryReader.open(directory);
    }

    /**
     * Creates a searcher over a newly opened reader. The caller is
     * responsible for closing that reader, available through
     * {@code searcher.getIndexReader()}.
     *
     * @return a searcher backed by a new reader
     * @throws IOException if the index cannot be read
     */
    public static IndexSearcher getIndexSearcher() throws IOException {
        return new IndexSearcher(getIndexReader());
    }

    /** Returns the shared writer, or fails with a clear message when initialization did not succeed. */
    private static IndexWriter requireWriter() {
        if (writer == null) {
            throw new IllegalStateException("Lucene IndexWriter is not available; index initialization failed");
        }
        return writer;
    }

    /**
     * Adds a document to the index. An {@link IOException} is logged, not rethrown.
     *
     * @param document document to add
     */
    public static void add(Document document) {
        IndexWriter indexWriter = requireWriter();
        try {
            indexWriter.addDocument(document);
        } catch (IOException e) {
            LOG.log(java.util.logging.Level.SEVERE, "Failed to add document to the index", e);
        }
    }

    /**
     * Deletes every document containing any of the given terms. An
     * {@link IOException} is logged, not rethrown.
     *
     * @param terms terms identifying the documents to delete
     */
    public static void delete(Term... terms) {
        IndexWriter indexWriter = requireWriter();
        try {
            indexWriter.deleteDocuments(terms);
        } catch (IOException e) {
            LOG.log(java.util.logging.Level.SEVERE, "Failed to delete documents from the index", e);
        }
    }

    /**
     * Deletes all documents in the index. An {@link IOException} is logged,
     * not rethrown.
     */
    public static void deleteAll() {
        IndexWriter indexWriter = requireWriter();
        try {
            indexWriter.deleteAll();
        } catch (IOException e) {
            LOG.log(java.util.logging.Level.SEVERE, "Failed to delete all documents from the index", e);
        }
    }

    /**
     * Replaces the documents containing {@code term} with {@code document}.
     * An {@link IOException} is logged, not rethrown.
     *
     * @param term     term identifying the documents to replace
     * @param document replacement document
     */
    public static void update(Term term, Document document) {
        IndexWriter indexWriter = requireWriter();
        try {
            indexWriter.updateDocument(term, document);
        } catch (IOException e) {
            LOG.log(java.util.logging.Level.SEVERE, "Failed to update document in the index", e);
        }
    }

    /**
     * Loads a stored document by its Lucene document number.
     *
     * @param docID        Lucene document number
     * @param fieldsToLoad names of the stored fields to load; {@code null}
     *                     is passed through to the reader unchanged
     * @return the document, or {@code null} if the index could not be read
     */
    public static Document searchDocById(int docID, Set<String> fieldsToLoad) {
        try (IndexReader reader = getIndexReader()) {
            return reader.document(docID, fieldsToLoad);
        } catch (IOException e) {
            LOG.log(java.util.logging.Level.WARNING, "Failed to load document " + docID, e);
            return null;
        }
    }

    /**
     * Loads a stored document by its Lucene document number, without
     * restricting the fields.
     *
     * @param docID Lucene document number
     * @return the document, or {@code null} if the index could not be read
     */
    public static Document searchDocById(int docID) {
        return searchDocById(docID, null);
    }

    /**
     * Counts the documents matching a query.
     *
     * @param query query to evaluate
     * @return number of matching documents, or 0 if the index could not be read
     */
    public static int searchTotalRecord(Query query) {
        try (IndexReader reader = getIndexReader()) {
            return countHits(new IndexSearcher(reader), query);
        } catch (IOException e) {
            LOG.log(java.util.logging.Level.SEVERE, "Failed to count matching documents", e);
            return 0;
        }
    }

    /** Counts the hits of {@code query} without collecting or scoring the matching documents. */
    private static int countHits(IndexSearcher searcher, Query query) throws IOException {
        TotalHitCountCollector counter = new TotalHitCountCollector();
        searcher.search(query, counter);
        return counter.getTotalHits();
    }

    /**
     * Runs a sorted, paged search and stores the result in {@code page}.
     *
     * <p>The total hit count is written to the page first, because the page
     * derives its start offset from it. The page's list is always set: it is
     * empty when nothing matches or when the index could not be read (the
     * error is logged).
     *
     * @param query   query to evaluate
     * @param sort    sort order; {@code null} means sort by relevance
     * @param page    page request, updated in place with the total count and the hits
     * @param hlParam highlight options, or {@code null} for no highlighting
     * @return the same {@code page} instance
     */
    public static Page<Document> queryPage(Query query, Sort sort, Page<Document> page, HighlightParam hlParam) {
        List<Document> result = new ArrayList<>();
        boolean totalKnown = false;
        try (IndexReader reader = getIndexReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);

            // Count and fetch against the same reader so both see the same index state.
            page.setTotalRecord(countHits(searcher, query));
            totalKnown = true;

            Sort effectiveSort = sort == null ? Sort.RELEVANCE : sort;
            TopFieldCollector collector = TopFieldCollector.create(
                    effectiveSort, page.getStartPos() + page.getPageSize(), false, false, false);
            searcher.search(query, collector);
            ScoreDoc[] scoreDocs = collector.topDocs(page.getStartPos(), page.getPageSize()).scoreDocs;

            if (scoreDocs != null && scoreDocs.length > 0) {
                List<Document> docList = new ArrayList<>(scoreDocs.length);
                for (ScoreDoc scoreDoc : scoreDocs) {
                    docList.add(searcher.doc(scoreDoc.doc));
                }
                if (hlParam != null && hlParam.isHighlight()) {
                    highlightField(docList, query, hlParam);
                }
                result = docList;
            }
        } catch (IOException e) {
            LOG.log(java.util.logging.Level.SEVERE, "Paged search failed", e);
            if (!totalKnown) {
                // Still let the page compute its derived values.
                page.setTotalRecord(0);
            }
        }
        page.setList(result);
        return page;
    }

    /**
     * Runs a sorted, paged search without highlighting.
     *
     * @param query query to evaluate
     * @param sort  sort order; {@code null} means sort by relevance
     * @param page  page request, updated in place with the total count and the hits
     * @return the same {@code page} instance
     */
    public static Page<Document> queryPage(Query query, Sort sort, Page<Document> page) {
        return queryPage(query, sort, page, null);
    }

    /**
     * Replaces the values of the configured fields with highlighted
     * fragments, in place. Fields are resolved per document, so a document
     * that lacks one of the configured fields is simply left untouched for
     * that field.
     *
     * @param docList documents to process
     * @param query   query whose terms are highlighted
     * @param hlParam highlight options
     * @return {@code docList}, or an empty list when {@code docList} is null or empty
     */
    private static List<Document> highlightField(List<Document> docList, Query query, HighlightParam hlParam) {
        if (docList == null || docList.isEmpty()) {
            return Collections.emptyList();
        }
        String[] requested = hlParam.getHighlightFields();
        if (requested == null || requested.length == 0) {
            return docList;
        }
        // De-duplicate so a field listed twice is not highlighted twice.
        Set<String> fieldNames = new LinkedHashSet<>(Arrays.asList(requested));
        Highlighter highlighter = createHighlighter(query, hlParam);
        int summaryLength = Math.max(0, hlParam.getSummaryLength());

        for (Document document : docList) {
            for (String fieldName : fieldNames) {
                String[] values = document.getValues(fieldName);
                if (values.length == 0) {
                    continue;
                }
                document.removeFields(fieldName);
                for (String text : values) {
                    String highlighted = highlightText(highlighter, fieldName, text, summaryLength);
                    document.add(new TextField(fieldName, highlighted, Field.Store.YES));
                }
            }
        }

        return docList;
    }

    /**
     * Returns the best highlighted fragment of {@code text}, or the leading
     * {@code summaryLength} characters of it when nothing matches or
     * highlighting fails.
     */
    private static String highlightText(Highlighter highlighter, String fieldName, String text, int summaryLength) {
        String fallback = text.substring(0, Math.min(summaryLength, text.length()));
        try {
            String fragment = highlighter.getBestFragment(analyzer, fieldName, text);
            return fragment == null ? fallback : fragment;
        } catch (Exception e) {
            // Highlighting is best effort: any failure falls back to the plain summary.
            LOG.log(java.util.logging.Level.WARNING, "Failed to highlight field " + fieldName, e);
            return fallback;
        }
    }

    /**
     * Builds a highlighter for the query, using the markup and fragment size
     * from the highlight options.
     *
     * @param query   query whose terms are highlighted
     * @param hlParam highlight options
     * @return the configured highlighter
     */
    private static Highlighter createHighlighter(Query query, HighlightParam hlParam) {
        Formatter formatter = new SimpleHTMLFormatter(hlParam.getPrefix(), hlParam.getSuffix());
        Scorer fragmentScorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
        Fragmenter fragmenter = new SimpleFragmenter(hlParam.getSummaryLength());
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }

    /**
     * Small demo: indexes one document, runs a highlighted paged search and
     * deletes by term.
     */
    public static void main(String[] args) throws Exception {
        // Add
        Document document = new Document();
        document.add(new TextField("id", "11", Field.Store.YES));
        document.add(new TextField("title", "SpringCloud学习五:路由网关Zuul", Field.Store.YES));
        document.add(new TextField("desc", "服务网关是微服务架构中一个不可或缺的部分。通过服务网关统一向外系统提供REST API的过程中,除了具备服务路由、均衡负载功能之外,它还具备了权限控制等功能。Spring Cloud Netflix中的Zuul就担任了这样的一个角色,为微服务架构提供了前门保护的作用,同时将权限控制这些较重的非业务逻辑内容迁移到服务路由层面,使得服务集群主体能够具备更高的可复用性和可测试性", Field.Store.YES));
        add(document);

        // Search
        FuzzyQuery fuzzyQuery1 = new FuzzyQuery(new Term("title", "SpringCloud"));
        FuzzyQuery fuzzyQuery2 = new FuzzyQuery(new Term("desc", "故障"));
        BooleanQuery.Builder query = new BooleanQuery.Builder();
        query.add(fuzzyQuery1, BooleanClause.Occur.SHOULD);
        query.add(fuzzyQuery2, BooleanClause.Occur.SHOULD);
        Sort sort = new Sort(new SortField("id", SortField.Type.SCORE));

        Page<Document> documentPage = queryPage(query.build(), sort, new Page<>(1, 5), new HighlightParam("title", "desc"));
        System.out.println(documentPage);
        documentPage.getList().forEach(d -> d.getFields().forEach(System.out::println));

        // Delete
        delete(new Term("id", "2"));

    }

}

扫一扫分享到微信

已有 条评论
写评论