/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool 
          with fuzzy matching, translation memory, keyword search, 
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2020 Thomas CORDONNIER
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.core.matching.external.lucene;

import java.util.regex.*;
import java.util.*;
import java.io.*;

import org.omegat.core.glossaries.IGlossary;
import org.omegat.core.glossaries.GlossaryEntryStore;
import org.omegat.core.search.ISearchable;
import org.omegat.core.search.TextExpression;
import org.omegat.core.data.PrepareTMXEntry;
import org.omegat.core.Core;
import org.omegat.tokenizer.BaseTokenizer;
import org.omegat.filters2.master.PluginUtils;
import org.omegat.util.Language;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

/**
 * Glossary implementation that looks terms up in a Lucene index stored on disk.
 *
 * <p>The index is expected to contain documents with a {@code "src"} field (the
 * field that is queried) and a {@code "tra"} field; both are read back to build
 * the {@link GlossaryEntryStore} results. The query text is tokenized with the
 * glossary analyzer of the tokenizer registered for the project's source language.
 *
 * <p>NOTE(review): the index reader and searcher opened by the constructor are
 * never closed by this class; no close/dispose hook is visible here - confirm
 * whether {@code IGlossary} offers one.
 */
public class LuceneGlossaryReader implements IGlossary {

    /** Index field that is searched and returned as the source term. */
    private static final String FIELD_SOURCE = "src";

    /** Index field returned as the translation of the term. */
    private static final String FIELD_TRANSLATION = "tra";

    /** Upper bound on the number of hits fetched for one search. */
    private static final int MAX_RESULTS = 1000;

    private final Analyzer analyzer;

    private final IndexReader indexReader;
    private final IndexSearcher searcher;
    private final String name;

    /**
     * Creates a reader from a configuration whose {@code "dir"} property holds
     * the location of the index.
     *
     * @param prop configuration; must contain a {@code "dir"} entry
     * @throws NullPointerException if the {@code "dir"} property is absent
     * @throws IOException if the index cannot be opened
     * @throws InstantiationException if the source-language tokenizer cannot be instantiated
     * @throws IllegalAccessException if the source-language tokenizer constructor is not accessible
     */
    public LuceneGlossaryReader(java.util.Properties prop)
            throws IOException, InstantiationException, IllegalAccessException {
        this(new File(Objects.requireNonNull(prop.getProperty("dir"),
                "Lucene glossary configuration has no 'dir' property")));
    }

    /**
     * Opens the Lucene index located at the given path.
     *
     * <p>If the path is not a directory, its parent is used instead, so a file
     * inside the index directory may be given as well.
     *
     * @param indexDirectory the index directory, or a file located inside it
     * @throws IOException if no directory can be derived from the path or the index cannot be opened
     * @throws InstantiationException if the source-language tokenizer cannot be instantiated
     * @throws IllegalAccessException if the source-language tokenizer constructor is not accessible
     */
    public LuceneGlossaryReader(File indexDirectory)
            throws IOException, InstantiationException, IllegalAccessException {
        System.err.println("Reading Lucene : " + indexDirectory);
        File directory = indexDirectory.isDirectory() ? indexDirectory : indexDirectory.getParentFile();
        if (directory == null) {
            // A bare relative file name has no parent; fail clearly instead of with an NPE below.
            throw new FileNotFoundException("Not a Lucene index directory: " + indexDirectory);
        }

        // Build the analyzer before touching the index, so that a tokenizer failure
        // cannot leave an opened index reader behind.
        BaseTokenizer sourceTokenizer = (BaseTokenizer) PluginUtils
                .getTokenizerClassForLanguage(Core.getProject().getProjectProperties().getSourceLanguage())
                .newInstance();
        this.analyzer = sourceTokenizer.getGlossaryAnalyser();

        this.name = directory.getPath();
        this.indexReader = IndexReader.open(FSDirectory.open(directory));
        this.searcher = new IndexSearcher(indexReader);
    }

    /** @return the constant engine name {@code "Lucene"} */
    public String getEngineName() {
        return "Lucene";
    }

    /** @return the path of the index directory */
    public String getShortName() {
        return name;
    }

    /** @return the path of the index directory followed by {@code " (Lucene)"} */
    public String getLongName() {
        return name + " (Lucene)";
    }

    /** @return always {@code false} */
    public boolean isExternal() {
        return false;
    }

    /** @return always {@code true}; the index is not inspected */
    public boolean hasContents() {
        return true;
    }

    /**
     * Turns free text into a query on the source field.
     *
     * <p>The text is tokenized with the glossary analyzer; every distinct token
     * becomes a term clause combined with {@code occur}. The resulting group is
     * wrapped as a single mandatory clause of the returned query.
     *
     * @param text text to tokenize
     * @param occur how each individual term clause participates in the match
     * @return the query wrapping all term clauses
     * @throws IOException if tokenization fails
     */
    private BooleanQuery buildQuery(String text, BooleanClause.Occur occur) throws IOException {
        // A set removes repeated tokens so that each term yields exactly one clause.
        Set<String> terms = new HashSet<String>();
        TokenStream tokens = analyzer.tokenStream(FIELD_SOURCE, new StringReader(text));
        try {
            TermAttribute termAttribute = tokens.addAttribute(TermAttribute.class);
            tokens.reset();
            while (tokens.incrementToken()) {
                terms.add(termAttribute.term());
            }
            tokens.end();
        } finally {
            // Release the stream even when tokenization throws.
            tokens.close();
        }

        final BooleanQuery termsQuery = new BooleanQuery();
        for (String term : terms) {
            termsQuery.add(new TermQuery(new Term(FIELD_SOURCE, term)), occur);
        }

        final BooleanQuery wrapper = new BooleanQuery();
        wrapper.add(new BooleanClause(termsQuery, BooleanClause.Occur.MUST));
        return wrapper;
    }

    /**
     * Searches the index for entries whose source field shares at least one
     * token with the given text.
     *
     * @param sLang source language (not used by this implementation)
     * @param tLang target language (not used by this implementation)
     * @param srcText text to look terms up for
     * @return the matching entries, at most {@value #MAX_RESULTS}
     * @throws Exception if tokenization or the index search fails
     */
    public Iterable<GlossaryEntryStore> search(Language sLang, Language tLang, String srcText) throws Exception {
        return executeQuery(buildQuery(srcText, BooleanClause.Occur.SHOULD), MAX_RESULTS);
    }

    /**
     * Runs the query immediately and exposes the hits as a lazily-loaded sequence:
     * the stored document of a hit is only fetched when the iterator reaches it.
     *
     * @param query query to run
     * @param maxCount maximum number of hits to retrieve
     * @return an iterable over the hits; each call to {@code iterator()} starts from the first hit
     * @throws IOException if the search fails
     */
    private Iterable<GlossaryEntryStore> executeQuery(final BooleanQuery query, final int maxCount)
            throws IOException {
        final ScoreDoc[] hits = searcher.search(query, maxCount).scoreDocs;
        return () -> new Iterator<GlossaryEntryStore>() {
            /** Index of the hit that the next call to {@code next()} will return. */
            private int nextIndex = 0;

            @Override
            public boolean hasNext() {
                return nextIndex < hits.length;
            }

            @Override
            public GlossaryEntryStore next() {
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                try {
                    return toEntry(searcher.doc(hits[nextIndex++].doc));
                } catch (IOException e) {
                    // Iterator.next() cannot throw checked exceptions.
                    throw new UncheckedIOException(e);
                }
            }
        };
    }

    /**
     * Converts an index document into a glossary entry using its source and
     * translation fields; the third constructor argument is left empty.
     *
     * @param doc document loaded from the index
     * @return the entry, with this reader passed as its last argument
     */
    private GlossaryEntryStore toEntry(Document doc) {
        return new GlossaryEntryStore(doc.get(FIELD_SOURCE), doc.get(FIELD_TRANSLATION), "", this);
    }
}
