/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool 
          with fuzzy matching, translation memory, keyword search, 
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2018 Thomas Cordonnier
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.core.search;

import java.util.List;

import org.omegat.core.data.IProject;
import org.omegat.core.data.SourceTextEntry;
import org.omegat.core.data.TMXEntry;
import org.omegat.core.matching.NearString;
import org.omegat.core.events.IStopped;
import org.omegat.core.matching.ISimilarityCalculator;
import org.omegat.core.matching.LevenshteinDistance;
import org.omegat.gui.search.PreTranslateDialog;
import org.omegat.tokenizer.ITokenizer;
import org.omegat.util.Token;


/**
 * Searcher which compares each selected ongoing entry against <i>all</i> ongoing source segments,
 * including non-translated ones, and groups every entry with the segment whose source text is
 * most similar to it. <br/>
 * Used by pre-translate dialog where "match in = Source"
 * 
 * @author Thomas Cordonnier
 */
public class CrossSourceSearcher extends ProjectSearcher implements IStopped {

    /** Sort key selecting which tokenization is used when comparing source texts. */
    private final NearString.SORT_KEY m_criteria;
    /** Minimal similarity a candidate must reach to be retained as best match. */
    private final int m_minVal;
    /** Group size from which a group is reported through the size-dependent {@code addEntry} call. */
    private final int m_minGroupSize;
    /** Group each entry belongs to, indexed by {@code entryNum() - 1}; {@code null} while not grouped. */
    private CrossSourceSearchResultEntry[] entryPerId;
    /** Filter on the translation state, applied to tested entries and to candidates. */
    protected final TranslationStateFilter m_translationStateFilter;
    /** Expression which the source text of tested entries and of candidates must match. */
    protected final TextExpression m_searchSource;

    /** Source tokenizer of the project; re-fetched lazily by {@link #tokenize(String)} if null. */
    private ITokenizer tokenizer;
    /** Calculator giving the similarity between two token arrays. */
    private final ISimilarityCalculator distance = new LevenshteinDistance();

    /**
     * Create new searcher instance.
     * 
     * @param project
     *            Current project
     * @param window
     *            Dialog which started the search (forwarded to the parent searcher)
     * @param numberOfResults
     *            Number of results (forwarded to the parent searcher)
     * @param minGroupSize
     *            Group size from which a group is reported
     * @param searchSource
     *            Expression which source texts must match
     * @param filter
     *            Translation state filter
     * @param criteria
     *            Sort key selecting the tokenization used for the comparison
     * @param minValue
     *            Minimal similarity needed to associate two segments
     * @param author
     *            Author expression (forwarded to the parent searcher)
     * @param dateAfter
     *            Lower date bound (forwarded to the parent searcher)
     * @param dateBefore
     *            Upper date bound (forwarded to the parent searcher)
     */
    public CrossSourceSearcher(final IProject project, PreTranslateDialog window, int numberOfResults, int minGroupSize,
        TextExpression searchSource, TranslationStateFilter filter, final NearString.SORT_KEY criteria, final int minValue,
        TextExpression author, long dateAfter, long dateBefore) {
        super(window, project, true, false, numberOfResults,
            author, null, dateAfter, dateBefore);

        this.m_criteria = criteria;
        this.m_minVal = minValue;
        this.m_minGroupSize = minGroupSize;
        // One slot per project entry, addressed with entryNum() - 1
        this.entryPerId = new CrossSourceSearchResultEntry[project.getAllEntries().size()];
        this.tokenizer = m_project.getSourceTokenizer();
        this.m_searchSource = searchSource;
        this.m_translationStateFilter = filter;
    }

    /**
     * Tells whether the search has been stopped.
     * 
     * @return the result of {@code isInterrupted()}
     */
    public boolean isStopped() {
        return isInterrupted();
    }

    // -------------- Searcher extension

    /**
     * Tests one ongoing entry: if it passes the filters and its source matches the search
     * expression, associates it with the group of the most similar other entry (creating
     * this group when necessary) and reports the group.
     * 
     * @param ste
     *            Entry to be tested
     */
    @Override
    protected void testOngoing(SourceTextEntry ste) {
        TMXEntry tmxEntry = m_project.getTranslationInfo(ste);
        if (!getTranslationStateFilter().isValidEntry(tmxEntry)) {
            return;
        }
        if (!checkFilters(tmxEntry)) {
            return;
        }
        List<SearchMatch> srcMatches = m_searchSource.searchString(ste.getSrcText());
        if (srcMatches == null) {
            return;
        }

        int idxFound = findMostSimilarEntryNum(ste);
        if (idxFound == -1) {
            return; // no entry reaches m_minVal
        }

        // NOTE(review): in both branches the group is added a first time when it reaches
        // m_minGroupSize, then a second time unconditionally. This looks like it defeats the
        // size check; kept as is because the semantics of addEntry are not visible here.
        CrossSourceSearchResultEntry group = entryPerId[idxFound - 1];
        if (group != null) {
            // The best match already belongs to a group: join it
            group.add(ste);
            if (group.entries().size() >= m_minGroupSize) {
                this.addEntry(group);
            }
            entryPerId[ste.entryNum() - 1] = group;
            addEntry(group);
        } else {
            // Start a new group containing the current entry and its best match
            CrossSourceSearchResultEntry entry = new CrossSourceSearchResultEntry(ste, tmxEntry, srcMatches);
            entry.add(m_project.getAllEntries().get(idxFound - 1));
            if (entry.entries().size() >= m_minGroupSize) {
                this.addEntry(entry);
            }
            entryPerId[ste.entryNum() - 1] = entry;
            entryPerId[idxFound - 1] = entry;
            addEntry(entry);
        }
    }

    /**
     * Searches, among all other project entries which pass the filters and match the search
     * expression, the one whose source is the most similar to the given entry.
     * 
     * @param ste
     *            Entry for which a similar entry is searched
     * @return entry number of the best candidate, or -1 if no candidate reaches the minimal value
     */
    private int findMostSimilarEntryNum(SourceTextEntry ste) {
        Token[] selfTokens = this.tokenize(ste.getSrcText());
        // Starting one below the threshold makes "similarity > maxFound" accept similarity == m_minVal
        int maxFound = m_minVal - 1;
        int idxFound = -1;
        for (SourceTextEntry candidate : m_project.getAllEntries()) {
            if (candidate == ste) {
                continue;
            }
            // Filters are applied to the candidate itself; a rejected candidate is skipped
            // without aborting the search for the current entry.
            TMXEntry candidateInfo = m_project.getTranslationInfo(candidate);
            if (!getTranslationStateFilter().isValidEntry(candidateInfo)) {
                continue;
            }
            if (!checkFilters(candidateInfo)) {
                continue;
            }
            if (!m_searchSource.matchesString(candidate.getSrcText())) {
                continue;
            }

            Token[] candTokens = this.tokenize(candidate.getSrcText());
            int similarity = distance.calcSimilarity(selfTokens, candTokens);
            if (similarity > maxFound) {
                idxFound = candidate.entryNum();
                maxFound = similarity;
            }
        }
        return idxFound;
    }

    /**
     * Tokenizes a text in the way required by the selected criteria.
     * 
     * @param st
     *            Text to be tokenized
     * @return words with stemming for SCORE, words without stemming for SCORE_NO_STEM,
     *         verbatim tokens in all other cases
     */
    private Token[] tokenize(String st) {
        if (tokenizer == null) {
            tokenizer = m_project.getSourceTokenizer();
        }
        switch (this.m_criteria) {
            case SCORE:
                return tokenizer.tokenizeWords(st, ITokenizer.StemmingMode.MATCHING);
            case SCORE_NO_STEM:
                return tokenizer.tokenizeWords(st, ITokenizer.StemmingMode.NONE);
            case ADJUSTED_SCORE:
            default:
                return tokenizer.tokenizeVerbatim(st);
        }
    }

    /** @return the translation state filter given at construction */
    @Override
    public TranslationStateFilter getTranslationStateFilter() {
        return m_translationStateFilter;
    }

    /**
     * @param location
     *            Bit mask of search scopes
     * @return true only if the mask contains {@code SEARCH_SCOPE_ONGOING}
     */
    @Override
    public boolean searchOn(int location) {
        return (location & SEARCH_SCOPE_ONGOING) > 0;
    }
}
