/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool 
          with fuzzy matching, translation memory, keyword search, 
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2018 Thomas Cordonnier
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 **************************************************************************/

package org.omegat.core.search;

import java.util.List;

import org.omegat.core.data.IProject;
import org.omegat.core.data.SourceTextEntry;
import org.omegat.core.data.TMXEntry;
import org.omegat.core.matching.NearString;
import org.omegat.core.matching.FuzzyMatcher;
import org.omegat.core.statistics.FindMatches;
import org.omegat.core.events.IStopped;
import org.omegat.core.matching.ISimilarityCalculator;
import org.omegat.core.matching.LevenshteinDistance;
import org.omegat.gui.search.PreTranslateDialog;
import org.omegat.util.Token;
import org.omegat.tokenizer.ITokenizer;


/**
 * This searcher will check selected items against <i>all</i> ongoing source segments, including non-translated ones. <br/>
 * Used by pre-translate dialog where "match in = Source".
 * <p>
 * For every entry handed to {@link #buildOngoingEntry}, the most similar other source segment of the project
 * is looked up and both are put in the same {@link CrossSourceSearchResultEntry} group. A group is handed to
 * <code>addEntry</code> once it holds at least <code>minGroupSize</code> entries.
 * 
 * @author Thomas Cordonnier
 */
public class CrossSourceSearcher extends ProjectSearcher implements IStopped {

    /**
     * Create new searcher instance.
     * 
     * @param project
     *            Current project; its entry list fixes the size of the per-entry lookup tables
     * @param window
     *            Pre-translate dialog, forwarded to the {@link ProjectSearcher} constructor
     * @param numberOfResults
     *            Forwarded to the {@link ProjectSearcher} constructor
     * @param minGroupSize
     *            Number of entries a group must contain before it is passed to <code>addEntry</code>
     * @param searchSource
     *            Expression applied to source text, forwarded to the {@link ProjectSearcher} constructor
     * @param filter
     *            Translation state filter, forwarded to the {@link ProjectSearcher} constructor
     * @param criteria
     *            Selects how source text is tokenized before similarity is computed
     * @param minValue
     *            Lowest similarity for which another segment is accepted as a match
     * @param author
     *            Forwarded to the {@link ProjectSearcher} constructor
     * @param dateAfter
     *            Forwarded to the {@link ProjectSearcher} constructor
     * @param dateBefore
     *            Forwarded to the {@link ProjectSearcher} constructor
     */
    public CrossSourceSearcher(final IProject project, PreTranslateDialog window, int numberOfResults, int minGroupSize,
        TextExpression searchSource, TranslationStateFilter filter, final NearString.SORT_KEY criteria, final int minValue,
        TextExpression author, long dateAfter, long dateBefore) {

        super (window, project, true, numberOfResults, 
            filter, ProjectSearcher.SEARCH_SCOPE_ONGOING, // Can modify translated segments, but search only ongoing
            searchSource, null, null, false, // search only in source
            author, null, dateAfter, dateBefore);

        this.m_criteria = criteria;
        this.m_minVal = minValue;
        this.m_minGroupSize = minGroupSize;

        final int entryCount = project.getAllEntries().size();
        this.entryPerId = new CrossSourceSearchResultEntry[entryCount];
        this.tokensPerId = new Token[entryCount][];
        this.tokenizer = m_project.getSourceTokenizer();
    }

    /**
     * @return the result of <code>isInterrupted()</code>
     */
    public boolean isStopped() {
        return isInterrupted();
    }

    /**
     * Delegates to the superclass implementation, always passing <code>false</code> instead of the
     * received <code>add</code> flag.
     * NOTE(review): presumably this is so that results are only published through the
     * <code>addEntry</code> calls in {@link #buildOngoingEntry} -- confirm against ProjectSearcher.
     * 
     * @param ste
     *            entry to test
     * @param add
     *            ignored
     * @return whatever the superclass returns for <code>(ste, false)</code>
     */
    public OngoingSearchResultEntry testOngoing(SourceTextEntry ste, boolean add) {
        return super.testOngoing(ste, false);
    }

    // -------------- Searcher extension

    /**
     * Finds the project entry whose source text is the most similar to the one of <code>ste</code>
     * and puts both in the same group.
     * <p>
     * If the best match already belongs to a group, <code>ste</code> joins it; otherwise a new group is
     * created from <code>ste</code> and the best match is added to it. The group is passed to
     * <code>addEntry</code> when it contains at least <code>minGroupSize</code> entries.
     * 
     * @return the group <code>ste</code> has been put in, or <code>null</code> when no other entry
     *         reaches the minimal similarity
     */
    @Override
    protected CrossSourceSearchResultEntry buildOngoingEntry(SourceTextEntry ste, TMXEntry tmxEntry,
            List<SearchMatch> srcMatches, List<SearchMatch> targetMatches, List<SearchMatch> noteMatches) {
        final Token[] selfTokens = tokensOf(ste);

        // Search the entry with the best similarity with the current one.
        // Starting at m_minVal - 1 with a strict comparison accepts only scores >= m_minVal,
        // and on equal scores the first entry met is kept.
        int bestScore = m_minVal - 1;
        int bestEntryNum = -1;
        for (SourceTextEntry candidate : m_project.getAllEntries()) {
            if (candidate == ste) {
                continue;
            }
            int similarity = FuzzyMatcher.calcSimilarity(distance, selfTokens, tokensOf(candidate));
            if (similarity > bestScore) {
                bestScore = similarity;
                bestEntryNum = candidate.entryNum();
            }
        }
        if (bestEntryNum == -1) {
            return null; // no entry is >= m_minVal
        }

        // Entry numbers are used as 1-based positions in the project entry list.
        final int bestIdx = bestEntryNum - 1;
        CrossSourceSearchResultEntry group = entryPerId[bestIdx];
        if (group != null) {
            group.add(ste);
        } else {
            group = new CrossSourceSearchResultEntry(ste, tmxEntry, srcMatches, targetMatches, noteMatches);
            group.add(m_project.getAllEntries().get(bestIdx));
        }

        // NOTE(review): addEntry is called on every call where the group is at or above the threshold,
        // so the same group may be passed several times -- confirm that addEntry tolerates this.
        if (group.entries().size() >= m_minGroupSize) {
            this.addEntry(group);
        }

        entryPerId[ste.entryNum() - 1] = group;
        entryPerId[bestIdx] = group; // no-op when the group was already registered for the best match
        return group;
    }

    private ITokenizer tokenizer;
    private final ISimilarityCalculator distance = new LevenshteinDistance();

    /**
     * Returns the tokens of the source text of an entry, tokenizing it only the first time it is requested.
     * Without this, every call to {@link #buildOngoingEntry} would tokenize every entry of the project again.
     * The returned array is shared between calls (assumes similarity computation does not modify it).
     */
    private Token[] tokensOf(SourceTextEntry entry) {
        final int idx = entry.entryNum() - 1;
        if (idx < 0 || idx >= tokensPerId.length) {
            // Entry number outside of the table: do not cache, just compute
            return tokenize(entry.getSrcText());
        }
        Token[] tokens = tokensPerId[idx];
        if (tokens == null) {
            tokens = tokenize(entry.getSrcText());
            tokensPerId[idx] = tokens;
        }
        return tokens;
    }

    /**
     * Splits a text in tokens, in the way required by the sort criteria given to the constructor:
     * words with stemming for SCORE, words without stemming for SCORE_NO_STEM, verbatim in any other case.
     */
    private Token[] tokenize(String st) {
        if (tokenizer == null) {
            tokenizer = m_project.getSourceTokenizer();
        }
        switch (this.m_criteria) {
            case SCORE:
                return tokenizer.tokenizeWords(st, ITokenizer.StemmingMode.MATCHING);
            case SCORE_NO_STEM:
                return tokenizer.tokenizeWords(st, ITokenizer.StemmingMode.NONE);
            case ADJUSTED_SCORE:
            default:
                return tokenizer.tokenizeVerbatim(st);
        }
    }

    private final NearString.SORT_KEY m_criteria;
    private final int m_minVal, m_minGroupSize;
    /** Group each entry belongs to, indexed by entry number - 1; null while the entry has no group. */
    private final CrossSourceSearchResultEntry[] entryPerId;
    /** Tokens of the source text of each entry, indexed by entry number - 1; filled on first use. */
    private final Token[][] tokensPerId;

}
