/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
          with fuzzy matching, translation memory, keyword search,
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2012 Thomas Cordonnier
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.core.search;

import org.omegat.tokenizer.ITokenizer;
import org.omegat.tokenizer.LuceneEnglishTokenizer;
import org.omegat.tokenizer.LuceneFrenchTokenizer;

import junit.framework.TestCase;

/**
 * Unit tests for the matching behaviour of the {@link TextExpression}
 * variants: plain string search, whole-word search, "*" wildcards and
 * tokenizer-based search.
 * 
 * @author Thomas Cordonnier
 */
public class TextExpressionSearchTest extends TestCase {

    /** Sample sentence containing the standalone word "test". */
    private static final String SENTENCE_TEST = "This phrase contains word test.";

    /** Sample sentence where "test" is only the beginning of a longer word. */
    private static final String SENTENCE_TESTS = "This phrase contains word tests.";

    /** Sample sentence where "test" is buried in the middle of a longer word. */
    private static final String SENTENCE_PROTESTANT = "This phrase contains word protestant.";

    /**
     * An exact string expression finds its text inside a sentence, and a
     * backslash-b sequence in the searched text is not applied as a regular
     * expression word boundary.
     */
    public void testStringToRegex() {
        final TextExpression plain = TextExpression.RegexTextExpression.exactStringExpression("test", false);
        assertTrue(plain.matchesString(SENTENCE_TEST));

        // If "\b" acted as a regex word boundary this would match ("test" is
        // followed by "."); the expected result is therefore no match.
        final TextExpression withBackslashB = TextExpression.RegexTextExpression.exactStringExpression("test\\b",
                false);
        assertFalse(withBackslashB.matchesString(SENTENCE_TEST));
    }

    /**
     * Contrasts plain string search, which matches inside longer words, with
     * whole-word search, which only accepts the standalone word.
     */
    public void testWholeWords() {
        // Plain string search: a substring hit is enough.
        final TextExpression asString = TextExpression.RegexTextExpression.exactStringExpression("test", false);
        assertTrue(asString.matchesString(SENTENCE_TEST));
        assertTrue(asString.matchesString(SENTENCE_TESTS)); // "test" is a prefix of "tests"
        assertTrue(asString.matchesString(SENTENCE_PROTESTANT)); // "test" occurs inside "protestant"

        // Whole-word search: the same substrings must be rejected.
        final TextExpression asWord = TextExpression.RegexTextExpression.exactWholeWordsExpression("test", false);
        assertTrue(asWord.matchesString(SENTENCE_TEST));
        assertFalse(asWord.matchesString(SENTENCE_TESTS)); // "tests" is a different word
        assertFalse(asWord.matchesString(SENTENCE_PROTESTANT)); // "protestant" is a different word
    }

    /**
     * In plain string search the "*" wildcard stands for any run of
     * non-space characters.
     */
    public void testJokerForStrings() {
        final TextExpression wildcard = TextExpression.RegexTextExpression.exactStringExpression("t*st", false);
        assertTrue(wildcard.matchesString("twist."));
        assertTrue(wildcard.matchesString("time4test.")); // the digit 4 is a non-space character
        assertTrue(wildcard.matchesString("the_first.")); // the underscore is a non-space character
        assertFalse(wildcard.matchesString("the first")); // a space is never covered by *
    }

    /**
     * In whole-word search the "*" wildcard stands for letters only, so
     * digits, underscores and spaces break the match.
     */
    public void testJokerForWholeWords() {
        final TextExpression wildcard = TextExpression.RegexTextExpression.exactWholeWordsExpression("t*st", false);
        assertTrue(wildcard.matchesString("twist."));
        assertFalse(wildcard.matchesString("time4test.")); // 4 is not a letter
        // "_" belongs to the regex class \w but not to \p{Letter}, hence no match
        assertFalse(wildcard.matchesString("the_first."));
        assertFalse(wildcard.matchesString("the first")); // a space is not a letter
    }

    /**
     * Token-based search relies on the stemming of the language-specific
     * tokenizer, so the same kind of variation can match in English and fail
     * in French.
     */
    public void testTokenizer() {
        final LuceneEnglishTokenizer englishTokenizer = new LuceneEnglishTokenizer();
        final TextExpression english = new TextExpression.ExactTokenExpression(englishTokenizer,
                ITokenizer.StemmingMode.MATCHING, "small test", true);
        assertTrue(english.matchesString("doing a small test"));
        assertTrue(english.matchesString("doing small tests")); // plural form matches
        assertTrue(english.matchesString("small testing")); // "-ing" form matches with the English tokenizer
        assertFalse(english.matchesString("small protestation")); // different word, no match

        final LuceneFrenchTokenizer frenchTokenizer = new LuceneFrenchTokenizer();
        final TextExpression french = new TextExpression.ExactTokenExpression(frenchTokenizer,
                ITokenizer.StemmingMode.MATCHING, "petit test", true);
        assertTrue(french.matchesString("je fais un petit test"));
        assertTrue(french.matchesString("je fais des petits tests")); // plural form matches
        assertFalse(french.matchesString("petit testing")); // "-ing" ending is not matched with the French tokenizer
        assertFalse(french.matchesString("petite protestation")); // different word, no match
    }
}
