/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool 
          with fuzzy matching, translation memory, keyword search, 
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2012-2019 Thomas Cordonnier
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.core.search;

import java.util.Collections;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.function.Function;
import java.util.function.BiFunction;

import org.omegat.util.StaticUtils;
import org.omegat.util.Token;
import org.omegat.tokenizer.ITokenizer;

/**
 * Common interface for text expressions: a search pattern built from a string,
 * which can then be looked for inside a text.
 * 
 * @author Thomas Cordonnier
 */
public interface TextExpression {

    /**
     * Searches the expression inside the given text.
     * Implementations in this file return <code>null</code> when the expression is not found,
     * and may return an empty list to mean "found, but without any position to highlight".
     *
     * @param text the text to search in
     * @return the list of matches, or <code>null</code> if the expression is not found
     */
    public abstract List<SearchMatch> searchString (String text);
    /**
     * Tests the expression against the given text without collecting the matches.
     *
     * @param text the text to search in
     * @return true if at least one entry is found
     */
    public abstract boolean matchesString (String text); // true if at least one entry is found
    
    /**
     * Returns the "variable keeper" variant of this expression.
     * Implementations which do not support it simply return themselves.
     */
    public abstract TextExpression asVariableKeeper();
    /** Tells whether this expression is a variable keeper; false unless an implementation overrides it. */
    public default boolean isVariableKeeper() { return false; }
    /**
     * Builds an expression where numbered variable references are substituted by the given values.
     *
     * @param vars values of the variables, indexed by the number used in the reference
     * @return the resulting expression (implementations may return themselves when nothing has to be substituted)
     */
    public abstract TextExpression rebuildForVariables (String[] vars);
    /** Tells whether this expression contains variable references. */
    public abstract boolean hasVariables();
	
    /* ---------- Generic way to pass a filter between text expressions ----- */
    
    public static class FilterTextExpression implements TextExpression {
        private Function<String,String> filter;
        private TextExpression originalExpression;
        private BiFunction<String,String,Boolean> keepMatching;
        
        public FilterTextExpression(Function<String,String> filter, BiFunction<String,String,Boolean> keepMatching, TextExpression originalExpression) {
            this.filter = filter; this.originalExpression = originalExpression; this.keepMatching = keepMatching;
        }
        
        public List<SearchMatch> searchString (String text) {
            String filterText = filter.apply (text);
            if (keepMatching.apply (text, filterText)) return originalExpression.searchString(filterText);
            else if (originalExpression.matchesString(filterText)) return Collections.emptyList(); // Zero but true
            else return null; // Null = not found
        }
        
        public boolean matchesString (String text) {
            return originalExpression.matchesString(filter.apply (text));
        }

        public TextExpression asVariableKeeper() {
            return new FilterTextExpression(filter, keepMatching, originalExpression.asVariableKeeper());			
        }
        
        public boolean isVariableKeeper() { return originalExpression.isVariableKeeper(); }
        
        public TextExpression rebuildForVariables (String[] vars) {
            if (! originalExpression.hasVariables()) return this;
            return new FilterTextExpression(filter, keepMatching, originalExpression.rebuildForVariables(vars));
        }
        
        public boolean hasVariables() { return originalExpression.hasVariables(); }
    }
    
    /* ---------------------- Boolean sub-expressions ---------- */
    
    public static class NotExpression implements TextExpression {    
        private TextExpression base;
        
        public NotExpression (TextExpression base) { this.base = base; }
        
        public TextExpression getBaseExpression() { 
            return base; 
        }
        
        public List<SearchMatch> searchString (String text) {
            if (base.matchesString(text)) return null;	// False
            return Collections.emptyList(); // Zero but true
        }

		public boolean matchesString (String text) { return ! base.matchesString(text); }

        public TextExpression asVariableKeeper() { return this;	} // if base answers NO, cannot have variables!
        
        public TextExpression rebuildForVariables (String[] vars) {
            return new NotExpression(base.rebuildForVariables(vars));
        }
        
        public boolean hasVariables() { return base.hasVariables(); }
    }
    
    /* ---------------------- Regular expression --------------- */
    
    /**
     * Expression based on a regular expression: the text is accepted when the pattern
     * can be found somewhere inside it.
     */
    public static class RegexTextExpression implements TextExpression {
        /** Variable reference inside the pattern source: % followed by digits, not preceded by a backslash. */
        private static final Pattern VARIABLE_REFERENCE = Pattern.compile("(?<!\\\\)\\%(\\d+)");

        private final Pattern thePattern;

        /**
         * Compiles the expression. References written as $ followed by digits (not preceded by a backslash)
         * are rewritten with % before compilation.
         *
         * @param expression     regular expression source
         * @param caseSensitive  when false, the pattern is compiled as case insensitive
         */
        public RegexTextExpression (String expression, boolean caseSensitive) {
            int flags = Pattern.UNICODE_CHARACTER_CLASS; // ASCII is useless in a CAT tool!
            if (! caseSensitive) {
                flags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
            }
            String withVariables = expression.replaceAll("(?<!\\\\)\\$(\\d+)", "%$1");
            thePattern = Pattern.compile(withVariables, flags);
        }

        /** Wraps an already compiled pattern (used by the variable keeper variant). */
        private RegexTextExpression(Pattern ptn) {
            this.thePattern = ptn;
        }

        public Pattern getPattern() {
            return thePattern;
        }

        @Override
        public boolean matchesString (String text) {
            Matcher matcher = thePattern.matcher(text);
            return matcher.find();
        }

        /** Valid if the regular expression can be found inside the text **/
        @Override
        public List<SearchMatch> searchString (String text) {
            Matcher matcher = thePattern.matcher(text);
            if (! matcher.find()) {
                return null;
            }
            List<SearchMatch> foundMatches = new ArrayList<SearchMatch>();
            do {
                foundMatches.add(buildSearchMatch(matcher));
            } while (matcher.find());
            return foundMatches;
        }

        /** Converts the current position of the matcher to a match object; subclasses may build richer objects. */
        protected SearchMatch buildSearchMatch (Matcher matcher) {
            int from = matcher.start(), to = matcher.end();
            return new SearchMatch(from, to); // default
        }

        protected String getReplacement () {
            return null;
        }

        /** Same pattern, but each match is built as a VarMatch from the matcher. */
        @Override
        public RegexTextExpression asVariableKeeper() {
            return new RegexTextExpression(thePattern) {
                @Override
                protected VarMatch buildSearchMatch (Matcher matcher) {
                    return new VarMatch(matcher);
                }

                @Override
                public RegexTextExpression asVariableKeeper() {
                    return this;
                }

                @Override
                public boolean isVariableKeeper() {
                    return true;
                }
            };
        }

        /**
         * Substitutes each variable reference of the pattern by the corresponding entry of vars.
         * Returns this when the pattern source contains no % at all.
         */
        @Override
        public TextExpression rebuildForVariables (String[] vars) {
            final String source = thePattern.toString();
            if (! source.contains("%")) {
                return this;
            }
            StringBuffer expression = new StringBuffer();
            Matcher m = VARIABLE_REFERENCE.matcher(source);
            while (m.find()) {
                int index = Integer.parseInt(m.group(1));
                // escaped two times, because appendReplacement interprets its argument again
                String once = StaticUtils.escapeNonRegex(vars[index], null);
                String twice = StaticUtils.escapeNonRegex(once, null);
                m.appendReplacement(expression, twice);
            }
            m.appendTail(expression);
            boolean caseSensitive = (thePattern.flags() & Pattern.CASE_INSENSITIVE) == 0;
            return new RegexTextExpression(expression.toString(), caseSensitive);
        }

        @Override
        public boolean hasVariables() {
            return VARIABLE_REFERENCE.matcher(thePattern.toString()).find();
        }

        @Override
        public String toString() {
            return "RE:" + thePattern.toString();
        }

        // -------------------- Factories : these static methods enable easier build of expressions for some common cases -----------------

        /** Expression which searches the given string as is (escaped through StaticUtils.escapeNonRegex). */
        public static RegexTextExpression exactStringExpression (String expression, boolean caseSensitive) {
            String escaped = StaticUtils.escapeNonRegex(expression, "S");
            return new RegexTextExpression(escaped, caseSensitive);
        }

        protected static final String REGEX_WORD_WITH_JOKERS =    // used to surround a word by \b...\b
            "(\\b|\\\\p\\{L\\}\\*?)"        // must start with boundary or joker
            + "(?<![\\\\\\{\\}])\\p{L}"     // must contain at least one letter, which is not preceded by \\ nor by { }, such as \\p{L}
            + "(\\p{L}|\\\\p\\{L\\}\\*?)*"  // can contain letters and jokers
            + "(\\\\p\\{L\\}\\*?|\\b)";     // must end with boundary or joker

        /** Same as the exact string expression, but each word is surrounded by word boundaries. */
        public static RegexTextExpression exactWholeWordsExpression (String expression, boolean caseSensitive) {
            String escaped = StaticUtils.escapeNonRegex(expression, "p{L}");
            String bounded = escaped.replaceAll(REGEX_WORD_WITH_JOKERS, "\\\\b$0\\\\b");
            return new RegexTextExpression(bounded, caseSensitive);
        }
    }

    /**
     * Regular expression which also carries a replacement string. Group references ($ followed by digits)
     * in the replacement are resolved against the groups captured in the searched text.
     */
    public static class RegexReplaceExpression extends RegexTextExpression {
        /**
         * Group reference inside the replacement: $ followed by digits, preceded by an even number
         * of backslashes (group 1 keeps these backslashes, group 2 is the group number).
         */
        private static final Pattern GROUP_REFERENCE = Pattern.compile("(?<!\\\\)((?:\\\\\\\\)*)\\$(\\d+)");

        private final String replacement;

        /**
         * @param expression     regular expression source
         * @param caseSensitive  when false, the pattern is compiled as case insensitive
         * @param replacement    replacement string, which may contain group references
         */
        public RegexReplaceExpression (String expression, boolean caseSensitive, String replacement) {
            super(expression, caseSensitive);
            this.replacement = replacement;
        }

        /**
         * Builds the match for the current position of the matcher.
         *
         * @return a ReplaceMatch carrying the computed replacement when it differs from the raw
         *         replacement string, else a plain SearchMatch
         */
        @Override
        protected SearchMatch buildSearchMatch (Matcher matcher) {
            String repl = replacement;
            Matcher replaceMatcher = GROUP_REFERENCE.matcher(repl);
            while (replaceMatcher.find()) {
                int varId = Integer.parseInt(replaceMatcher.group(2));
                String captured = matcher.group(varId); // yes, from source matcher!
                if (captured == null) {
                    // The group exists but did not take part in this match (e.g. unused alternative):
                    // substitute nothing instead of failing with a NullPointerException.
                    captured = "";
                }
                repl = repl.substring(0, replaceMatcher.start())
                    + replaceMatcher.group(1)
                    + captured.replace("\\","\\\\").replace("$","\\$")    // avoid re-eval inside replaceCase
                    + repl.substring(replaceMatcher.end());
                // The string changed: restart the scan. Inserted text cannot match again since its $ are escaped.
                replaceMatcher.reset(repl);
            }
            repl = org.omegat.util.StringUtil.replaceCase(repl, org.omegat.core.Core.getProject().getProjectProperties().getTargetLanguage().getLocale());
            if (! repl.equals(replacement)) {
                return new ReplaceMatch(matcher.start(), matcher.end(), repl);
            }
            return new SearchMatch(matcher.start(), matcher.end());
        }

        // -------------------- Factories : these static methods enable easier build of expressions for some common cases -----------------

        /** Exact string search where each run of non-space characters becomes a capturing group. */
        public static RegexReplaceExpression exactStringExpression (String expression, boolean caseSensitive, String replacement) {
            expression = StaticUtils.escapeNonRegex(expression, "S");
            expression = expression.replaceAll("\\S+", "($0)");
            return new RegexReplaceExpression(expression, caseSensitive, replacement);
        }

        /** Whole words search where each word becomes a capturing group surrounded by word boundaries. */
        public static RegexTextExpression exactWholeWordsExpression (String expression, boolean caseSensitive, String replacement) {
            expression = StaticUtils.escapeNonRegex(expression, "p{L}");
            expression = expression.replaceAll(REGEX_WORD_WITH_JOKERS, "\\\\b($0)\\\\b");
            return new RegexReplaceExpression(expression, caseSensitive, replacement);
        }
    }
	
    /* ---------------------- Keywords = set of regex, all must return true. --------------- */
    
    public static class WordsTextExpression implements TextExpression {
        private RegexTextExpression[] theExpressions;
        
        public WordsTextExpression (String expression, boolean caseSensitive, boolean wholeWords) {
            String[] words = expression.split(" ");
            theExpressions = new RegexTextExpression [words.length];
            for (int i = 0; i < words.length; i++)
                if (wholeWords) theExpressions[i] = RegexTextExpression.exactWholeWordsExpression(words[i], caseSensitive);
                else theExpressions[i] = RegexTextExpression.exactStringExpression(words[i], caseSensitive);
        }

		public boolean matchesString (String text) {
            for (RegexTextExpression expression: theExpressions) 
                if (! expression.matchesString(text)) return false;
            return true;
		}
        
        /** Valid if all of the words can be found (as string or whole word, but without using tokenizer) **/
        public List<SearchMatch> searchString (String text) {
            List<SearchMatch> foundMatches = new ArrayList<SearchMatch>(theExpressions.length);
            for (RegexTextExpression expression: theExpressions) {
                List<SearchMatch> matches = expression.searchString(text);
                if (matches == null) return null; 
                else foundMatches.addAll (matches);
            }
            return foundMatches.size() > 0 ? foundMatches : null;
        }        
		
        // For plugins, because they may have a different stemming method
        public RegexTextExpression[] split() { return theExpressions; }
		
		private WordsTextExpression(RegexTextExpression[] theExpressions) { this.theExpressions = theExpressions; }
		
		public TextExpression asVariableKeeper() {
			RegexTextExpression[] res = new RegexTextExpression[theExpressions.length];
			for (int i = 0; i < res.length; i++) res[i] = theExpressions[i].asVariableKeeper();
			return new WordsTextExpression(res);
		}
		
		public TextExpression rebuildForVariables (String[] vars) {
			RegexTextExpression[] res = new RegexTextExpression[theExpressions.length];
			for (int i = 0; i < res.length; i++) res[i] = (RegexTextExpression) theExpressions[i].rebuildForVariables(vars);
			return new WordsTextExpression(res);
		}
		
		public boolean hasVariables() { for (RegexTextExpression re: theExpressions) if (re.hasVariables()) return true; return false; }				
    }
        
    /* ---------------------------------- Token search ----------------- */
    
    public static class ExactTokenExpression implements TextExpression {

        protected ITokenizer tokenizer;
        protected Token[] phrase;
        private ITokenizer.StemmingMode stemming_mode;
        private String original; // for plugins
		private boolean caseSensitive;
        
        public ExactTokenExpression (ITokenizer tokenizer, ITokenizer.StemmingMode stemming_mode, String text, boolean caseSensitive) {
            this.tokenizer = tokenizer;
            this.stemming_mode = stemming_mode;
            this.original = text;
			this.caseSensitive = caseSensitive;
            this.phrase = tokenizer.tokenizeWords (text, stemming_mode);
        }

        // For plugins, because they may have a different stemming method
        public String getOriginalExpression() {
            return original;
        }
        
        /** True if the exact expression can be found, but accept it in a grammatically different form **/
        public List<SearchMatch> searchString (String text) {
            if (phrase.length == 0) return null; // avoid exception when calling phrase[0]
            Token[] textTokens = tokenizer.tokenizeWords (text, stemming_mode);
            List<SearchMatch> foundMatches = new ArrayList<SearchMatch>();
            
            TEXTLOOP:
            for (int i = 0; i < textTokens.length; i++)
                if (textTokens[i].equals (phrase[0])) {
					if (caseSensitive)
						if (! textTokens[i].getTextFromString(text).contains (phrase[0].getTextFromString(original)))
							continue TEXTLOOP;
					
                    PHRASELOOP:
                    for (int j = 1; j < phrase.length; j++)
                        if (i + j >= textTokens.length) continue TEXTLOOP;
						else if (! ( phrase[j].equals (textTokens[i + j])) ) continue TEXTLOOP;
						else if (caseSensitive)
							if (! textTokens[i + j].getTextFromString(text).contains (phrase[j].getTextFromString(original))) continue TEXTLOOP;
					
                    // Still here ? The text has been found.
                    Token last = textTokens [i + phrase.length - 1];
                    int length = last.getLength() + last.getOffset() - textTokens[i].getOffset();
                    foundMatches.add (buildMatch(text, textTokens, length, i));
                }
            
            return foundMatches.size() > 0 ? foundMatches : null;
        }
		
		protected SearchMatch buildMatch (String text, Token[] textTokens, int length, int i) {
			return new SearchMatch(textTokens[i].getOffset(), textTokens[i].getOffset() + length);
		}
        
        /** True if the exact expression can be found (including word order), but accept it in a grammatically different form **/
		public boolean matchesString (String text) {
            if (phrase.length == 0) return false; // avoid exception when calling phrase[0]
            Token[] textTokens = tokenizer.tokenizeWords (text, ITokenizer.StemmingMode.MATCHING);
            
            TEXTLOOP:
            for (int i = 0; i < textTokens.length; i++)
                if (textTokens[i].equals (phrase[0])) {
                    PHRASELOOP:
                    for (int j = 1; j < phrase.length; j++)
                        if ((i + j >= textTokens.length) || (! phrase[j].equals (textTokens[i + j])))
                            continue TEXTLOOP;
                    // Still here ? The text has been found.
                    return true;
                }
            
            return false;
        }
		
		public TextExpression asVariableKeeper() {
			return this; // not supported yet
		}		
		
		public TextExpression rebuildForVariables (String[] vars) {
			StringBuffer expression = new StringBuffer();
			Matcher m = Pattern.compile("(?<!\\\\)\\$(\\d+)").matcher(original);
			while (m.find()) m.appendReplacement(expression, vars[Integer.parseInt(m.group(1))]);
			m.appendTail(expression);
			return new ExactTokenExpression (this.tokenizer, this.stemming_mode, expression.toString(), this.caseSensitive);
		}
		
		public boolean hasVariables() { return original.contains("$"); }						
    }
        
    public static class WordsTokenExpression implements TextExpression {

        private ITokenizer tokenizer;
        private ITokenizer.StemmingMode stemming_mode;
        private Set<Token> phrase;
        private String original; // for plugins
		private boolean caseSensitive;
        
        public WordsTokenExpression (ITokenizer tokenizer, ITokenizer.StemmingMode stemming_mode, String text, boolean caseSensitive) {
            this.tokenizer = tokenizer;
            this.stemming_mode = stemming_mode;
            this.original = text;
			this.caseSensitive = caseSensitive;
            Token[] cut = tokenizer.tokenizeWords (text, stemming_mode);
            this.phrase = new HashSet<Token>();
            for (Token current: cut) phrase.add (current);
        }

        // For plugins, because they may have a different stemming method
        public ExactTokenExpression[] split() {
            Token[] cut = tokenizer.tokenizeWords (original, ITokenizer.StemmingMode.NONE);
            ExactTokenExpression[] res = new ExactTokenExpression[cut.length];
            for (int i = 0; i < cut.length; i++) res[i] = new ExactTokenExpression(tokenizer, stemming_mode, cut[i].getTextFromString(original), this.caseSensitive);
            return res;
        }

        private Token findToken (Token[] textTokenArray, Token current, String text) {
            for (Token textToken: textTokenArray) 
                if (textToken.equals(current)) {
					if (caseSensitive)
						if (! textToken.getTextFromString(text).contains (current.getTextFromString(original)))
							continue;
					return textToken;
				}
            return null;
        }
        
        /** True if all words have been found, eventually in another order, and in another grammatical case **/
        public List<SearchMatch> searchString (String text) {
            Token[] textTokenArray = tokenizer.tokenizeWords (text, stemming_mode);
            List<SearchMatch> foundMatches = new ArrayList<SearchMatch>();

            for (Token current: phrase) {
                Token foundToken = findToken(textTokenArray, current, text);
                if (foundToken == null) return null; // All words must be found
				else foundMatches.add (new SearchMatch(foundToken.getOffset(), foundToken.getOffset() + foundToken.getLength()));
            }
            
            return foundMatches.size() > 0 ? foundMatches : null;
        }
    
        /** True if all words have been found, eventually in another order, and in another grammatical case **/
		public boolean matchesString (String text) {
            Token[] textTokenArray = tokenizer.tokenizeWords (text, stemming_mode);

            for (Token current: phrase)
                if (findToken(textTokenArray, current, text) == null) return false; // All words must be found
            return (phrase != null) && (phrase.size() > 0);
        }
		
		public TextExpression asVariableKeeper() {
			return this; // not supported yet
		}		
		
		public TextExpression rebuildForVariables (String[] vars) {
			StringBuffer expression = new StringBuffer();
			Matcher m = Pattern.compile("(?<!\\\\)\\$(\\d+)").matcher(original);
			while (m.find()) m.appendReplacement(expression, vars[Integer.parseInt(m.group(1))]);
			m.appendTail(expression);
			return new WordsTokenExpression (this.tokenizer, this.stemming_mode, expression.toString(), this.caseSensitive);
		}
		
		public boolean hasVariables() { return original.contains("$"); }								
    }
    
    /**
     * Exact token search which also carries a replacement string. In the replacement,
     * $0 stands for the whole text found and $1, $2... stand for the text of each found token.
     */
    public static class ExactReplaceTokenExpression extends ExactTokenExpression {

        private final String replacement;

        /**
         * @param tokenizer      tokenizer used both for the expression and for the searched texts
         * @param stemming_mode  stemming mode passed to the tokenizer
         * @param text           the expression to search for
         * @param replacement    replacement string, which may contain $0, $1, $2...
         * @param caseSensitive  passed to the exact token search
         */
        public ExactReplaceTokenExpression (ITokenizer tokenizer, ITokenizer.StemmingMode stemming_mode, String text, String replacement, boolean caseSensitive) {
            super(tokenizer, stemming_mode, text, caseSensitive);
            this.replacement = replacement;
        }

        /**
         * Builds a ReplaceMatch whose replacement has its references resolved against the found text.
         * References preceded by a backslash are left untouched.
         */
        @Override
        protected SearchMatch buildMatch (String text, Token[] textTokens, int length, int i) {
            final int start = textTokens[i].getOffset();
            // Text coming from the document is literal: quote it, else any $ or \ it contains
            // would be interpreted by replaceAll as replacement syntax.
            String replaced = this.replacement.replaceAll("(?<!\\\\)\\$0",
                Matcher.quoteReplacement(text.substring(start, start + length)));
            for (int j = 0; j < phrase.length; j++) {
                Token found = textTokens[i + j];
                String foundText = text.substring(found.getOffset(), found.getOffset() + found.getLength());
                replaced = replaced.replaceAll("(?<!\\\\)\\$" + (j+1), Matcher.quoteReplacement(foundText));
            }
            return new ReplaceMatch(start, start + length, replaced);
        }
    }
}
