/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool 
          with fuzzy matching, translation memory, keyword search, 
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
               2008 Alex Buloichik
               2017 Thomas Cordonnier
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.core.segmentation;

import java.beans.ExceptionListener;
import java.beans.XMLDecoder;
import java.beans.XMLEncoder;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.omegat.util.Language;
import org.omegat.util.Log;
import org.omegat.util.OStrings;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamWriter;


/**
 * The class with all the segmentation data possible -- rules, languages, etc.
 * It loads and saves its data from/to SRX file.
 * 
 * @author Maxym Mykhalchuk
 * @author Thomas Cordonnier
 */
public class SRX implements Serializable, Cloneable {

    /** Serialization version identifier of this class. */
    private static final long serialVersionUID = 2182125877925944613L;

    /** File name of the segmentation settings stored in the legacy CONF format (see {@link #saveToConf}). */
    public static final String CONF_SENTSEG = "segmentation.conf";
    /** File name of the segmentation settings stored in the SRX format (see {@link #saveToSrx}). */
    public static final String SRX_SENTSEG = "segmentation.srx";

    /**
     * Creates an SRX without any mapping rules; every other property keeps the
     * value given by its field initializer.
     * <p>
     * Please do not call directly unless you know what you are doing: use
     * {@link #getDefault()} or {@link #loadSRX(File, boolean)} to obtain a
     * populated instance.
     */
    public SRX() {
    }

    /**
     * Creates a deep copy of this SRX.
     * <p>
     * Every mapping rule is cloned, and all scalar settings (cascade,
     * sub-flow segmentation, tag inclusion flags and version) are carried
     * over so that the copy segments text exactly like the original.
     *
     * @return an independent copy of this object
     */
    @Override
    public SRX clone() {
        SRX result = new SRX();

        // Scalar settings: without these the copy would silently fall back
        // to the constructor defaults.
        result.cascade = cascade;
        result.segmentSubflows = segmentSubflows;
        result.includeStartingTags = includeStartingTags;
        result.includeEndingTags = includeEndingTags;
        result.includeIsolatedTags = includeIsolatedTags;
        result.version = version;

        result.mappingRules = new ArrayList<MapRule>(mappingRules.size());
        for (MapRule rule : mappingRules) {
            result.mappingRules.add(rule.clone());
        }
        return result;
    }

    /**
     * Saves segmentation rules into the specified directory, choosing the
     * output format from the files already present there:
     * <ul>
     * <li>if <code>segmentation-2conf.srx</code> exists, it is removed and the
     * rules are written in CONF format;</li>
     * <li>else if <code>segmentation-2srx.conf</code> exists, it is removed and
     * the rules are written in SRX format;</li>
     * <li>else if <code>segmentation.conf</code> exists, it is overwritten;</li>
     * <li>otherwise <code>segmentation.srx</code> is written (created or
     * overwritten).</li>
     * </ul>
     * The marker file names are built exactly as in
     * {@link #loadSRX(File, boolean)}.
     *
     * @param srx
     *            rules to save; when <code>null</code> nothing is written and
     *            the file that would have been the target (or the conversion
     *            marker, if one was found) is deleted instead
     * @param outDir
     *            directory holding the segmentation files
     * @throws IOException
     *             when writing the file fails
     */
    public static void saveTo(SRX srx, File outDir) throws IOException {
        File toConfMarker = new File(outDir, SRX_SENTSEG.replace(".", "-2conf."));
        File toSrxMarker = new File(outDir, CONF_SENTSEG.replace(".", "-2srx."));

        File outFile;
        if (toConfMarker.exists()) {
            toConfMarker.delete();
            if (srx == null) {
                return;
            }
            outFile = new File(outDir, CONF_SENTSEG);
        } else if (toSrxMarker.exists()) {
            toSrxMarker.delete();
            if (srx == null) {
                return;
            }
            outFile = new File(outDir, SRX_SENTSEG);
        } else {
            // No conversion requested: keep the format already in use,
            // defaulting to SRX when nothing has been saved yet.
            outFile = new File(outDir, CONF_SENTSEG);
            if (!outFile.exists()) {
                outFile = new File(outDir, SRX_SENTSEG);
            }
        }

        if (srx == null) {
            outFile.delete();
            return;
        }

        if (outFile.getName().endsWith(".conf")) {
            saveToConf(srx, outFile);
        } else if (outFile.getName().endsWith(".srx")) {
            saveToSrx(srx, outFile);
        }
    }
    
    /**
     * Writes the rules to a file in the legacy CONF format (a
     * {@link XMLEncoder} dump of the SRX bean).
     * <p>
     * As a side effect the version of <code>srx</code> is set to
     * {@link #CURRENT_VERSION} before writing.
     *
     * @param srx
     *            rules to save, must not be <code>null</code>
     * @param outFile
     *            file to create or overwrite
     * @throws IOException
     *             when the file cannot be opened or closed; the error is
     *             logged before being rethrown
     */
    public static void saveToConf(SRX srx, File outFile) throws IOException {
        srx.setVersion(CURRENT_VERSION);
        // try-with-resources: both the stream and the encoder are released
        // even when encoding fails half-way.
        try (FileOutputStream out = new FileOutputStream(outFile);
                XMLEncoder xmlenc = new XMLEncoder(out)) {
            xmlenc.writeObject(srx);
        } catch (IOException ioe) {
            Log.logErrorRB("CORE_SRX_ERROR_SAVING_SEGMENTATION_CONFIG");
            Log.log(ioe);
            throw ioe;
        }
    }

	/** StAX factory shared by every call to {@link #saveToSrx(SRX, File)}. */
	private static final XMLOutputFactory staxOutputFactory = XMLOutputFactory.newInstance();	
	
	
    /**
     * Writes the rules to a file in SRX 2.0 format, encoded as UTF-8.
     * <p>
     * The header stores the <code>cascade</code> and
     * <code>segmentsubflows</code> settings. The body contains one
     * <code>languagerule</code> and one <code>languagemap</code> element per
     * mapping rule.
     *
     * @param srx
     *            rules to save, must not be <code>null</code>
     * @param outFile
     *            file to create or overwrite
     * @throws IOException
     *             when the file cannot be written; any other failure raised
     *             while producing the XML is wrapped into an IOException
     */
    public static void saveToSrx(SRX srx, File outFile) throws IOException {
        try (java.io.OutputStreamWriter fos = new java.io.OutputStreamWriter(new FileOutputStream(outFile), "UTF-8")) {
            XMLStreamWriter writer = staxOutputFactory.createXMLStreamWriter(fos);
            writer.writeStartDocument("UTF-8", "1.0");
            writer.writeStartElement("srx");
            writer.writeDefaultNamespace("http://www.lisa.org/srx20");
            writer.writeAttribute("version", "2.0");

            writeSrxIndent(writer, 1);
            writer.writeEmptyElement("header");
            writer.writeAttribute("cascade", srx.cascade ? "yes" : "no");
            writer.writeAttribute("segmentsubflows", srx.segmentSubflows ? "yes" : "no");

            writeSrxIndent(writer, 1);
            writer.writeStartElement("body");

            writeSrxIndent(writer, 2);
            writer.writeStartElement("languagerules");
            for (MapRule mr : srx.getMappingRules()) {
                writeSrxIndent(writer, 3);
                writer.writeStartElement("languagerule");
                writer.writeAttribute("languagerulename", mr.getLanguage());
                for (Rule rule : mr.getRules()) {
                    writeSrxIndent(writer, 4);
                    writer.writeStartElement("rule");
                    writer.writeAttribute("break", rule.isBreakRule() ? "yes" : "no");

                    writeSrxIndent(writer, 5);
                    writer.writeStartElement("beforebreak");
                    writer.writeCharacters(rule.getBeforebreak());
                    writer.writeEndElement();

                    writeSrxIndent(writer, 5);
                    writer.writeStartElement("afterbreak");
                    writer.writeCharacters(rule.getAfterbreak());
                    writer.writeEndElement();

                    writeSrxIndent(writer, 4);
                    writer.writeEndElement(); // rule
                }
                writeSrxIndent(writer, 3);
                writer.writeEndElement(); // languagerule
            }
            writeSrxIndent(writer, 2);
            writer.writeEndElement(); // languagerules

            writeSrxIndent(writer, 2);
            writer.writeStartElement("maprules");
            for (MapRule mr : srx.getMappingRules()) {
                writeSrxIndent(writer, 3);
                writer.writeEmptyElement("languagemap");
                writer.writeAttribute("languagerulename", mr.getLanguage());
                writer.writeAttribute("languagepattern", mr.getPattern());
            }
            writeSrxIndent(writer, 2);
            writer.writeEndElement(); // maprules

            writeSrxIndent(writer, 1);
            writer.writeEndElement(); // body
            writeSrxIndent(writer, 0);
            writer.writeEndElement(); // srx

            writer.writeEndDocument();
            // Push everything to the underlying writer before it is closed
            // by the try-with-resources.
            writer.flush();
            writer.close();
        } catch (IOException e) {
            // Already the declared type: do not wrap it a second time.
            throw e;
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    /** Writes a line break followed by four spaces per indentation level. */
    private static void writeSrxIndent(XMLStreamWriter writer, int level)
            throws javax.xml.stream.XMLStreamException {
        StringBuilder indent = new StringBuilder("\n");
        for (int i = 0; i < level; i++) {
            indent.append("    ");
        }
        writer.writeCharacters(indent.toString());
    }
    
    /**
     * Loads the local segmentation file. Accepts SRX (default) or old CONF format.
     * <p>
     * Files are tried in this order:
     * <ol>
     * <li><code>segmentation-2conf.srx</code>: read as SRX, then converted to
     * <code>segmentation.conf</code>; the marker file is removed once the
     * conversion succeeded;</li>
     * <li><code>segmentation-2srx.conf</code>: read as CONF, then converted to
     * <code>segmentation.srx</code>; the marker file is removed once the
     * conversion succeeded;</li>
     * <li><code>segmentation.conf</code>;</li>
     * <li><code>segmentation.srx</code>.</li>
     * </ol>
     * A failed conversion is logged but the rules that were read are still
     * returned. An SRX file that exists but cannot be parsed is treated like a
     * missing file. In case you use conf format, rules about old version
     * remain valid.
     *
     * @param configDir
     *            The directory to search in. Not the file, because here we
     *            will search for various formats
     * @param fallbackDefault
     *            When no usable file is found, return default if true. Else
     *            return null
     * @return the loaded rules, the defaults, or <code>null</code>
     **/
    public static SRX loadSRX(File configDir, boolean fallbackDefault) {
        File inFile = new File(configDir, SRX_SENTSEG.replace(".", "-2conf."));
        if (inFile.exists()) {
            SRX res = loadSrxOrNull(inFile);
            if (res != null) {
                try {
                    saveToConf(res, new File(configDir, CONF_SENTSEG));
                    inFile.delete();
                } catch (Exception e) {
                    Log.log(e); /* could not convert, but could read */
                }
                return res;
            }
        }

        inFile = new File(configDir, CONF_SENTSEG.replace(".", "-2srx."));
        if (inFile.exists()) {
            SRX res = loadConfFile(inFile);
            try {
                // Write the converted file first: the source is only removed
                // once its replacement exists.
                saveToSrx(res, new File(configDir, SRX_SENTSEG));
                inFile.delete();
            } catch (Exception e) {
                Log.log(e); /* could not convert, but could read */
            }
            return res;
        }

        inFile = new File(configDir, CONF_SENTSEG);
        if (inFile.exists()) {
            return loadConfFile(inFile);
        }

        inFile = new File(configDir, SRX_SENTSEG);
        if (inFile.exists()) {
            SRX res = loadSrxOrNull(inFile);
            if (res != null) {
                return res;
            }
        }

        // Note: as in previous version (3.6) return a new SRX with defaults
        return fallbackDefault ? SRX.getDefault() : null;
    }

    /** Reads an SRX file, returning <code>null</code> (after logging) when it cannot be read. */
    private static SRX loadSrxOrNull(File srxFile) {
        try {
            // toURI() escapes characters that are illegal in URLs, which the
            // deprecated File.toURL() does not.
            return loadSrxFile(srxFile.toURI().toURL());
        } catch (Exception e) {
            Log.log(e);
            return null;
        }
    }
    
    /**
     * Loads segmentation rules from a CONF file (an {@link XMLDecoder} dump of
     * the SRX bean). If there's an error loading a file, it returns
     * <code>SRX.getDefault()</code>; a missing file is not logged, any other
     * failure is.
     * <p>
     * Since 1.6.0 RC8 it also checks if the version of segmentation rules saved
     * is older than that of the current OmegaT, and tries to merge the two sets
     * of rules.
     *
     * @param configFile
     *            the CONF file to read
     * @return the loaded (and possibly merged) rules, or the defaults
     */
    private static SRX loadConfFile(File configFile) {
        SRX res;
        try {
            MyExceptionListener myel = new MyExceptionListener();
            // NOTE(review): XMLDecoder can instantiate arbitrary classes named
            // in the file, so it must only be used on trusted files.
            // try-with-resources releases the stream even if decoding throws.
            try (FileInputStream in = new FileInputStream(configFile);
                    XMLDecoder xmldec = new XMLDecoder(in, null, myel)) {
                res = (SRX) xmldec.readObject();
            }

            if (myel.isExceptionOccured()) {
                StringBuilder sb = new StringBuilder();
                for (Exception ex : myel.getExceptionsList()) {
                    sb.append("    ");
                    sb.append(ex);
                    sb.append("\n");
                }
                Log.logErrorRB("CORE_SRX_EXC_LOADING_SEG_RULES", sb.toString());
                return SRX.getDefault();
            }

            // checking the version
            if (CURRENT_VERSION.compareTo(res.getVersion()) > 0) {
                // the segmentation config file is of an older version:
                // merge the current defaults into the loaded rules
                SRX defaults = SRX.getDefault();
                res = merge(res, defaults);
            }
            Log.log("using segmentation rules from " + configFile);
        } catch (Exception e) {
            // silently ignoring FNF
            if (!(e instanceof FileNotFoundException)) {
                Log.log(e);
            }
            res = SRX.getDefault();
        }
        return res;
    }
    
    /** StAX factory shared by every call to {@link #loadSrxFile(URL)}. */
    private static final XMLInputFactory staxInputFactory = newSrxInputFactory();

    /**
     * Builds the input factory with DTD processing and external entities
     * switched off, because SRX files may come from untrusted sources.
     */
    private static XMLInputFactory newSrxInputFactory() {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        try {
            factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        } catch (IllegalArgumentException ignored) {
            // property not supported by this StAX implementation
        }
        try {
            factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        } catch (IllegalArgumentException ignored) {
            // property not supported by this StAX implementation
        }
        return factory;
    }

    /**
     * Loads segmentation rules from an SRX document.
     * <p>
     * The <code>header</code> element provides the cascade and sub-flow
     * settings (both true unless the attribute is "no"). Each
     * <code>languagerule</code> opens a named rule set, and each
     * <code>languagemap</code> becomes one {@link MapRule} bound to the rule
     * set of the same name. A <code>languagemap</code> naming an unknown rule
     * set gets an empty rule list.
     *
     * @param rulesUrl
     *            location of the SRX document
     * @return the loaded rules, or <code>null</code> when the document cannot
     *         be read or parsed (the error is logged)
     */
    public static SRX loadSrxFile(URL rulesUrl) {
        SRX res = new SRX();
        List<MapRule> newMap = new ArrayList<MapRule>();
        res.setMappingRules(newMap);
        try {
            try (java.io.InputStream io = rulesUrl.openStream()) {
                Log.log("using segmentation rules from " + rulesUrl);

                XMLStreamReader reader = staxInputFactory.createXMLStreamReader(io);
                List<Rule> rulesList = null;
                java.util.HashMap<String, List<Rule>> mapping = new java.util.HashMap<>();
                while (reader.hasNext()) {
                    if (reader.next() != XMLStreamReader.START_ELEMENT) {
                        continue;
                    }
                    String element = reader.getName().getLocalPart();
                    if ("header".equals(element)) {
                        res.setCascade(!"no".equals(reader.getAttributeValue(null, "cascade")));
                        res.setSegmentSubflows(!"no".equals(reader.getAttributeValue(null, "segmentsubflows")));
                    } else if ("languagerule".equals(element)) {
                        rulesList = new ArrayList<Rule>();
                        mapping.put(reader.getAttributeValue(null, "languagerulename"), rulesList);
                    } else if ("rule".equals(element)) {
                        if (rulesList == null) {
                            throw new javax.xml.stream.XMLStreamException(
                                    "<rule> found outside of <languagerule>", reader.getLocation());
                        }
                        rulesList.add(readSrxRule(reader));
                    } else if ("languagemap".equals(element)) {
                        String ruleName = reader.getAttributeValue(null, "languagerulename");
                        List<Rule> mapped = mapping.get(ruleName);
                        if (mapped == null) {
                            // unknown rule set: keep the mapping usable
                            mapped = new ArrayList<Rule>();
                        }
                        newMap.add(new MapRule(ruleName,
                                reader.getAttributeValue(null, "languagepattern"), mapped));
                    }
                }
            }
            return res;
        } catch (Exception ex) {
            Log.log(ex);
            return null;
        }
    }

    /**
     * Reads one <code>rule</code> element; the reader must be positioned on
     * its start tag and is left on its end tag.
     * <p>
     * Only character data located directly inside <code>beforebreak</code> or
     * <code>afterbreak</code> is collected; XML comments are not part of the
     * patterns.
     */
    private static Rule readSrxRule(XMLStreamReader reader) throws javax.xml.stream.XMLStreamException {
        boolean isBreak = !"no".equals(reader.getAttributeValue(null, "break"));
        StringBuilder before = new StringBuilder();
        StringBuilder after = new StringBuilder();
        StringBuilder target = null; // buffer receiving text, null outside both patterns
        while (reader.hasNext()) {
            int event = reader.next();
            if (event == XMLStreamReader.START_ELEMENT) {
                String name = reader.getName().getLocalPart();
                if ("beforebreak".equals(name)) {
                    target = before;
                } else if ("afterbreak".equals(name)) {
                    target = after;
                } else {
                    target = null;
                }
            } else if (event == XMLStreamReader.END_ELEMENT) {
                String name = reader.getName().getLocalPart();
                if ("rule".equals(name)) {
                    break;
                }
                if ("beforebreak".equals(name) || "afterbreak".equals(name)) {
                    target = null;
                }
            } else if (target != null && (event == XMLStreamReader.CHARACTERS
                    || event == XMLStreamReader.CDATA || event == XMLStreamReader.SPACE)) {
                target.append(reader.getText());
            }
        }
        return new Rule(isBreak, before.toString(), after.toString());
    }
    
    
    /**
     * Does a CONF-format config file already exist for the project at the
     * given location?
     * <p>
     * The directory may be given with or without a trailing file separator.
     * NOTE(review): only {@link #CONF_SENTSEG} is checked, although
     * {@link #loadSRX(File, boolean)} also accepts {@link #SRX_SENTSEG} --
     * confirm with callers whether the SRX file should count as well.
     *
     * @param configDir
     *            the project directory for storage of settings file
     * @return true when the file exists
     */
    public static boolean projectConfigFileExists(String configDir) {
        File configFile;
        if (configDir == null || configDir.isEmpty()) {
            // no directory given: resolve against the working directory
            configFile = new File(CONF_SENTSEG);
        } else {
            // File(parent, child) inserts the separator only when it is missing
            configFile = new File(configDir, CONF_SENTSEG);
        }
        return configFile.exists();
    }


    /**
     * Merges the default rules into the current ones and returns the current
     * set, modified in place.
     * <p>
     * For every default mapping rule whose language code already exists in
     * <code>current</code>, the missing rules are added: break rules at the
     * end, exception rules right before the first break rule. A default
     * mapping rule with an unknown language code is inserted as a whole before
     * the first mapping whose pattern is {@link #DEFAULT_RULES_PATTERN}, or
     * appended when there is none.
     */
    private static SRX merge(SRX current, SRX defaults) {
        current = upgrade(current, defaults);

        final int defaultCount = defaults.getMappingRules().size();
        for (int d = 0; d < defaultCount; d++) {
            MapRule defaultMapping = defaults.getMappingRules().get(d);
            String defaultCode = defaultMapping.getLanguageCode();

            // look for a current mapping with the same language code
            final int existingCount = current.getMappingRules().size();
            MapRule match = null;
            for (int c = 0; c < existingCount && match == null; c++) {
                MapRule candidate = current.getMappingRules().get(c);
                if (defaultCode.equals(candidate.getLanguageCode())) {
                    match = candidate;
                }
            }

            if (match == null) {
                // unknown language: insert before the catch-all mapping
                int insertAt = existingCount;
                for (int c = 0; c < existingCount; c++) {
                    String pattern = current.getMappingRules().get(c).getPattern();
                    if (DEFAULT_RULES_PATTERN.equals(pattern)) {
                        insertAt = c;
                        break;
                    }
                }
                current.getMappingRules().add(insertAt, defaultMapping);
                continue;
            }

            // known language: add the rules that are not there yet
            List<Rule> currentRules = match.getRules();
            List<Rule> defaultRules = defaultMapping.getRules();
            for (Rule defaultRule : defaultRules) {
                if (currentRules.contains(defaultRule)) {
                    continue;
                }
                if (defaultRule.isBreakRule()) {
                    // breaks go to the end
                    currentRules.add(defaultRule);
                    continue;
                }
                // exceptions go before the first break rule
                final int ruleCount = currentRules.size();
                int firstBreak = 0;
                while (firstBreak < ruleCount && !currentRules.get(firstBreak).isBreakRule()) {
                    firstBreak++;
                }
                currentRules.add(firstBreak, defaultRule);
            }
        }
        return current;
    }

    /**
     * Implements some upgrade heuristics.
     * <p>
     * When the current version is {@link #OT160RC9_VERSION}, a mapping rule
     * whose language code is "Default (English)" is renamed to the default
     * code, and the rules that the defaults define for English, text and HTML
     * are removed from it. A language missing from the defaults is simply
     * skipped.
     *
     * @param current
     *            rules to upgrade, modified in place
     * @param defaults
     *            default rules used to identify the rules to remove
     * @return <code>current</code>
     */
    private static SRX upgrade(SRX current, SRX defaults) {
        // renaming "Default (English)" to "Default"
        // and removing English/Text/HTML-specific rules from there
        if (OT160RC9_VERSION.equals(CURRENT_VERSION)) {
            String DEF = "Default (English)";
            String[] separatedCodes = { LanguageCodes.ENGLISH_CODE, LanguageCodes.F_TEXT_CODE,
                    LanguageCodes.F_HTML_CODE };
            for (MapRule maprule : current.getMappingRules()) {
                if (!DEF.equals(maprule.getLanguageCode())) {
                    continue;
                }
                maprule.setLanguage(LanguageCodes.DEFAULT_CODE);
                for (String code : separatedCodes) {
                    List<Rule> separated = getRulesForLanguage(defaults, code);
                    // removeAll(null) would throw; nothing to remove when the
                    // defaults do not define this language
                    if (separated != null) {
                        maprule.getRules().removeAll(separated);
                    }
                }
            }
        }
        return current;
    }

    /**
     * Find rules for specific language.
     * 
     * @param source
     *            rules list
     * @param langName
     *            language code to look for, compared with
     *            {@link MapRule#getLanguageCode()}
     * @return the rule list of the first matching mapping rule (the live
     *         list, not a copy), or a new empty list when no mapping rule
     *         matches; never <code>null</code>
     */
    private static List<Rule> getRulesForLanguage(final SRX source, String langName) {
        for (MapRule mr : source.getMappingRules()) {
            if (langName.equals(mr.getLanguageCode())) {
                return mr.getRules();
            }
        }
        // An empty list lets callers pass the result straight to collection
        // methods such as removeAll, which reject null.
        return new ArrayList<Rule>();
    }

    /**
     * Collects the exceptions reported while a configuration file is being
     * decoded, so that the caller can inspect them afterwards.
     */
    static class MyExceptionListener implements ExceptionListener {
        /** Exceptions received so far, in order of arrival. */
        private final List<Exception> exceptionsList = new ArrayList<>();
        /** Set once the first exception has been received. */
        private boolean exceptionOccured;

        /** Records the exception and remembers that at least one occurred. */
        @Override
        public void exceptionThrown(Exception e) {
            exceptionsList.add(e);
            exceptionOccured = true;
        }

        /**
         * Tells whether at least one exception has been reported.
         */
        public boolean isExceptionOccured() {
            return exceptionOccured;
        }

        /**
         * Gives access to the reported exceptions (the live list, not a copy).
         */
        public List<Exception> getExceptionsList() {
            return exceptionsList;
        }
    }

    // Patterns
    /**
     * Language pattern of the catch-all mapping rule; {@link #merge} inserts
     * new default mapping rules before the first rule using this pattern.
     */
    private static final String DEFAULT_RULES_PATTERN = ".*";

    /**
     * Initializes default rules from the bundled <code>defaultRules.srx</code>
     * resource, with ending tags included and sub-flow segmentation enabled.
     * <p>
     * Should the resource be missing or unreadable, the problem is logged and
     * an SRX without any rules is returned instead.
     *
     * @return a new SRX instance, never <code>null</code>
     */
    public static SRX getDefault() {
        URL defaultsUrl = SRX.class.getResource("defaultRules.srx");
        // loadSrxFile reports failures by returning null
        SRX srx = defaultsUrl == null ? null : loadSrxFile(defaultsUrl);
        if (srx == null) {
            Log.log("Could not load built-in defaultRules.srx, using empty segmentation rules");
            srx = new SRX();
        }
        srx.includeEndingTags = true;
        srx.segmentSubflows = true;
        return srx;
    }

    /**
     * Collects the rules applying to a language.
     * <p>
     * The mapping rules are examined in order; the rules of every mapping
     * whose pattern matches the whole language string are appended to the
     * result. When cascading is off, only the first matching mapping is used.
     * <p>
     * A usual setup contains default segmentation rules, so some rules are
     * normally returned; without any matching mapping the result is an empty
     * list.
     *
     * @param srclang
     *            language to find the rules for
     * @return a new list with the applicable rules, possibly empty
     */
    public List<Rule> lookupRulesForLanguage(Language srclang) {
        List<Rule> collected = new ArrayList<Rule>();
        for (MapRule mapping : getMappingRules()) {
            boolean applies = mapping.getCompiledPattern().matcher(srclang.getLanguage()).matches();
            if (!applies) {
                continue;
            }
            collected.addAll(mapping.getRules());
            if (!cascade) {
                // non-cascading means: do not search for other patterns
                break;
            }
        }
        return collected;
    }

    /**
     * Whether the rules of all matching mappings are combined (true) or only
     * those of the first matching mapping are used (false), see
     * {@link #lookupRulesForLanguage(Language)}. True unless an SRX file said
     * otherwise.
     */
    private boolean cascade = true;

    /**
     * Tells whether cascading is enabled.
     * 
     * @return current value of the cascade property
     */
    public boolean isCascade() {
        return cascade;
    }

    /**
     * Enables or disables cascading.
     * 
     * @param cascade
     *            new value of the cascade property
     */
    public void setCascade(boolean cascade) {
        this.cascade = cascade;
    }

    /**
     * Value of the segmentSubflows property, stored in the
     * <code>segmentsubflows</code> attribute of the SRX header. True by
     * default.
     */
    private boolean segmentSubflows = true;

    /**
     * Reads the segmentSubflows property.
     * 
     * @return current value of the segmentSubflows property
     */
    public boolean isSegmentSubflows() {
        return segmentSubflows;
    }

    /**
     * Changes the segmentSubflows property.
     * 
     * @param segmentSubflows
     *            new value of the segmentSubflows property
     */
    public void setSegmentSubflows(boolean segmentSubflows) {
        this.segmentSubflows = segmentSubflows;
    }

    /**
     * Value of the includeStartingTags property. False by default.
     */
    private boolean includeStartingTags;

    /**
     * Reads the includeStartingTags property.
     * 
     * @return current value of the includeStartingTags property
     */
    public boolean isIncludeStartingTags() {
        return includeStartingTags;
    }

    /**
     * Changes the includeStartingTags property.
     * 
     * @param includeStartingTags
     *            new value of the includeStartingTags property
     */
    public void setIncludeStartingTags(boolean includeStartingTags) {
        this.includeStartingTags = includeStartingTags;
    }

    /**
     * Value of the includeEndingTags property. True by default.
     */
    private boolean includeEndingTags = true;

    /**
     * Reads the includeEndingTags property.
     * 
     * @return current value of the includeEndingTags property
     */
    public boolean isIncludeEndingTags() {
        return includeEndingTags;
    }

    /**
     * Changes the includeEndingTags property.
     * 
     * @param includeEndingTags
     *            new value of the includeEndingTags property
     */
    public void setIncludeEndingTags(boolean includeEndingTags) {
        this.includeEndingTags = includeEndingTags;
    }

    /**
     * Value of the includeIsolatedTags property. False by default.
     */
    private boolean includeIsolatedTags;

    /**
     * Reads the includeIsolatedTags property.
     * 
     * @return current value of the includeIsolatedTags property
     */
    public boolean isIncludeIsolatedTags() {
        return includeIsolatedTags;
    }

    /**
     * Changes the includeIsolatedTags property.
     * 
     * @param includeIsolatedTags
     *            new value of the includeIsolatedTags property
     */
    public void setIncludeIsolatedTags(boolean includeIsolatedTags) {
        this.includeIsolatedTags = includeIsolatedTags;
    }

    /**
     * The {@link MapRule}s of this SRX: each one associates a language pattern
     * with its segmentation rules. Their order matters for
     * {@link #lookupRulesForLanguage(Language)}.
     */
    private List<MapRule> mappingRules = new ArrayList<MapRule>();

    /**
     * Gives access to all mapping rules at once.
     *
     * @return the list held by this object (not a copy)
     */
    public List<MapRule> getMappingRules() {
        return mappingRules;
    }

    /**
     * Replaces all mapping rules at once.
     *
     * @param rules
     *            the new list, stored as is (not copied)
     */
    public void setMappingRules(List<MapRule> rules) {
        this.mappingRules = rules;
    }

    // ////////////////////////////////////////////////////////////////
    // Versioning properties to detect version upgrades
    // and possibly do something if required

    /** Initial version of segmentation support (1.4.6 beta 4 -- 1.6.0 RC7). */
    public static final String INITIAL_VERSION = "0.2";
    /** Segmentation support of 1.6.0 RC8 (a bit more rules added). */
    public static final String OT160RC8_VERSION = "0.2.1";
    /** Segmentation support of 1.6.0 RC9 (rules separated). */
    public static final String OT160RC9_VERSION = "0.2.2";
    /**
     * Currently supported segmentation support version. Files saved in CONF
     * format are stamped with it, and older files are merged with the
     * defaults when loaded.
     */
    public static final String CURRENT_VERSION = OT160RC9_VERSION;

    /**
     * Segmentation support version these rules were saved with;
     * <code>null</code> until set.
     */
    private String version;

    /** Gives the segmentation support version of these rules. */
    public String getVersion() {
        return this.version;
    }

    /** Stores the segmentation support version of these rules. */
    public void setVersion(String value) {
        this.version = value;
    }

}
