/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool 
          with fuzzy matching, translation memory, keyword search, 
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2018 Thomas Cordonnier
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.core.segmentation;

import java.io.InputStream;
import java.io.File;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.net.URL;

import org.omegat.util.Language;
import org.omegat.util.Log;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.events.XMLEvent;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.Attribute;

/**
 * Loader for Culter Segmentation Convertible format (CSC: http://www.silvestris-lab.org/node/66).
 * Currently it can only load the XML form (CSCX), and cannot save.
 * <p>
 * A CSC configuration is made of:
 * <ul>
 *   <li>mapping rules, associating a language pattern with the name of a list of rules;</li>
 *   <li>rule templates, i.e. rules containing <code>%{param}</code> placeholders;</li>
 *   <li>named lists of rules, each entry being either a plain rule or a template with parameter values.</li>
 * </ul>
 * 
 * @author Thomas Cordonnier
 */
public class CSC implements ISegmentationData {

    public static final String CSC_URI = "http://culter.silvestris-lab.org/compatible";

    // ------------------- State --------------

    /** When false, lookup stops at the first mapping rule whose pattern matches the language. */
    private boolean cascade = true;
    /** Language pattern -> name of a list of rules (the rules themselves are in {@link #namedLists}). */
    private List<MapRule> mappingRules = new ArrayList<MapRule>();
    /** Template name -> rule whose patterns contain <code>%&lt;param&gt;</code> placeholders. */
    private HashMap<String,Rule> templatesMap = new HashMap<>();
    /** Name of list -> rules or template applications, in order. */
    private HashMap<String,List<ApplyRuleTemplate>> namedLists = new HashMap<>();

    public CSC() {}
    
    /** Converts SRX to CSC. Does not build any template **/
    public CSC (SRX ori) {
        this.cascade = ori.isCascade();
        // Copy the list, so that this object never shares its mapping list with the SRX
        this.mappingRules = new ArrayList<>(ori.getMappingRules());
        for (MapRule mr : this.mappingRules) {
            if (this.namedLists.containsKey(mr.getLanguageCode())) {
                continue; // do not redefine twice the same
            }
            List<Rule> srxRules = mr.getRules();
            List<ApplyRuleTemplate> newList = new ArrayList<>(srxRules == null ? 0 : srxRules.size());
            if (srxRules != null) {
                for (Rule rule0 : srxRules) {
                    newList.add(new ApplyRuleTemplate(rule0));
                }
            }
            this.namedLists.put(mr.getLanguageCode(), newList);
        }
    }
    
    // ------------------- Reader, using STAX --------------

    private static final XMLInputFactory staxInputFactory = createInputFactory();

    /** Builds the shared StAX factory; external entities are disabled because rules files may come from shared projects. */
    private static XMLInputFactory createInputFactory() {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        return factory;
    }

    private static final QName Q_name = new QName(null, "name");
    private static final QName Q_languagepattern = new QName(null, "languagepattern");
    private static final QName Q_languagerulename = new QName(null, "languagerulename");
    private static final QName Q_break = new QName(null, "break");
    private static final QName Q_val = new QName(null, "value");
    private static final QName Q_extends = new QName(null, "extends");
    private static final QName Q_extension_mode = new QName(null, "extension-mode");
    private static final QName Q_format = new QName(null, "format");
    private static final QName Q_remove = new QName(null, "remove");
    private static final QName Q_comments = new QName(null, "comments");

    private static final Pattern PATTERN_CSC_VARIABLE = Pattern.compile("%\\{(\\w+)\\}");

    /** True if the event is a start element with exactly this local name. */
    private static boolean isStart(XMLEvent event, String localName) {
        return event.isStartElement() && localName.equals(event.asStartElement().getName().getLocalPart());
    }

    /** True if the event is an end element with exactly this local name. */
    private static boolean isEnd(XMLEvent event, String localName) {
        return event.isEndElement() && localName.equals(event.asEndElement().getName().getLocalPart());
    }

    /** True if the event is an end element whose local name ends with the given suffix. */
    private static boolean isEndElementEndingWith(XMLEvent event, String suffix) {
        return event.isEndElement() && event.asEndElement().getName().getLocalPart().endsWith(suffix);
    }

    /**
     * Tells whether a rule element is a break rule.
     * 
     * @return TRUE / FALSE for <code>rule</code>, <code>break-rule</code> and <code>exception-rule</code>,
     *         <code>null</code> if the element is not a rule element at all
     */
    private static Boolean breakFlag(StartElement el) {
        switch (el.getName().getLocalPart()) {
            case "rule": {
                // As in SRX, a missing "break" attribute means "yes"
                Attribute breakAttr = el.getAttributeByName(Q_break);
                return breakAttr == null || "yes".equals(breakAttr.getValue());
            }
            case "break-rule": return Boolean.TRUE;
            case "exception-rule": return Boolean.FALSE;
            default: return null;
        }
    }

    /**
     * Loads a file in CSCX format.
     * For the moment, supports only rules-mapping, rule templates and language rules.
     * 
     * @param rulesUrl location of the CSCX file; also used to resolve relative references
     *                 (<code>extends</code> attribute, <code>item-list-file</code> elements)
     * @return the loaded configuration
     * @throws Exception if the file cannot be read or parsed, or if the configuration
     *                   referenced by <code>extends</code> cannot be resolved
     **/
    protected static CSC loadCscxFile (URL rulesUrl) throws Exception {
        CSC data = new CSC();
        CSC base = null;
        try (InputStream io = rulesUrl.openStream()) { 
            XMLEventReader reader = staxInputFactory.createXMLEventReader(io);
            try {
                while (reader.hasNext()) {
                    XMLEvent event = reader.nextEvent();
                    if (event.isStartElement()) {
                        StartElement startEl = event.asStartElement();
                        switch (startEl.getName().getLocalPart()) {
                            case "seg-rules": {
                                Attribute extendsAttr = startEl.getAttributeByName(Q_extends);
                                if (extendsAttr != null) {
                                    String val = extendsAttr.getValue();
                                    base = loadBase(rulesUrl, val);
                                    if (base == null) {
                                        throw new java.io.IOException("Cannot resolve base segmentation rules '"
                                            + val + "' referenced by " + rulesUrl);
                                    }
                                    data.inheritFrom(base);
                                }
                                break;
                            }
                            case "rules-mapping": data.readRulesMapping(reader, base, startEl); break;
                            case "rule-templates": data.readRuleTemplates(reader); break;
                            case "languagerules": data.readLanguageRules(reader, rulesUrl); break;
                            default: break;
                        }
                    } else if (isEnd(event, "seg-rules")) {
                        // Special case: when you extend an existing CSCX file, mapping rules are optional
                        if ((base != null) && data.mappingRules.isEmpty()) {
                            data.mappingRules.addAll(base.mappingRules);
                        }
                    }
                }
            } finally {
                reader.close();
            }
        }
        return data;
    }

    /**
     * Resolves the value of the <code>extends</code> attribute.
     * 
     * @return the base configuration, or <code>null</code> if the value designates nothing we can load
     */
    private static CSC loadBase(URL rulesUrl, String val) throws Exception {
        if ("#DEFAULT-CONFIG".equalsIgnoreCase(val)) {
            return new CSC(ISegmentationData.getDefault());
        }
        if ("#GLOBAL-CONFIG".equalsIgnoreCase(val)) {
            ISegmentationData baseData = ISegmentationData.loadFromDir(new File(org.omegat.util.StaticUtils.getConfigDir()), true);
            if (baseData instanceof CSC) return (CSC) baseData;
            if (baseData instanceof SRX) return new CSC((SRX) baseData);
            return null;
        }
        URL baseUrl = new URL(rulesUrl, val);
        // Locale.ROOT: file extensions must not depend on the user's locale
        String lowerVal = val.toLowerCase(java.util.Locale.ROOT);
        if (lowerVal.endsWith(".cscx")) return loadCscxFile(baseUrl);
        if (lowerVal.endsWith(".srx")) return new CSC(SRX.loadSrxFile(baseUrl));
        return null;
    }

    /**
     * Starts from the templates and named lists of the base configuration.
     * Lists are copied, so that extending them ("before" / "after" modes) never modifies the base.
     */
    private void inheritFrom(CSC base) {
        this.templatesMap = new HashMap<>(base.templatesMap);
        this.namedLists = new HashMap<>();
        base.namedLists.forEach((listName, list) -> this.namedLists.put(listName, new ArrayList<>(list)));
    }
    
    // Reads until </rules-mapping> and loads mappings in current CSC
    private void readRulesMapping(XMLEventReader reader, CSC base, StartElement startEl) throws Exception {
        Attribute modeAttr = startEl.getAttributeByName(Q_extension_mode); // may be null
        String mode = (modeAttr == null) ? null : modeAttr.getValue();
        // first add base rules, then add new ones
        if ((base != null) && "after".equals(mode)) mappingRules.addAll(base.mappingRules);
        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            if (isEnd(event, "rules-mapping")) break; // not return: "before" mode is handled after the loop
            if (! isStart(event, "languagemap")) continue;

            StartElement el = event.asStartElement();
            Attribute ruleName = el.getAttributeByName(Q_languagerulename);
            Attribute langPattern = el.getAttributeByName(Q_languagepattern);
            if ((ruleName == null) || (langPattern == null)) {
                Log.log("Warning: wrong map rule in CSC file: " + event);
                continue;
            }
            try {
                mappingRules.add(new MapRule(ruleName.getValue(), langPattern.getValue(), null));
            } catch (Exception e) {
                // Best effort: an invalid mapping is skipped, the rest of the file is still loaded
                Log.log("Warning: wrong map rule in CSC file: " + event);
            }
        }
        // add base rules after what we read
        if ((base != null) && "before".equals(mode)) mappingRules.addAll(base.mappingRules);
    }
    
    // Reads until </rule-templates> and loads templates in current CSC
    private void readRuleTemplates(XMLEventReader reader) throws Exception {
        Rule currentRule = null;
        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            if (isEnd(event, "rule-templates")) return;
            // Except </rule-templates>, only start elements are managed by this method
            if (! event.isStartElement()) continue;

            StartElement el = event.asStartElement();
            if (el.getName().getLocalPart().equals("rule-template")) {
                Attribute nameAttr = el.getAttributeByName(Q_name);
                if (nameAttr == null) {
                    Log.log("Error [CSC]: something wrong (probably missing attribute) with rule template : " + event);
                    currentRule = null; // rules of this unnamed template must not alter the previous one
                } else {
                    currentRule = new Rule();
                    templatesMap.put(nameAttr.getValue(), currentRule);
                }
                continue;
            }
            Boolean isBreak = breakFlag(el);
            if (isBreak == null) continue; // not a rule element
            if (currentRule == null) {
                Log.log("Error [CSC]: something wrong (probably missing attribute) with rule template : " + event);
                continue;
            }
            currentRule.setBreakRule(isBreak);
            readRule(reader, currentRule);
        }
    }

    // Reads until the end of a rule element (rule, break-rule, exception-rule) and fills the given rule.
    // CSC variables %{name} are stored as %<name>.
    private void readRule(XMLEventReader reader, Rule currentRule) throws Exception {
        StringBuilder text = new StringBuilder();
        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            if (isEndElementEndingWith(event, "rule")) return;
            
            if (isStart(event, "beforebreak") || isStart(event, "afterbreak")) {
                text.setLength(0);
            } else if (event.isCharacters()) {
                text.append(event.asCharacters().getData());
            } else if (isEnd(event, "beforebreak")) {
                currentRule.setBeforebreak(PATTERN_CSC_VARIABLE.matcher(text.toString()).replaceAll("%<$1>"));
                text.setLength(0);
            } else if (isEnd(event, "afterbreak")) {
                currentRule.setAfterbreak(PATTERN_CSC_VARIABLE.matcher(text.toString()).replaceAll("%<$1>"));
                text.setLength(0);
            }
        }
    }
    
    /** Contains either a rule without params, or a template with association param -> value **/
    private static class ApplyRuleTemplate {
        /** The plain rule, or the template rule containing placeholders. */
        final Rule rule;
        /** Parameter name -> possible values; null for a plain rule. */
        final HashMap<String,List<String>> params;
        /** Parameter names in declaration order; null for a plain rule. */
        final List<String> paramOrder;
        
        // Used for non-apply-template
        ApplyRuleTemplate(Rule rule) { this(rule, null, null); }
        // Used for apply-template
        ApplyRuleTemplate(Rule rule, HashMap<String,List<String>> params, List<String> order) {
            this.rule = rule; this.params = params; this.paramOrder = order;
        }
    }
    
    // Reads until </languagerules> and loads rules in current CSC
    private void readLanguageRules(XMLEventReader reader, URL dataURL) throws Exception {
        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            if (isEndElementEndingWith(event, "languagerules")) return;
            if (! isStart(event, "languagerule")) continue;

            StartElement el = event.asStartElement();
            List<ApplyRuleTemplate> currentList = new ArrayList<>();
            // Always consume the element content, even if it is finally rejected
            readSingleLanguageRule(reader, currentList, dataURL);

            Attribute nameAttr = el.getAttributeByName(Q_languagerulename);
            if (nameAttr == null) {
                Log.log("Warning: language rule without name in CSC file: " + event);
                continue;
            }
            Attribute modeAttr = el.getAttributeByName(Q_extension_mode);
            String mode = (modeAttr == null) ? "overwrite" : modeAttr.getValue(); // default : overwrite
            List<ApplyRuleTemplate> existing = namedLists.get(nameAttr.getValue());
            if (mode.equals("overwrite")) {
                // ignore what is in base
                namedLists.put(nameAttr.getValue(), currentList);
            } else if (mode.equals("before") || mode.equals("after")) {
                if (existing == null) namedLists.put(nameAttr.getValue(), currentList); // nothing to extend
                else if (mode.equals("before")) existing.addAll(0, currentList); // add what we just read at the beginning
                else existing.addAll(currentList); // add what we just read at the end
            }
            // any other mode: what we read is ignored
        }
    }
    
    // Reads until </languagerule> and adds to currentList the rules and template applications found
    private void readSingleLanguageRule(XMLEventReader reader, List<ApplyRuleTemplate> currentList, URL dataURL) throws Exception {
        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            if (isEndElementEndingWith(event, "languagerule")) return;
            if (! event.isStartElement()) continue;

            StartElement el = event.asStartElement();
            if (el.getName().getLocalPart().equals("apply-rule-template")) {
                Attribute nameAttr = el.getAttributeByName(Q_name);
                Rule template = (nameAttr == null) ? null : templatesMap.get(nameAttr.getValue());
                ApplyRuleTemplate withParams = new ApplyRuleTemplate(template, new HashMap<>(), new ArrayList<>());
                // Parameters are read in all cases, so that the reader is positioned after the element
                readParameters(reader, withParams.params, withParams.paramOrder, dataURL);
                if (template == null) Log.log("Warning: unknown or missing rule template name in CSC file: " + event);
                else currentList.add(withParams);
                continue;
            }
            Boolean isBreak = breakFlag(el);
            if (isBreak != null) {
                Rule currentRule = new Rule();
                currentRule.setBreakRule(isBreak);
                readRule(reader, currentRule);
                currentList.add(new ApplyRuleTemplate(currentRule));
            }
        }
    }

    // Reads until </apply-rule-template> and fills the parameters (name -> values) and their order
    private void readParameters(XMLEventReader reader, HashMap<String,List<String>> params, List<String> order, URL dataURL) throws Exception {
        List<String> curList = null; // values of the parameter being read
        StringBuilder buf = new StringBuilder();
        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            if (isEndElementEndingWith(event, "apply-rule-template")) return;
            
            if (event.isCharacters()) {
                buf.append(event.asCharacters().getData());
                continue;
            }
            if (isEnd(event, "item")) {
                if (curList != null) curList.add(buf.toString());
                else Log.log("Warning: item outside of a valid parameter in CSC file: " + buf);
                continue;
            }
            if (! event.isStartElement()) continue;

            StartElement el = event.asStartElement();
            switch (el.getName().getLocalPart()) {
                case "param": {
                    Attribute nameAttr = el.getAttributeByName(Q_name);
                    if (nameAttr == null) {
                        Log.log("Warning: parameter without name in CSC file: " + event);
                        curList = null; // following items must not go to the previous parameter
                        break;
                    }
                    // Always a fresh mutable list: a "value" attribute is simply its first element
                    curList = new ArrayList<>();
                    Attribute valAttr = el.getAttributeByName(Q_val);
                    if (valAttr != null) curList.add(valAttr.getValue());
                    order.add(nameAttr.getValue());
                    params.put(nameAttr.getValue(), curList);
                    break;
                }
                case "item":
                    buf.setLength(0);
                    break;
                case "item-list-file":
                    if (curList != null) readItemListFile(el, curList, dataURL);
                    else Log.log("Warning: item-list-file outside of a valid parameter in CSC file: " + event);
                    break;
                default:
                    break;
            }
        }
    }

    /**
     * Adds to the target list all lines of the text file referenced by an <code>item-list-file</code> element.
     * Attribute <code>format</code> gives the encoding (part after ':', default UTF-8),
     * lines fully matching <code>comments</code> are skipped,
     * and what matches <code>remove</code> is deleted from each line. Both patterns are optional.
     */
    private static void readItemListFile(StartElement el, List<String> target, URL dataURL) throws Exception {
        Attribute nameAttr = el.getAttributeByName(Q_name);
        if (nameAttr == null) {
            Log.log("Warning: item-list-file without name in CSC file: " + el);
            return;
        }
        String encoding = "UTF-8";
        Attribute attr = el.getAttributeByName(Q_format);
        if (attr != null) {
            encoding = attr.getValue();
            encoding = encoding.substring(encoding.indexOf(':') + 1);
        }
        attr = el.getAttributeByName(Q_remove);
        Pattern remove = (attr == null) ? null : Pattern.compile(attr.getValue());
        attr = el.getAttributeByName(Q_comments);
        Pattern comments = (attr == null) ? null : Pattern.compile(attr.getValue());

        try (InputStream is = new URL(dataURL, nameAttr.getValue()).openStream();
                java.io.BufferedReader fileReader = new java.io.BufferedReader(new java.io.InputStreamReader(is, encoding))) {
            String line;
            while ((line = fileReader.readLine()) != null) {
                if ((comments != null) && comments.matcher(line).matches()) continue;
                if (remove != null) line = remove.matcher(line).replaceAll("");
                target.add(line);
            }
        }
    }
    
    // ------------------- ISegmentationData --------------
    
    /**
     * Finds the rules applicable to a language.
     * Each template application is translated to one single rule,
     * where each parameter is replaced by the alternative (a|b|c) of its values.
     * Mapping rules referencing a list which was never defined are treated as empty.
     * 
     * @param srclang the language whose code is matched against mapping patterns
     * @return the applicable rules, in order; never null
     */
    public List<Rule> lookupRulesForLanguage(Language srclang) {
        List<Rule> rules = new ArrayList<Rule>();
        for (MapRule maprule : mappingRules) {
            if (! maprule.getCompiledPattern().matcher(srclang.getLanguageCode()).matches()) continue;

            // in CSC, rules are not stored in the maprule
            List<ApplyRuleTemplate> templates = namedLists.get(maprule.getLanguageCode());
            if (templates != null) {
                for (ApplyRuleTemplate tpl : templates) {
                    if (tpl.params == null) {
                        rules.add(tpl.rule);
                        continue;
                    }
                    // Translate the template with params to one big rule
                    String before = tpl.rule.getBeforebreak(), after = tpl.rule.getAfterbreak();
                    for (String paramName : tpl.paramOrder) {
                        String placeholder = "%<" + paramName + ">";
                        String alternatives = String.join("|", tpl.params.get(paramName));
                        before = before.replace(placeholder, alternatives);
                        after = after.replace(placeholder, alternatives);
                    }
                    rules.add(new Rule(tpl.rule.isBreakRule(), before, after));
                }
            }
            if (! this.cascade) break; // non-cascading means: do not search for other patterns
        }
        return rules;
    }
    
    /**
     * Converts to SRX: each template application is expanded
     * to one rule per combination of parameter values.
     * Mapping rules referencing a list which was never defined get an empty list of rules.
     */
    public SRX toSRX() {
        SRX res = new SRX();
        res.setCascade(this.cascade);
        List<MapRule> newRules = new ArrayList<MapRule>(mappingRules.size());
        for (MapRule mapRule0 : mappingRules) {
            List<Rule> srxRules = new ArrayList<>();
            List<ApplyRuleTemplate> templates = namedLists.get(mapRule0.getLanguageCode());
            if (templates != null) {
                for (ApplyRuleTemplate tpl : templates) {
                    if (tpl.params == null) srxRules.add(tpl.rule);
                    else
                        // Translate the template with params to human-readable set of rules
                        CSC.toSrxRules(tpl.rule.getBeforebreak(), tpl.rule.getAfterbreak(), tpl, 0, srxRules);
                }
            }
            newRules.add(new MapRule(mapRule0.getLanguageCode(), mapRule0.getPattern(), srxRules));
        }
        res.setMappingRules(newRules);
        return res;
    }
    
    // Recursively replaces parameter number paramId (and following) by each of its values;
    // when all parameters are replaced, adds the resulting rule to srxRules
    private static void toSrxRules(String before, String after, ApplyRuleTemplate tpl, int paramId, List<Rule> srxRules) {
        if (paramId >= tpl.paramOrder.size()) {
            srxRules.add(new Rule(tpl.rule.isBreakRule(), before, after));
            return;
        }
        String paramName = tpl.paramOrder.get(paramId);
        String placeholder = "%<" + paramName + ">";
        for (String paramVal : tpl.params.get(paramName)) {
            CSC.toSrxRules(before.replace(placeholder, paramVal), after.replace(placeholder, paramVal),
                tpl, paramId + 1, srxRules);
        }
    }
}
