/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
          with fuzzy matching, translation memory, keyword search,
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2010 Alex Buloichik, Ibai Lakunza Velasco, Didier Briel
               2013 Martin Wunderlich, Didier Briel, Thomas Cordonnier
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.core.matching.external;

import java.io.IOException;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.TimeZone;
import java.util.TreeSet;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;

import org.omegat.core.data.PrepareTMXEntry;
import org.omegat.util.Language;
import org.omegat.util.OStrings;
import org.omegat.util.Preferences;
import org.omegat.util.Token;
import org.omegat.util.WikiGet;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;


/**
 * Access to MyMemory as an external translation memory (not a machine translator) <br>
 * The web service is queried in TMX format and every returned translation unit
 * is converted to a {@link PrepareTMXEntry}.
 * 
 * @author Ibai Lakunza Velasco
 * @author Didier Briel
 * @author Martin Wunderlich
 * @author Thomas Cordonnier
 */
public class MyMemory extends MenuMemory {

    /** Name of the system property holding the optional e-mail sent to MyMemory. */
    private static final String MYMEMORY_API_EMAIL = "mymemory.api.email";

    /** Pattern of the <code>creationdate</code> / <code>changedate</code> attributes. */
    private static final String TMX_DATE_PATTERN = "yyyyMMdd'T'HHmmss'Z'";

    // ----------------------- Access to web service -----------------

    /** First part of the query URL; the URL-encoded text is appended to it. */
    protected static String GT_URL = "https://mymemory.translated.net/api/get?q=";
    /** Second part of the query URL, with placeholders for languages and output format. */
    protected static String GT_URL2 = "&langpair=#sourceLang#|#targetLang#&of=#format#";
    /** XPath selecting every translation unit of the response. */
    protected static String XPATH_QUERY_ALL_TU = "//tu";
    /**
     * XPath selecting, relative to a <code>tu</code>, the segment text for one language.
     * MyMemory always returns a 4-letter locale code, even when the query contains a
     * language code only; to make sure we get the right matches, only the language
     * code is taken into account.
     */
    protected static String XPATH_QUERY_TUV = "child::tuv[starts-with(@lang, '#langCode#')]/seg/text()";

    protected static final DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
    protected static final XPathFactory xpathFactory = XPathFactory.newInstance();

    static {
        // The parsed document comes from a remote server: never let it pull in
        // external entities or external DTDs (XXE).
        disableParserFeature("http://xml.org/sax/features/external-general-entities");
        disableParserFeature("http://xml.org/sax/features/external-parameter-entities");
        disableParserFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd");
    }

    /**
     * Switches off one feature of {@link #docFactory}. This is best effort: a parser
     * implementation that does not know the feature is left as it is.
     */
    private static void disableParserFeature(String feature) {
        try {
            docFactory.setFeature(feature, false);
        } catch (ParserConfigurationException ignored) {
            // Feature not supported by this parser implementation; nothing to disable.
        }
    }

    /**
     * Converts an OmegaT language to the code used in MyMemory queries.
     * 
     * @param language
     *            An OmegaT language
     * @return The language code (without country), in lower case
     */
    protected String mymemoryCode(Language language) {
        // Explicit locale: with a Turkish default locale "I" would become a dotless i.
        return language.getLanguageCode().toLowerCase(Locale.ENGLISH);
    }

    /**
     * Builds the URL for the query.
     * 
     * @param sLang
     *            source language
     * @param tLang
     *            target language
     * @param text
     *            text to search for; it is URL-encoded as UTF-8
     * @param format
     *            value of the <code>of</code> (output format) parameter, e.g. "tmx"
     * @return the complete query URL
     */
    protected String buildMyMemoryUrl(Language sLang, Language tLang, String text, String format) throws UnsupportedEncodingException {
        String url2 = GT_URL2.replace("#sourceLang#", mymemoryCode(sLang))
                .replace("#targetLang#", mymemoryCode(tLang))
                .replace("#format#", format);
        return GT_URL + URLEncoder.encode(text, "UTF-8") + url2;
    }

    /**
     * Queries MyMemory and returns the raw response.
     * 
     * @param sLang
     *            source language
     * @param tLang
     *            target language
     * @param text
     *            text to search for
     * @param format
     *            requested output format
     * @return the response body as returned by {@link WikiGet#getURL(String)}
     * @throws IOException
     *             when the request fails
     */
    protected String getMyMemoryResponse(Language sLang, Language tLang, String text, String format) throws UnsupportedEncodingException, IOException {
        String url = buildMyMemoryUrl(sLang, tLang, text, format);

        // Get email from systemProperties to enable 1000rq/day instead of 100 rq/day
        String email = System.getProperty(MYMEMORY_API_EMAIL);
        if (email != null && !email.isEmpty()) {
            // Encoded like the query text: characters such as '+' or '&' in an
            // address would otherwise alter the query string.
            url = url + "&de=" + URLEncoder.encode(email, "UTF-8");
        }

        // Get the results from MyMemory
        return WikiGet.getURL(url);
    }

    /**
     * Decodes the character entities found in MyMemory segments and trims the result.
     * 
     * @param str
     *            text that may contain entities
     * @return the decoded, trimmed text
     */
    protected String cleanUpText(String str) {
        str = str.replace("&quot;", "\"");
        str = str.replace("&nbsp;", "\u00A0");
        str = str.replace("&apos;", "'");
        str = str.replace("&#39;", "'");
        str = str.replace("&lt;", "<");
        str = str.replace("&gt;", ">");
        // "&amp;" must be decoded last, otherwise "&amp;lt;" would be decoded
        // twice and end up as "<" instead of "&lt;".
        str = str.replace("&amp;", "&");
        return str.trim();
    }

    // ----------------------- IExternalMemory interface -----------------

    @Override
    protected String getPreferenceName() {
        return "allow_mymemory_translation_memory";	// plugin, so not in Preferences
    }

    @Override
    public String getProviderName() {
        return OStrings.getString("MT_ENGINE_MYMEMORY_HUMAN");
    }

    @Override
    public String getMemoryName() {
        return OStrings.getString("MT_ENGINE_MYMEMORY_HUMAN");
    }

    /**
     * Format of the date attributes of a translation unit. Kept for compatibility;
     * {@link SimpleDateFormat} is not thread-safe, so this class no longer parses
     * with the shared instance.
     */
    public static final SimpleDateFormat dateFormat1 = new SimpleDateFormat(TMX_DATE_PATTERN, Locale.ENGLISH);

    /**
     * Queries MyMemory for the given text and converts every returned translation
     * unit to a {@link PrepareTMXEntry}.
     * 
     * @param sLang
     *            source language
     * @param tLang
     *            target language
     * @param text
     *            source text to search for
     * @param minScore
     *            not used by this implementation
     * @param maxCount
     *            not used by this implementation
     * @return one entry per translation unit; an empty list when the request fails
     *         or the response contains no translation unit
     * @throws Exception
     *             when the response cannot be parsed or queried
     */
    public List<PrepareTMXEntry> retreiveMatchingTranslations (Language sLang, Language tLang, String text, int minScore, int maxCount) throws Exception {
        String tmxResponse;

        // Get MyMemory response in TMX format. A failed request is reported and
        // yields "no match" instead of an error.
        try {
            tmxResponse = getMyMemoryResponse(sLang, tLang, text, "tmx");
        } catch (Exception e) {
            e.printStackTrace();
            return Collections.emptyList();
        }

        // Adjust DTD location and bug in entity encoding; the second line should be removed as soon as the bug is 
        // fixed by MyMemory; TODO: Use local DTD
        tmxResponse = tmxResponse.replace("<!DOCTYPE tmx SYSTEM \"tmx11.dtd\">", "");
        // Every entity becomes plain text for the parser; cleanUpText() decodes
        // the segments again below.
        tmxResponse = tmxResponse.replace("&", "&amp;");

        // We must remove anything before the XML declaration, otherwise we get an exception when creating the
        // DOM object. Currently, MyMemory returns \r\n<?xml
        tmxResponse = getXMLString(tmxResponse);

        // Build DOM object from the returned XML string
        Document document = docFactory.newDocumentBuilder().parse(new InputSource(new StringReader(tmxResponse)));

        // Get all TUs
        XPath xpath = xpathFactory.newXPath();
        NodeList allTUs = (NodeList) xpath.compile(XPATH_QUERY_ALL_TU).evaluate(document, XPathConstants.NODESET);
        if (allTUs.getLength() == 0) {
            return Collections.emptyList();
        }

        // Compiled once here instead of once per translation unit
        XPathExpression sourceSegQuery = xpath.compile(XPATH_QUERY_TUV.replace("#langCode#", sLang.getLanguageCode()));
        XPathExpression targetSegQuery = xpath.compile(XPATH_QUERY_TUV.replace("#langCode#", tLang.getLanguageCode()));

        // One formatter per call: SimpleDateFormat must not be shared between threads
        SimpleDateFormat tmxDates = new SimpleDateFormat(TMX_DATE_PATTERN, Locale.ENGLISH);
        // The trailing 'Z' of the pattern is a literal marking the time as UTC
        tmxDates.setTimeZone(TimeZone.getTimeZone("UTC"));

        // Convert allTUs (XML) to List<PrepareTMXEntry>
        List<PrepareTMXEntry> entries = new ArrayList<PrepareTMXEntry>(allTUs.getLength());
        for (int i = 0; i < allTUs.getLength(); i++) {
            Element tu = (Element) allTUs.item(i);

            PrepareTMXEntry entry = new PrepareTMXEntry();
            entry.source = cleanUpText(sourceSegQuery.evaluate(tu));
            entry.translation = cleanUpText(targetSegQuery.evaluate(tu));

            // getAttribute() returns an empty string for a missing attribute
            entry.creator = tu.getAttribute("creationid");
            entry.changer = tu.getAttribute("changeid");

            // Each date is handled on its own, so that one bad value does not
            // discard the other one; an unparseable date is kept as is.
            Long created = parseTmxDate(tmxDates, tu.getAttribute("creationdate"));
            if (created != null) {
                entry.creationDate = created.longValue();
            }
            Long changed = parseTmxDate(tmxDates, tu.getAttribute("changedate"));
            if (changed != null) {
                entry.changeDate = changed.longValue();
            }

            entries.add(entry);
        }

        return entries;
    }

    /**
     * Parses a date attribute of a translation unit.
     * 
     * @param format
     *            formatter to use; must not be shared with other threads
     * @param value
     *            attribute value, possibly empty
     * @return milliseconds since the epoch, or <code>null</code> when the value is
     *         empty or does not match the format
     */
    private static Long parseTmxDate(SimpleDateFormat format, String value) {
        if (value == null || value.isEmpty()) {
            return null;
        }
        try {
            return Long.valueOf(format.parse(value).getTime());
        } catch (ParseException ignored) {
            // Unknown date format: the caller keeps the entry's date unchanged
            return null;
        }
    }

    /**
     * Removes any character before &lt;?xml in a string.
     * This prevents an exception when trying to create a DOM object from that string.
     * @param str The input string
     * @return The string starting with &lt;?xml, if found, or the initial string
     */
    protected String getXMLString(String str) {
        int headerStart = str.indexOf("<?xml");
        if (headerStart != -1) { // XML declaration found: drop whatever precedes it
            str = str.substring(headerStart);
        }
        return str;
    }
}
