/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool 
          with fuzzy matching, translation memory, keyword search, 
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2009 Alex Buloichik
               2010 Arno Peters
               2013-2014 Alex Buloichik
               2015 Aaron Madlon-Kay
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.core.statistics;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.omegat.core.Core;
import org.omegat.core.data.IProject;
import org.omegat.core.data.IProject.FileInfo;
import org.omegat.core.data.ProjectProperties;
import org.omegat.core.data.ProtectedPart;
import org.omegat.core.data.SourceTextEntry;
import org.omegat.core.data.TMXEntry;
import org.omegat.util.OConsts;
import org.omegat.util.OStrings;
import org.omegat.util.StaticUtils;
import org.omegat.util.gui.TextUtil;

/**
 * Thread for calculating standard statistics.
 * 
 * The calculation requires two different kinds of tag stripping: one for calculating unique and remaining
 * segments, and a second for calculating the number of words and chars.
 * 
 * Calculating the number of words/chars only requires stripping all tags, protected parts and placeholders
 * (see StatCount.java).
 * 
 * Calculating unique and remaining segments also only requires stripping all tags, protected parts and
 * placeholders for the standard calculation.
 * 
 * @author Alex Buloichik (alex73mail@gmail.com)
 * @author Arno Peters
 * @author Aaron Madlon-Kay
 */
public class CalcStandardProjectStatistics extends CalcStandardStatistics {

    /**
     * Creates the calculator.
     * 
     * @param callback
     *            panel callback handed to the superclass; {@link #run()} reports its results through it
     */
    public CalcStandardProjectStatistics(IStatisticsPanel.Standard callback) {
        super(callback);
    }

    /**
     * Builds the statistics for the current project ({@link Core#getProject()}), passes the resulting text
     * to the callback, removes the old <code>word_counts</code> file from the project's internal directory,
     * and finally writes the text to the {@link OConsts#STATS_FILENAME} file in that directory and reports
     * the file name to the callback.
     */
    @Override
    public void run() {
        IProject p = Core.getProject();
        String result = buildProjectStats(p, null, callback);
        callback.setTextData(result);
        callback.finishData();

        String internalDir = p.getProjectProperties().getProjectInternal();
        removeOldStats(internalDir);

        // now dump file based word counts to disk
        String fn = internalDir + OConsts.STATS_FILENAME;
        Statistics.writeStat(fn, result);
        callback.setDataFile(fn);
    }

    /**
     * Removes the old stats file ("word_counts") from the given directory, if it exists.
     * 
     * This is a best-effort cleanup: a failure to delete the file must not prevent the new statistics from
     * being written, so the result of {@link File#delete()} is deliberately not checked.
     * 
     * @param internalDir
     *            directory prefix; the file name is appended to it as-is, without adding a separator
     */
    private static void removeOldStats(String internalDir) {
        File oldstats = new File(internalDir + "word_counts");
        try {
            if (oldstats.exists()) {
                oldstats.delete();
            }
        } catch (SecurityException ignored) {
            // A security manager denied access to the file. Leaving the old file in place is harmless,
            // so the denial is intentionally ignored. Only SecurityException is caught here (it is the
            // only exception File.exists()/File.delete() are documented to throw), so that unexpected
            // programming errors are no longer silently swallowed.
        }
    }

    /**
     * Convenience method: same as {@link #buildProjectStats(IProject, StatisticsInfo, IStatisticsPanel.Standard)}
     * with a <code>null</code> callback.
     * 
     * @param project
     *            project to calculate statistics for
     * @param hotStat
     *            object to update with the calculated segment counts, or <code>null</code>
     * @return the statistics as text
     */
    public static String buildProjectStats(final IProject project, final StatisticsInfo hotStat) {
        return buildProjectStats(project, hotStat, null);
    }

    /**
     * Builds a text with statistic info about the project. The total word and
     * character count of the project, the total number of unique segments, plus
     * the details for each file.
     * 
     * @param project
     *            project to calculate statistics for
     * @param hotStat
     *            if not <code>null</code>, its total/translated/unique segment counts and its per-file
     *            unique counts are overwritten with the freshly calculated values
     * @param callback
     *            if not <code>null</code>, receives the project table and the files table data
     * @return the statistics as text: the project table followed by the per-file table
     */
    public static String buildProjectStats(final IProject project, final StatisticsInfo hotStat, final IStatisticsPanel.Standard callback) {

        // Accumulates the numbers for the whole project.
        FileData global = new FileData(null);

        List<FileData> counts = new ArrayList<FileData>();
        for (FileInfo file : project.getProjectFiles()) {
            FileData numbers = new FileData(file.filePath);
            counts.add(numbers);
            for (SourceTextEntry ste : file.entries) {
                String src = ste.getSrcText();
                // Substitute every protected part with its replacement for the uniqueness calculation
                // before the segment text is registered.
                for (ProtectedPart pp : ste.getProtectedParts()) {
                    src = src.replace(pp.getTextInSourceSegment(), pp.getReplacementUniquenessCalculation());
                }
                TMXEntry tr = project.getTranslationInfo(ste.getKey());
                global.register(src, tr.isTranslated());
                numbers.register(src, tr.isTranslated());
            }
            global.addFile(numbers);
        }

        StringBuilder result = new StringBuilder();

        // STATISTICS FOR THE WHOLE PROJECT
        result.append(OStrings.getString("CT_STATS_Project_Statistics"));
        result.append("\n\n");

        String[][] headerTable = calcHeaderTable(new StatCount[] { global.total, global.remaining, global.unique, global.remainingUnique });
        if (callback != null) {
            callback.setProjectTableData(htHeaders, headerTable);
        }
        result.append(TextUtil.showTextTable(htHeaders, headerTable, htAlign));
        result.append("\n\n");

        // STATISTICS BY FILE
        result.append(OStrings.getString("CT_STATS_FILE_Statistics"));
        result.append("\n\n");
        String[][] filesTable = calcFilesTable(project.getProjectProperties().getSourceRoot(), counts);
        if (callback != null) {
            callback.setFilesTableData(ftHeaders, filesTable);
        }
        result.append(TextUtil.showTextTable(ftHeaders, filesTable, ftAlign));

        if (hotStat != null) {
            hotStat.numberOfSegmentsTotal = global.total.segments;
            // Translated count is derived from unique segments: unique minus still-remaining unique.
            hotStat.numberofTranslatedSegments = global.unique.segments - global.remainingUnique.segments;
            hotStat.numberOfUniqueSegments = global.unique.segments;
            hotStat.uniqueCountsByFile.clear();
            for (FileData fd : counts) {
                hotStat.uniqueCountsByFile.put(fd.filename, fd.unique.segments);
            }
        }

        return result.toString();
    }
}
