# Command-line arguments, in order:
#   1. original SDLXLIFF file            (required)
#   2. translated renum-xliff file       (required)
#   3. project_save.tmx                  (optional, recognised by its .tmx extension)
#   4. output SDLXLIFF file              (optional)
my $USAGE = "Syntax : $0 <original sdlxliff file> <translated renum-xliff file> <project_save.tmx>? <output sdlxliff file?>";
my $ORI = shift or die $USAGE;
my $TRA = shift or die $USAGE;

# The third argument is the TMX only if it ends in ".tmx"; otherwise it is
# really the output file name, so it is pushed back for the next shift.
my $TMX = shift;
unless (defined $TMX && $TMX =~ /\.tmx$/) {
    unshift(@ARGV, $TMX) if defined $TMX;   # never push undef back onto @ARGV
    $TMX = undef;
}

# Default output name: the input name with "-TRA" inserted before the first
# dot of the file name itself. Dots in directory components (e.g. "./" or
# "../") are skipped, so only the last path component is altered.
my $OUT = shift;
unless ($OUT) {
    $OUT = $ORI;
    # If the file name has no dot at all, append the suffix so that the
    # output can never be the same path as the input.
    $OUT =~ s{\.(?=[^/\\]*\z)}{-TRA.} or $OUT .= '-TRA';
}

# Notes found in the TMX file, keyed by the unit's "id" property when the
# translation unit has one, otherwise by the text of its first <seg>.
my %NOTES = ();
if ($TMX) {
    # Decode as UTF-8, like the other files handled by this script: the note
    # text is later written through a UTF-8 output layer, so reading raw bytes
    # here would double-encode every non-ASCII character.
    # NOTE(review): assumes the TMX is UTF-8 encoded - confirm for your tool.
    open(my $tmx_fh, '<:encoding(utf-8)', $TMX) or die "Could not open TMX file: '$TMX' ($!)";
    local $/ = '</tu>';     # read one translation unit per record
    while (my $tu = <$tmx_fh>) {
        my ($note) = $tu =~ m!<note>(.+?)</note>!s or next;    # units without a note are ignored
        my ($src) = $tu =~ m!<seg>(.+?)</seg>!s or next; # project_save.tmx : always source followed by target
        my ($id) = $tu =~ m!<prop type="id">(.+?)</prop>!;
        if ($id) {
            $NOTES{"$id"} = $note;
        }
        else {
            $NOTES{$src} = $note;
        }
    }
    close($tmx_fh);
}

# Open the original SDLXLIFF for reading and the output for writing, both UTF-8.
open(ORI, '<:encoding(utf-8)', $ORI) or die "Could not open '$ORI': $!";
open (OUT, '>:encoding(utf-8)', $OUT) or die "Cannot write $OUT: $!";

# Read everything up to and including the first </header> as a single record.
# %NoteId maps each key of %NOTES to a ';'-terminated list of generated comment ids.
local $/ = '</header>'; $ORI_CONTENTS = <ORI>; our %NoteId = ();
if (%NOTES) {
    # Pick whichever GUID generator is installed; Data::GUID wins when both
    # modules are available because it is assigned last.
    my $Generator = undef;
    if (eval { require Win32; 1 }) { $Generator = \&Win32::GuidGen; }
    if (eval { require Data::GUID; 1 }) { $Generator = sub { return Data::GUID->new()->as_string(); }; }
    unless ($Generator) {
        print STDERR "Cound not find any algorithm for GUID. Notes will not be generated.\n";
    } else {
        while (my ($k, $v) = each(%NOTES)) {
            # Comment ids are written lower-case and without surrounding braces.
            my $id = lc($Generator->()); $id =~ s/[\{\}]//g; $NoteId{$k} .= "$id;";
            my $def = qq{<cmt-def id="$id"><Comments><Comment>$v</Comment></Comments></cmt-def>};
            # Append to an existing <cmt-defs> list, or create the list at the
            # end of <doc-info> when the header does not have one yet. Both
            # branches emit a complete, properly closed <cmt-def> element.
            $ORI_CONTENTS =~ s!</cmt-defs>!$def</cmt-defs>!
                or $ORI_CONTENTS =~ s!</doc-info>!<cmt-defs>$def</cmt-defs></doc-info>!;
        }
    }
}
# Declare the tag definitions referenced by the <g id="omegat-..."> elements
# that the pseudo-tag conversion writes into the translation units.
$ORI_CONTENTS =~ s{</tag-defs>}{
    <tag id="omegat-italic"><bpt name="cf" word-end="false" can-hide="true">&lt;cf italic="true"&gt;</bpt><ept name="cf" word-end="false" can-hide="true">&lt;/cf&gt;</ept></tag>
    <tag id="omegat-bold"><bpt name="cf" word-end="false" can-hide="true">&lt;cf bold="true"&gt;</bpt><ept name="cf" word-end="false" can-hide="true">&lt;/cf&gt;</ept></tag>
    <tag id="omegat-underline"><bpt name="cf" word-end="false" can-hide="true">&lt;cf underline="true"&gt;</bpt><ept name="cf" word-end="false" can-hide="true">&lt;/cf&gt;</ept></tag>
    <tag id="omegat-superscript"><bpt name="cf" word-end="false" can-hide="true">&lt;cf superscript="true"&gt;</bpt><ept name="cf" word-end="false" can-hide="true">&lt;/cf&gt;</ept></tag>
    <tag id="omegat-subscript"><bpt name="cf" word-end="false" can-hide="true">&lt;cf subscript="true"&gt;</bpt><ept name="cf" word-end="false" can-hide="true">&lt;/cf&gt;</ept></tag>
    </tag-defs>
}x;
print OUT $ORI_CONTENTS;	# restore header

# From here on both files are read one <trans-unit> per record.
$/ = '</trans-unit>';
open (TRA, '<:encoding(utf-8)', $TRA) or die "Could not open '$TRA': $!";

# Pseudo-tag characters found in translations (opening char, closing char) and
# the id of the real <g> tag each pair is converted to. Order is significant:
# the conversions are applied in this sequence.
my @PSEUDO_TAGS = (
    [ "\x{2460}", "\x{2776}", 'omegat-italic'      ],
    [ "\x{2461}", "\x{2777}", 'omegat-bold'        ],
    [ "\x{2462}", "\x{2778}", 'omegat-underline'   ],
    [ "\x{2463}", "\x{2779}", 'omegat-superscript' ],
    [ "\x{2464}", "\x{277A}", 'omegat-subscript'   ],
);

# Current record of the translated file; after a match it holds the target text.
my $TRA_CONTENTS;

# Every record of the original is printed by the continue block, modified or
# not, so "next" means "write this record as it currently stands".
while (defined ($ORI_CONTENTS = <ORI>)) {
    my ($Unit_ID) = ($ORI_CONTENTS =~ m!<trans-unit(?:\s.*?)id=\"([0-9A-Fa-f\-]+)\"!) or next;
    # Check if the segment is only tags, in which case we translate it as source.
    # (Plain list assignment: "my ... if ..." could carry over the previous unit's text.)
    my ($SrcText) = ($ORI_CONTENTS =~ m{<source>(.+?)</source>}s);
    $SrcText = '' unless defined $SrcText;
    $SrcText =~ s/\<.+?\>//gs;
    unless ($SrcText) {
        undef $TRA_CONTENTS; $TRA_CONTENTS = $1 if $ORI_CONTENTS =~ m{<seg-source[^>]*>(.+?)</seg-source>};  # translate as seg-source
        $ORI_CONTENTS =~ s!<target((\s*/>)|(.*</target>))!<target>$TRA_CONTENTS</target>!g if $TRA_CONTENTS;
        next; # but continue -> print
    }
    # Replace translation with contents from the translated file: advance
    # through it until the unit with the same id is found.
    until (defined $TRA_CONTENTS && $TRA_CONTENTS =~ m!<trans-unit(?:\s.*)id="$Unit_ID"!) {
        $TRA_CONTENTS = <TRA>;
        last unless defined $TRA_CONTENTS;   # end of file: stop instead of looping forever
    }
    unless (defined $TRA_CONTENTS) {
        print STDERR "No translation found for unit '$Unit_ID' in '$TRA'; unit left unchanged.\n";
        next;
    }
    if ($TRA_CONTENTS =~ m!<target(?: renum="\d*")?>(.+)</target>!) { $TRA_CONTENTS = $1; } else { next; }
    # Drop the segment marker from the translation unless the original's
    # seg-source starts with one.
    $TRA_CONTENTS =~ s!^<mrk mid="\d+" mtype="seg">(.+)</mrk>$!$1! unless $ORI_CONTENTS =~ m!<seg-source.*?><mrk!;
    # Shift the tag ids of the translation by the first tag id of the original unit.
    my ($first) = ($ORI_CONTENTS =~ m!<[gx] id="(\d+)"!); $TRA_CONTENTS = restore_num($first,$TRA_CONTENTS) if $first;
    $ORI_CONTENTS =~ s!<target((\s*/>)|(.*</target>))!<target>$TRA_CONTENTS</target>!g;
    # Add note marks: look the note up by unit id first, then by source text.
    if (my $notes = ($NoteId{$Unit_ID} || $NoteId{$SrcText})) {
        foreach my $id (split (/;/, $notes)) {
            $ORI_CONTENTS =~ s{<target>(.+?)</target>}{<target><mrk mtype="x-sdl-comment" sdl:cid="$id">$1</mrk></target>};
        }
    }
    # Set statuses to translated
    while ($TRA_CONTENTS =~ m!<mrk mid="(\d+)" mtype="seg">([^>]+?)</mrk>!gs) {    # only to translated segments
        my $mid = $1;
        $ORI_CONTENTS =~ s!<sdl:seg id="$mid"/>!<sdl:seg id="$mid" conf="Translated"/>!gs;
    }
    # Convert pseudo-tags to real ones
    foreach my $pseudo (@PSEUDO_TAGS) {
        my ($open, $close, $tag_id) = @$pseudo;
        $ORI_CONTENTS =~ s!(\s*)$open(.+?)$close(\s*)!$1<g id="$tag_id">$2</g>$3!gs;
    }
} continue {
    print OUT $ORI_CONTENTS;
}


# Buffered write errors only surface when the output handle is closed.
close ORI; close TRA; close OUT or die "Error while closing '$OUT': $!";


# restore_num($first, $txt)
#   Returns a copy of $txt in which the numeric id of every <g id="N" or
#   <x id="N" tag opening has been increased by $first. All other text,
#   including id attributes of other elements, is returned unchanged.
#
#   $first : number added to each tag id
#   $txt   : text containing the <g>/<x> tags to renumber
sub restore_num {
    my ($first, $txt) = @_;
    $txt =~ s{<([gx]) id="(\d+)"}{ qq{<$1 id="} . ($2 + $first) . q{"} }ge;
    return $txt;
}
