-
Notifications
You must be signed in to change notification settings - Fork 0
/
CreatePairedFile.pl
48 lines (42 loc) · 1.43 KB
/
CreatePairedFile.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
use strict;
use warnings;

# CreatePairedFile.pl
#
# Usage: CreatePairedFile.pl <sourceLang> <targetLang> <outputFolder>
#
# Reads two tab-separated alignment files,
#     <outputFolder>alignment_<sourceLang>_<targetLang>.txt
#     <outputFolder>alignment_<targetLang>_<sourceLang>.txt
# whose first three columns are: id, title, title of the paired entry in the
# other language. It writes <outputFolder><sourceLang>_<targetLang>.txt with
# one line per pair whose two titles both have an id:
#     sourceId <TAB> sourceTitle <TAB> targetId <TAB> targetTitle
#
# NOTE: <outputFolder> is prepended to the file names verbatim, so it must
# already end with a path separator (an omitted value means the current
# directory).

my $sourceLang   = shift;
my $targetLang   = shift;
my $outputFolder = shift;

die "Usage: $0 <sourceLang> <targetLang> <outputFolder>\n"
    unless defined $sourceLang && defined $targetLang;
$outputFolder = '' unless defined $outputFolder;

my $output      = $outputFolder . "${sourceLang}_${targetLang}.txt";
my $sourceInput = $outputFolder . "alignment_${sourceLang}_${targetLang}.txt";
my $targetInput = $outputFolder . "alignment_${targetLang}_${sourceLang}.txt";

my %sourceHT   = ();    # source title -> id
my %targetHT   = ();    # target title -> id
my %titlePairs = ();    # source title -> target title

# Pairs from the source file are recorded first; the target file is read
# second, so when both files mention the same source title the pairing from
# the target file wins.
loadAlignment($sourceInput, \%sourceHT, sub {
    my ($sourceTitle, $targetTitle) = @_;
    $titlePairs{$sourceTitle} = $targetTitle;
});
loadAlignment($targetInput, \%targetHT, sub {
    my ($targetTitle, $sourceTitle) = @_;
    $titlePairs{$sourceTitle} = $targetTitle;
});

open my $outputFh, ">:encoding(UTF-8)", $output
    or die "Cannot open file $output: $!\n";

# Sorted keys make the output order reproducible between runs.
foreach my $sourceTitle (sort keys %titlePairs) {
    my $targetTitle = $titlePairs{$sourceTitle};

    # Only emit pairs for which both titles were seen with an id.
    next unless exists $sourceHT{$sourceTitle}
        && exists $targetHT{$targetTitle};

    print {$outputFh} join("\t",
        $sourceHT{$sourceTitle}, $sourceTitle,
        $targetHT{$targetTitle}, $targetTitle), "\n";
}

# Buffered write errors (e.g. a full disk) only surface when closing.
close $outputFh or die "Cannot close file $output: $!\n";

# loadAlignment($path, \%idOf, \&onPair)
#
# Reads the UTF-8 tab-separated file $path line by line. For every line that
# has a non-empty title (second column), stores $idOf->{title} = id (first
# column). If the line also has a non-empty third column, calls
# $onPair->(title, pairedTitle). Lines without a title (e.g. blank lines) are
# skipped. Dies if the file cannot be opened.
sub loadAlignment {
    my ($path, $idOf, $onPair) = @_;

    open my $in, "<:encoding(utf-8)", $path
        or die "Cannot open file $path: $!\n";

    while (my $line = <$in>) {
        # Strip LF or CRLF; a stray "\r" would stick to the last column and
        # stop it from matching the same title in the other file.
        $line =~ s/\r?\n\z//;

        my ($id, $title, $pairedTitle) = split /\t/, $line;
        next unless defined $title && length $title;

        $idOf->{$title} = $id;
        $onPair->($title, $pairedTitle)
            if defined $pairedTitle && length $pairedTitle;
    }

    close $in;
    return;
}