Skip to content

Commit

Permalink
Now it also creates a list of verb transitivity
Browse files Browse the repository at this point in the history
  • Loading branch information
wcolen committed Jun 17, 2012
1 parent a91f5f8 commit 91a41ca
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion scripts/createCogrooFile.pl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ sub install {

my $isCollectTags = 0;
my $isCollectContractions = 0;
my $isCollectVerbTransitivity = 1;

# ptbr.dic for production, sample.dic for test

Expand All @@ -54,6 +55,11 @@ sub install {
open (CON, '>:encoding(UTF-8)', $out.'contractions.txt');
}

if($isCollectVerbTransitivity) {
# the verb transitivity list
open (TRAN, '>:encoding(UTF-8)', $out.'trans.txt');
}

# hash to remove duplicates and sort... is it necessary for simple? maybe we should serialize it directly to make it faster

my %tags;
Expand Down Expand Up @@ -110,6 +116,7 @@ sub install {
my $analisis;
my $rad;
if(!($dword =~ m/\S-\S/ && ${$key}{'CAT'} eq 'v')) { #avoid amar-lhe, amo-lha-ei etc
my $trans;
while ( my ($k,$v) = each %$key ) {
if( $k eq "rad" ) {
$rad = $v;
Expand All @@ -120,9 +127,15 @@ sub install {
$tags{"$k:$v"} = 1; # enable to create a log of tags
}
}
elsif( $k eq 'TR' ) {
$trans = $v;
}
}
$rad =~ s/ /_/g;
print SIMPLE "$dword $rad>$analisis\n";
if($trans) {
print TRAN "$dword\t$rad\t$trans\n";
}
#$simple{$dword}{"$rad>$analisis"} = 1;
if($isCollectContractions && ${$key}{'CAT'} eq 'cp') {
$con{$dword} = 1;
Expand Down Expand Up @@ -158,4 +171,6 @@ sub install {
close CON or die "bad CON: $! $?";
}


if($isCollectVerbTransitivity) {
close TRAN or die "bad TRAN: $! $?";
}

0 comments on commit 91a41ca

Please sign in to comment.