package cal_posterior;
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(
  cal_p_a_t
);

# cal_p_a_t($input, $output, $authormapfile, $originalfile, $isdebug)
#
# Reads a file of per-token assignments, accumulates co-occurrence counts and
# writes two score matrices (one row per topic, scores separated by a single
# space, formatted with "%.8f"):
#
#   $output        one column per author id (0 .. highest author id seen)
#   "doc".$output  one column per document, in input order
#
# NOTE: the second file name is built by prefixing the string "doc" to
# $output exactly as given, so $output is expected to be a bare file name
# relative to the current directory.
#
# Input format, as parsed below:
#   #@<ids>     starts a new document; <ids> are whitespace-separated author ids
#   #c<digits>  conference id of the current document
#   #...        any other line starting with '#' is ignored
#   <tokens>    whitespace-separated "wordid:topic:authorid" triples
#
# Author score for topic k and author a:
#   (naz[a][k] + alpha) / (na[a] + K*alpha) * (na[a] / nz[k])
# (presumably an estimate of P(author | topic) -- confirm against the model
# description).  A topic with no assigned tokens gets score 0.
#
# Document score for topic k: product of the author scores of the document's
# authors, times, for every token, the smoothed word/topic ratio and the
# smoothed conference/topic ratio.  This is a plain product, so long
# documents can underflow to 0.
#
# When $isdebug is true, additionally writes into the current directory:
#   p_a.txt            na[a] / total tokens, one line per author id
#   topic_authors.txt  up to 20 best-scored authors per topic
#   topic_docs.txt     up to 20 best-scored documents per topic
# $authormapfile ("name<TAB>id" lines, first line skipped) and $originalfile
# ("#*<title>" lines, first line skipped) are only read in debug mode; if
# either cannot be opened a warning is issued and names/titles stay empty.
#
# Dies if $input or any output file cannot be opened, if $input contains no
# token assignments, or if data appears before the first "#@" line.
sub cal_p_a_t{
  my ($input, $output, $authormapfile, $originalfile, $isdebug) = @_;
  use strict;

  my $top_n = 20;   # entries listed per topic in the debug reports

  my (@naz, @nwz, @ncz);   # author/topic, word/topic, conference/topic counts
  my (@na, @nz);           # tokens per author, tokens per topic
  my $nall = 0;            # total number of tokens
  my @docs;                # per document: authorsid, confid, wordsid

  open(my $is, '<', $input)
    or die "cal_p_a_t: cannot open input '$input': $!";
  while(my $line = <$is>){
    chomp($line);
    if($line =~ /^\s*\#\@(.*)$/){
      # split(' ') ignores leading whitespace, so "#@ 1 2" yields (1, 2)
      push(@docs, { "authorsid" => [ split(' ', $1) ], "wordsid" => [] });
    }
    elsif($line =~ /^\s*\#c(\d+)\s*$/){
      die "cal_p_a_t: conference line before first '#\@' line in '$input'"
        unless @docs;
      $docs[-1]{"confid"} = $1;
    }
    elsif($line =~ /^\s*\#/){
      next;
    }
    else{
      my @assigns = split(' ', $line);
      next unless @assigns;   # blank line
      die "cal_p_a_t: token line before first '#\@' line in '$input'"
        unless @docs;
      my $doc = $docs[-1];
      # a document without a "#c" line is counted under conference 0
      my $confid = defined $doc->{"confid"} ? $doc->{"confid"} : 0;
      foreach my $assign (@assigns){
        my ($wid, $z, $aid) = split(/\:/, $assign);
        $naz[$aid][$z]++;
        $nwz[$wid][$z]++;
        $ncz[$confid][$z]++;
        $na[$aid]++;
        $nz[$z]++;
        $nall++;
        # accumulate, so several token lines per document are all kept
        push(@{ $doc->{"wordsid"} }, $wid);
      }
    }
  }
  close($is);

  my $topic_count = @nz;
  die "cal_p_a_t: no topic assignments found in '$input'"
    unless $topic_count;
  my $acount    = @na;
  my $doc_count = @docs;

  my $alpha  = 50.0/$topic_count;
  my $beta   = 0.01;
  my $Vbeta  = $beta * @nwz;          # smoothing mass over the vocabulary
  my $Kalpha = $alpha * $topic_count;
  my $cbeta  = 0.1;
  my $Cbeta  = $cbeta * @ncz;         # smoothing mass over the conferences

  my (@pza, @pzd);
  my $docoutput = "doc".$output;
  open(my $os, '>', $output)
    or die "cal_p_a_t: cannot write '$output': $!";
  open(my $ospzd, '>', $docoutput)
    or die "cal_p_a_t: cannot write '$docoutput': $!";

  for my $k (0 .. $topic_count-1){
    my $nzk = $nz[$k] || 0;

    my @author_row;
    for my $aid (0 .. $acount-1){
      my $na_a   = $na[$aid] || 0;
      my $naz_ak = $naz[$aid][$k] || 0;
      # a topic without tokens would divide by zero; score it 0 instead
      my $score = $nzk
        ? ($naz_ak + $alpha)/($na_a + $Kalpha) * ($na_a/$nzk)
        : 0;
      $pza[$k][$aid] = { "aid" => $aid, "score" => $score };
      push(@author_row, sprintf("%.8f", $score));
    }
    print {$os} join(" ", @author_row), "\n";

    my @doc_row;
    for my $d (0 .. $doc_count-1){
      my $doc = $docs[$d];
      my $score = 1.0;
      foreach my $aid (@{ $doc->{"authorsid"} }){
        # an author id that never received a token has no entry: factor 0
        my $entry = $pza[$k][$aid];
        $score *= ($entry ? $entry->{"score"} : 0);
      }

      my $confid    = defined $doc->{"confid"} ? $doc->{"confid"} : 0;
      my $conf_row  = $ncz[$confid];
      my $conf_cnt  = $conf_row ? ($conf_row->[$k] || 0) : 0;
      my $conf_term = ($conf_cnt + $cbeta)/($nzk + $Cbeta);
      foreach my $wid (@{ $doc->{"wordsid"} }){
        $score *= (($nwz[$wid][$k] || 0) + $beta)/($nzk + $Vbeta);
        # the conference term is applied once per token
        $score *= $conf_term;
      }

      $pzd[$k][$d] = { "idx" => $d, "score" => $score };
      push(@doc_row, sprintf("%.8f", $score));
    }
    print {$ospzd} join(" ", @doc_row), "\n";
  }
  close($os)    or die "cal_p_a_t: error closing '$output': $!";
  close($ospzd) or die "cal_p_a_t: error closing '$docoutput': $!";

  return unless $isdebug;

  # ---- debug reports -------------------------------------------------

  open(my $ospa, '>', "p_a.txt")
    or die "cal_p_a_t: cannot write 'p_a.txt': $!";
  for my $aid (0 .. $acount-1){
    my $pa = ($na[$aid] || 0)/$nall;
    print {$ospa} $pa."\n";
  }
  close($ospa);

  # author id -> name; the first line of the map file is skipped
  my @authors;
  if(defined $authormapfile && open(my $ismap, '<', $authormapfile)){
    my $skipped = <$ismap>;
    while(my $line = <$ismap>){
      chomp($line);
      my ($name, $id) = split(/\t/, $line);
      # ignore blank/malformed lines instead of clobbering entry 0
      next unless defined $id && $id =~ /^\s*(\d+)\s*$/;
      $authors[$1] = $name;
    }
    close($ismap);
  }
  else{
    warn "cal_p_a_t: cannot read author map file: $!\n";
  }

  open(my $osauth, '>', "topic_authors.txt")
    or die "cal_p_a_t: cannot write 'topic_authors.txt': $!";
  for my $k (0 .. $topic_count-1){
    my @candidates = @{ $pza[$k] }[0 .. $acount-1];
    print {$osauth} "Topic $k\n";
    foreach my $entry (_top_scored(\@candidates, $top_n)){
      my $aid  = $entry->{"aid"};
      my $name = defined $authors[$aid] ? $authors[$aid] : "";
      print {$osauth} "\t".$aid."\t".$name."\t".$entry->{"score"}."\n";
    }
    print {$osauth} "\n";
  }
  close($osauth);

  # document index -> title; the first line of the file is skipped
  my @docsmap;
  if(defined $originalfile && open(my $isorig, '<', $originalfile)){
    my $skipped = <$isorig>;
    while(my $line = <$isorig>){
      chomp($line);
      push(@docsmap, $1) if $line =~ /^\s*\#\*(.*)$/;
    }
    close($isorig);
  }
  else{
    warn "cal_p_a_t: cannot read original document file: $!\n";
  }

  open(my $osdocs, '>', "topic_docs.txt")
    or die "cal_p_a_t: cannot write 'topic_docs.txt': $!";
  for my $k (0 .. $topic_count-1){
    my @candidates = $doc_count ? @{ $pzd[$k] }[0 .. $doc_count-1] : ();
    print {$osdocs} "Topic $k\n";
    foreach my $entry (_top_scored(\@candidates, $top_n)){
      my $docid = $entry->{"idx"};
      my $title = defined $docsmap[$docid] ? $docsmap[$docid] : "";
      print {$osdocs} "\t".$docid."\t".$title."\t".$entry->{"score"}."\n";
    }
    print {$osdocs} "\n";
  }
  close($osdocs);

  return;
}

# Returns at most $limit entries of @$entries (hash refs with a "score" key),
# ordered by descending score.
sub _top_scored{
  my ($entries, $limit) = @_;
  use strict;
  my @sorted = sort { $b->{"score"} <=> $a->{"score"} } @$entries;
  splice(@sorted, $limit) if @sorted > $limit;
  return @sorted;
}
