
/* 
 * References:
 * + The Java code of Gregor Heinrich (gregor@arbylon.net)
 *   http://www.arbylon.net/projects/LdaGibbsSampler.java
 * + "Parameter estimation for text analysis" by Gregor Heinrich
 *   http://www.arbylon.net/publications/text-est.pdf
 */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <algorithm>

#include "constants.h"
#include "strtokenizer.h"
#include "utils.h"
#include "dataset.h"
#include "model.h"
#include "mathlib.h"

#define isnan(x) ((x) != (x))

using namespace std;

model::~model() {
	/*
	 * Intentionally empty: the destructor currently releases none of the
	 * buffers owned by the model (ptrndata, pnewdata, z, nw, nd, na, the
	 * *sum counters, theta, theta_ak, topic_author_ka, phi, and the new*
	 * inference buffers), so they live until the process exits.
	 *
	 * The cleanup code that used to be here was disabled. Before it can be
	 * restored it has to be corrected:
	 *   - every buffer is allocated with new[] and must be released with
	 *     delete[] (the disabled code used scalar delete, and free() for
	 *     some of the outer arrays);
	 *   - it referenced members (p, psi) that are not initialised by
	 *     set_default_values();
	 *   - it did not cover nc, nkcsum, ncksum, theta_ck, topic_doc_kd,
	 *     topic_conf, topic or the test_* tables allocated in init_est().
	 */
}

void model::set_default_values() {
    wordmapfile = "wordmap.txt";
	authormapfile = "authormap.txt";
	confmapfile = "confmap.txt";
    trainlogfile = "trainlog.txt";
    tassign_suffix = ".tassign";
    theta_suffix = ".theta";
	theta_ak_suffix = ".theta_ak";
	theta_ck_suffix = ".theta_ck";
	topic_doc_suffix = ".topic_doc";
	topic_author_suffix = ".topic_author";
	topic_conf_suffix = ".topic_conf";
	topic_suffix = ".topic";
    phi_suffix = ".phi";
	psi_suffix = ".psi";
    others_suffix = ".others";
    twords_suffix = ".twords";
	tauthors_suffix = ".tauthors";
	tconf_suffix = ".tconf";
	test_topic_given_doc_suffix = ".test_topic_given_doc";
	test_doc_given_topic_suffix = ".test_doc_given_topic";
    dir = "./";
    dfile = "trndocs.dat";
    model_name = "model-final";    
    model_status = MODEL_STATUS_UNKNOWN;
    
    ptrndata = NULL;
    pnewdata = NULL;
    
    M = 0;
    V = 0;
    K = 100;
    alpha = 50.0 / K;
    beta = 0.1;
	mu = 0.1;
    niters = 2000;
    liter = 0;
    savestep = 200;    
    twords = 0;
    withrawstrs = 0;
    
//    p = NULL;
    z = NULL;
    nw = NULL;
    nd = NULL;
    na = NULL;
	nwsum = NULL;
    ndsum = NULL;
	naksum = NULL;
	nkasum = NULL;
    theta = NULL;
	theta_ak = NULL;
	topic_author_ka = NULL;
    phi = NULL;

    newM = 0;
    newV = 0;
    newz = NULL;
    newnw = NULL;
    newnd = NULL;
    newnwsum = NULL;
    newndsum = NULL;
    newtheta = NULL;
    newphi = NULL;
}

// Forward the command line to utils::parse_args, which fills in this model.
// Returns whatever utils::parse_args returns.
int model::parse_args(int argc, char ** argv) {
    const int rc = utils::parse_args(argc, argv, this);
    return rc;
}

// Parse the command line and run the initialiser that matches model_status.
// Returns 1 when argument parsing or the selected initialiser fails and 0
// otherwise (including when model_status matches none of the known modes).
int model::init(int argc, char ** argv) {
    if (parse_args(argc, argv)) {
        return 1;
    }

    int failed = 0;
    if (model_status == MODEL_STATUS_EST) {
        // estimate a model from scratch
        failed = init_est();
    } else if (model_status == MODEL_STATUS_ESTC) {
        // continue estimating a previously saved model
        failed = init_estc();
    } else if (model_status == MODEL_STATUS_INF) {
        // inference
        failed = init_inf();
    }

    return failed ? 1 : 0;
}

/*
 * Load the word/topic/author assignments of a previously saved model.
 *
 * Reads dir + model_name + tassign_suffix, which save_model_tassign() writes
 * as three lines per document:
 *     #@<author> <author> ...      author ids
 *     #c<conf>                     conference id
 *     <word>:<topic>:<author> ...  one token per word occurrence
 * Allocates z and ptrndata (sized by the current M) and fills them.
 *
 * Returns 0 on success, 1 if the file cannot be opened or is malformed.
 */
int model::load_model(string model_name) {
    string filename = dir + model_name + tassign_suffix;
    FILE * fin = fopen(filename.c_str(), "r");
    if (!fin) {
        // %s, not %d: the argument is a C string
        printf("Cannot open file %s to load model!\n", filename.c_str());
        return 1;
    }

    char buff[BUFF_SIZE_LONG];
    string line;

    // allocate memory for z and ptrndata
    z = new int*[M];
    ptrndata = new dataset(M);
    ptrndata->V = V;

    document * pdoc = NULL;   // document currently being assembled
    int ndocs = 0;            // documents completed so far
    int status = 0;           // becomes 1 on the first error

    while (status == 0 && fgets(buff, BUFF_SIZE_LONG - 1, fin) != NULL) {
        line = buff;
        line = utils::trimstring(line);
        if (line.length() <= 2) {
            continue;
        }

        if (line.at(0) == '#') {
            if (line.at(1) == '@') {
                // author ids; drop a header that never received its word line
                delete pdoc;
                pdoc = new document();

                // skip the "#@" marker so the first id is parsed correctly
                string ids = line.substr(2);
                strtokenizer idtok(ids, " \t\r\n");
                int acount = idtok.count_tokens();
                pdoc->setauthorcount(acount);
                for (int j = 0; j < acount; j++) {
                    pdoc->authors[j] = atoi(idtok.token(j).c_str());
                }
            } else if (line.at(1) == 'c') {
                // conference id
                if (pdoc == NULL) {
                    printf("why pdoc==NULL\n");
                    status = 1;
                } else {
                    // skip the "#c" marker; atoi on the whole line yields 0
                    pdoc->confid = atoi(line.c_str() + 2);
                }
            }
            continue;
        }

        // word assignment line
        if (pdoc == NULL) {
            printf("why pdoc==NULL\n");
            status = 1;
            continue;
        }
        if (ndocs >= M) {
            // z only has room for M documents
            printf("Too many documents in %s (expected %d)!\n", filename.c_str(), M);
            status = 1;
            continue;
        }

        strtokenizer linetok(line, " \t\r\n");
        int length = linetok.count_tokens();

        vector<int> words;
        vector<int> topics;
        vector<int> authors;
        for (int j = 0; j < length; j++) {
            string token = linetok.token(j);

            strtokenizer tok(token, ":");
            int nfields = tok.count_tokens();
            if (nfields != 2 && nfields != 3) {
                printf("Invalid word-topic assignment line!\n");
                status = 1;
                break;
            }

            words.push_back(atoi(tok.token(0).c_str()));
            topics.push_back(atoi(tok.token(1).c_str()));
            // the author field is optional; treat a missing one as 0
            authors.push_back(nfields == 3 ? atoi(tok.token(2).c_str()) : 0);
        }
        if (status != 0) {
            continue;
        }

        // hand the finished document over to the corpus
        pdoc->setwords(words);
        ptrndata->add_doc(pdoc, ndocs);
        pdoc = NULL;

        // z stores author and topic packed as author * K + topic
        z[ndocs] = new int[topics.size()];
        for (size_t j = 0; j < topics.size(); j++) {
            z[ndocs][j] = authors[j] * K + topics[j];
        }
        ndocs++;
    }

    fclose(fin);
    delete pdoc;   // non-NULL only for a document that was never added

    return status;
}

/*
 * Write every output file of the trained model.
 *
 * Intermediate snapshots (isfinal == false) go to dir + "\tmp\", the final
 * model goes to dir. Each file is named <model_name><K><suffix>. The
 * top-word/author/conference listings are only written when twords > 0.
 *
 * Stops at the first writer that fails and returns 1; returns 0 otherwise.
 */
int model::save_model(string model_name, bool isfinal) {
    string outdir = dir;
    if (!isfinal) {
        outdir = dir + "\\tmp\\";
    }

    const string stem = outdir + model_name + utils::int2str(K);

    // || short-circuits, so the writers run in order until one fails
    const bool core_failed =
        save_model_tassign(stem + tassign_suffix) ||
        save_model_others(stem + others_suffix) ||
        save_model_theta(stem + theta_suffix) ||
        save_model_theta_ak(stem + theta_ak_suffix) ||
        save_model_theta_ck(stem + theta_ck_suffix) ||
        save_model_topic_doc_kd(stem + topic_doc_suffix) ||
        save_model_topic_author_ka(stem + topic_author_suffix) ||
        save_model_phi(stem + phi_suffix) ||
        save_model_topic_conf(stem + topic_conf_suffix) ||
        save_model_topic(stem + topic_suffix) ||
        save_model_test_topic_given_doc(stem + test_topic_given_doc_suffix) ||
        save_model_test_doc_given_topic(stem + test_doc_given_topic_suffix);
    if (core_failed) {
        return 1;
    }

    if (twords > 0) {
        const bool listing_failed =
            save_model_twords(stem + twords_suffix) ||
            save_model_tauthors(stem + tauthors_suffix) ||
            save_model_tconfs(stem + tconf_suffix);
        if (listing_failed) {
            return 1;
        }
    }

    return 0;
}

// Compute the perplexity and append it, together with K, to `filename`.
// Returns 1 if the file cannot be opened for appending, 0 otherwise.
int model::compute_and_save_perplexity(string filename){
    // computed before the file is opened, as the result is needed either way
    const double value = compute_perplexity();

    FILE * out = fopen(filename.c_str(), "a");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    fprintf(out, "K=%d\n", K);
    fprintf(out, "perplexity=%f\n", value);
    fprintf(out, "\n");

    fclose(out);
    return 0;
}

/*
 * Write the per-word assignments of the training corpus.
 *
 * For each document three lines are written: "#@" followed by the author
 * ids, "#c" followed by the conference id, and one "word:topic:author"
 * token per word. z[d][n] is unpacked as author = z / K, topic = z % K.
 *
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int model::save_model_tassign(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int d = 0; d < ptrndata->M; ++d) {
        // author header
        fprintf(out, "#@");
        for (int a = 0; a < ptrndata->docs[d]->authorcount; ++a) {
            fprintf(out, "%d ", ptrndata->docs[d]->authors[a]);
        }

        // conference header
        fprintf(out, "\n#c%d\n", ptrndata->docs[d]->confid);

        // word assignments
        for (int n = 0; n < ptrndata->docs[d]->length; ++n) {
            const int packed = z[d][n];
            const int author_id = packed / K;
            const int topic_id = packed % K;
            fprintf(out, "%d:%d:%d ", ptrndata->docs[d]->words[n], topic_id, author_id);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}

// Write theta as M lines of K space-separated values.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_model_theta(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int doc = 0; doc < M; ++doc) {
        for (int k = 0; k < K; ++k) {
            fprintf(out, "%f ", theta[doc][k]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}

// Write theta_ak as A lines (one per author) of K space-separated values.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_model_theta_ak(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int author = 0; author < A; ++author) {
        for (int k = 0; k < K; ++k) {
            fprintf(out, "%f ", theta_ak[author][k]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}
// Write theta_ck as C lines (one per conference) of K space-separated values.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_model_theta_ck(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int conf = 0; conf < C; ++conf) {
        for (int k = 0; k < K; ++k) {
            fprintf(out, "%f ", theta_ck[conf][k]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}
// Write topic_doc_kd as K lines (one per topic) of M space-separated values.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_model_topic_doc_kd(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int k = 0; k < K; ++k) {
        for (int doc = 0; doc < M; ++doc) {
            fprintf(out, "%f ", topic_doc_kd[k][doc]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}
// Write topic_author_ka as K lines (one per topic) of A space-separated values.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_model_topic_author_ka(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int k = 0; k < K; ++k) {
        for (int author = 0; author < A; ++author) {
            fprintf(out, "%f ", topic_author_ka[k][author]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}

// Write topic_conf as K lines (one per topic) of C space-separated values.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_model_topic_conf(string filename){
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int k = 0; k < K; ++k) {
        for (int conf = 0; conf < C; ++conf) {
            fprintf(out, "%f ", topic_conf[k][conf]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}

// Write the topic vector, one value (followed by a space) per line.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_model_topic(string filename){
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    int k = 0;
    while (k < K) {
        fprintf(out, "%f ", topic[k]);
        fprintf(out, "\n");
        ++k;
    }

    fclose(out);
    return 0;
}

// Write test_topic_given_doc as M lines of K space-separated values.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_model_test_topic_given_doc(string filename){
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int doc = 0; doc < M; ++doc) {
        for (int k = 0; k < K; ++k) {
            fprintf(out, "%f ", test_topic_given_doc[doc][k]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}
// Write test_doc_given_topic as K lines of M space-separated values.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_model_test_doc_given_topic(string filename){
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int k = 0; k < K; ++k) {
        for (int doc = 0; doc < M; ++doc) {
            fprintf(out, "%f ", test_doc_given_topic[k][doc]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}
// Write phi as K lines (one per topic) of V space-separated values.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_model_phi(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int k = 0; k < K; ++k) {
        for (int word = 0; word < V; ++word) {
            fprintf(out, "%f ", phi[k][word]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}

// Write the scalar settings of the model (hyper-parameters, corpus sizes and
// the last iteration) as key=value lines.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_model_others(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    // hyper-parameters
    fprintf(out, "alpha=%f\n", alpha);
    fprintf(out, "beta=%f\n", beta);

    // sizes
    fprintf(out, "ntopics=%d\n", K);
    fprintf(out, "ndocs=%d\n", M);
    fprintf(out, "nwords=%d\n", V);
    fprintf(out, "nauthors=%d\n", A);
    fprintf(out, "nconfs=%d\n", C);
    fprintf(out, "nwordtoken=%d\n", TOTAL_W);

    // progress
    fprintf(out, "liter=%d\n", liter);

    fclose(out);
    return 0;
}

/*
 * Write, for every topic, the `twords` most probable words according to phi.
 *
 * Words are ranked with PairIDGreater and looked up in id2word; ids missing
 * from id2word are skipped silently. The number of words listed is capped at
 * V through a local limit, so the configured `twords` is left untouched for
 * the other listings.
 *
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int model::save_model_twords(string filename) {
    FILE * fout = fopen(filename.c_str(), "w");
    if (!fout) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    // cap locally instead of overwriting the member
    const int limit = (twords > V) ? V : twords;
    mapid2word::iterator it;

    for (int k = 0; k < K; k++) {
        vector<pair<int, double> > words_probs;
        for (int w = 0; w < V; w++) {
            words_probs.push_back(pair<int, double>(w, phi[k][w]));
        }

        // rank the words of this topic by probability
        std::sort(words_probs.begin(), words_probs.end(), PairIDGreater);

        fprintf(fout, "Topic %dth:\n", k);
        for (int i = 0; i < limit; i++) {
            it = id2word.find(words_probs[i].first);
            if (it != id2word.end()) {
                fprintf(fout, "\t%s   %f\n", (it->second).c_str(), words_probs[i].second);
            }
        }
    }

    fclose(fout);

    return 0;
}

/*
 * Write, for every topic, the `twords` highest-ranked authors according to
 * topic_author_ka.
 *
 * Authors are ranked with PairIDGreater and looked up in id2author; ids
 * missing from id2author are skipped silently. The listing length is capped
 * at A through a local limit: overwriting `twords` here (as was done before)
 * permanently shortened every later top-word listing to A entries.
 *
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int model::save_model_tauthors(string filename) {
    FILE * fout = fopen(filename.c_str(), "w");
    if (!fout) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    // cap locally instead of overwriting the member
    const int limit = (twords > A) ? A : twords;
    mapid2author::iterator it;

    for (int k = 0; k < K; k++) {
        vector<pair<int, double> > authors_probs;
        for (int a = 0; a < A; a++) {
            authors_probs.push_back(pair<int, double>(a, topic_author_ka[k][a]));
        }

        // rank the authors of this topic
        std::sort(authors_probs.begin(), authors_probs.end(), PairIDGreater);

        fprintf(fout, "Topic %dth:\n", k);
        for (int i = 0; i < limit; i++) {
            it = id2author.find(authors_probs[i].first);
            if (it != id2author.end()) {
                fprintf(fout, "\t%s   %f\n", (it->second).c_str(), authors_probs[i].second);
            }
        }
    }

    fclose(fout);

    return 0;
}

/*
 * Write, for every topic, the `twords` highest-ranked conferences according
 * to topic_conf.
 *
 * Conferences are ranked with PairIDGreater and looked up in id2conf; ids
 * missing from id2conf are skipped silently. The listing length is capped at
 * C through a local limit: overwriting `twords` here (as was done before)
 * permanently shortened every later top-word listing to C entries.
 *
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int model::save_model_tconfs(string filename){
    FILE * fout = fopen(filename.c_str(), "w");
    if (!fout) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    // cap locally instead of overwriting the member
    const int limit = (twords > C) ? C : twords;
    mapid2conf::iterator it;

    for (int k = 0; k < K; k++) {
        vector<pair<int, double> > confs_probs;
        for (int c = 0; c < C; c++) {
            confs_probs.push_back(pair<int, double>(c, topic_conf[k][c]));
        }

        // rank the conferences of this topic
        std::sort(confs_probs.begin(), confs_probs.end(), PairIDGreater);

        fprintf(fout, "Topic %dth:\n", k);
        for (int i = 0; i < limit; i++) {
            it = id2conf.find(confs_probs[i].first);
            if (it != id2conf.end()) {
                fprintf(fout, "\t%s   %f\n", (it->second).c_str(), confs_probs[i].second);
            }
        }
    }

    fclose(fout);

    return 0;
}

/*
 * Write the output files of an inference run into dir.
 *
 * Saves the assignments, the scalar settings and newphi, plus the top-word
 * listing when twords > 0, and finally appends the perplexity to
 * "perplexity.txt" (the result of that last step is not checked). The
 * newtheta file is currently not written.
 *
 * Returns 1 as soon as one of the checked writers fails, 0 otherwise.
 */
int model::save_inf_model(string model_name) {
    const string stem = dir + model_name;

    // || short-circuits, so the writers run in order until one fails
    if (save_inf_model_tassign(stem + tassign_suffix) ||
        save_inf_model_others(stem + others_suffix) ||
        save_inf_model_newphi(stem + phi_suffix)) {
        return 1;
    }

    if (twords > 0 && save_inf_model_twords(stem + twords_suffix)) {
        return 1;
    }

    compute_and_save_perplexity("perplexity.txt");
    return 0;
}

// Write one line per document of pnewdata, each holding a "word:newz" token
// per word. Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_inf_model_tassign(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int d = 0; d < pnewdata->M; ++d) {
        for (int n = 0; n < pnewdata->docs[d]->length; ++n) {
            fprintf(out, "%d:%d ", pnewdata->docs[d]->words[n], newz[d][n]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}

// Write newtheta as newM lines of K space-separated values.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_inf_model_newtheta(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int doc = 0; doc < newM; ++doc) {
        for (int k = 0; k < K; ++k) {
            fprintf(out, "%f ", newtheta[doc][k]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}

// Write newphi as K lines of newV space-separated values.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_inf_model_newphi(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    for (int k = 0; k < K; ++k) {
        for (int word = 0; word < newV; ++word) {
            fprintf(out, "%f ", newphi[k][word]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}

// Write the scalar settings of an inference run as key=value lines.
// Returns 1 if the file cannot be opened, 0 otherwise.
int model::save_inf_model_others(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    // hyper-parameters
    fprintf(out, "alpha=%f\n", alpha);
    fprintf(out, "beta=%f\n", beta);

    // sizes of the inference data
    fprintf(out, "ntopics=%d\n", K);
    fprintf(out, "ndocs=%d\n", newM);
    fprintf(out, "nwords=%d\n", newV);

    // progress
    fprintf(out, "liter=%d\n", inf_liter);

    fclose(out);
    return 0;
}

/*
 * Write, for every topic, the top `twords` words according to newphi.
 *
 * `twords` is first clamped to newV (the member itself is updated). Local
 * word ids are mapped back through pnewdata->_id2id and then id2word; ids
 * missing from either map are skipped silently.
 *
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int model::save_inf_model_twords(string filename) {
    FILE * out = fopen(filename.c_str(), "w");
    if (out == NULL) {
        printf("Cannot open file %s to save!\n", filename.c_str());
        return 1;
    }

    if (twords > newV) {
        twords = newV;
    }

    for (int k = 0; k < K; ++k) {
        // collect (local word id, probability) pairs for this topic
        vector<pair<int, double> > ranked;
        for (int w = 0; w < newV; ++w) {
            pair<int, double> entry;
            entry.first = w;
            entry.second = newphi[k][w];
            ranked.push_back(entry);
        }

        // order the pairs by probability
        utils::quicksort(ranked, 0, ranked.size() - 1);

        fprintf(out, "Topic %dth:\n", k);
        for (int i = 0; i < twords; ++i) {
            map<int, int>::iterator global_id = pnewdata->_id2id.find(ranked[i].first);
            if (global_id == pnewdata->_id2id.end()) {
                continue;
            }

            mapid2word::iterator word = id2word.find(global_id->second);
            if (word == id2word.end()) {
                continue;
            }
            fprintf(out, "\t%s   %f\n", (word->second).c_str(), ranked[i].second);
        }
    }

    fclose(out);
    return 0;
}


int model::init_est() {
    int m, n, w, k, a, c;

//    p = new double[K];

    // + read training data
    ptrndata = new dataset;
    if (ptrndata->read_trndata(dir + dfile, dir + wordmapfile, dir + authormapfile, dir + confmapfile)) {
        printf("Fail to read training data!\n");
        return 1;
    }
		
    // + allocate memory and assign values for variables
    M = ptrndata->M;
    V = ptrndata->V;
	A = ptrndata->A;
	C = ptrndata->C;

    // K: from command line or default value
    // alpha, beta: from command line or default values
    // niters, savestep: from command line or default values

    nw = new int*[V];
    for (w = 0; w < V; w++) {
        nw[w] = new int[K];
        for (k = 0; k < K; k++) {
    	    nw[w][k] = 0;
        }
    }
	
    nd = new int*[M];
    for (m = 0; m < M; m++) {
        nd[m] = new int[K];
        for (k = 0; k < K; k++) {
    	    nd[m][k] = 0;
        }
    }

	na = new int*[A];
	for(a = 0; a<A; a++){
		na[a] = new int[K];
		for(k=0; k<K; k++){
			na[a][k] = 0;
		}
	}

	nc = new int*[C];
	for(w=0; w<C; w++){
		nc[w] = new int[K];
		for(k=0; k<K; k++){
			nc[w][k] = 0;
		}
	}

	nkcsum = new int[K];
	for(k=0; k<K; k++){
		nkcsum[k] = 0;
	}
	
    nwsum = new int[K];
    for (k = 0; k < K; k++) {
		nwsum[k] = 0;
    }
    
    ndsum = new int[M];
    for (m = 0; m < M; m++) {
	ndsum[m] = 0;
    }

	naksum = new int[A];
	for(a = 0; a < A; a++){
		naksum[a] = 0;
	}

	ncksum = new int[C];
	for(c = 0; c < C; c++){
		ncksum[c] = 0;
	}

	nkasum = new int[K];
	for(k=0; k<K; k++){
		nkasum[k] = 0;
	}
	

    //srandom(time(0)); // initialize for random number generation
	TOTAL_W = 0;
	srand(time(0));
    z = new int*[M];
    for (m = 0; m < ptrndata->M; m++) {
		int N = ptrndata->docs[m]->length;
		z[m] = new int[N];
		
		int curA = ptrndata->docs[m]->authorcount;

        // initialize for z
        for (n = 0; n < N; n++) {
    	    //int topic = (int)(((double)random() / RAND_MAX) * K);
			int topic = (int)(((double)rand() / RAND_MAX) * K);		
			if(topic == K)
				topic = K-1;

			int tmp = (int)(((double)rand() / RAND_MAX) * curA);
			if(tmp==curA)
				tmp = curA-1;

			int author = ptrndata->docs[m]->authors[tmp];
			int conf = ptrndata->docs[m]->confid;

    	    z[m][n] = author*K + topic;    	    

    	    // number of instances of word i assigned to topic j
    	    nw[ptrndata->docs[m]->words[n]][topic] += 1;
    	    // number of words in document i assigned to topic j
    	    nd[m][topic] += 1;
    	    // total number of words assigned to topic j
    	    nwsum[topic] += 1;
			nc[ptrndata->docs[m]->confid][topic] += 1;
			nkcsum[topic] += 1;
			ncksum[conf] +=1;

			na[author][topic] += 1;
			naksum[author] += 1;
			nkasum[topic] += 1;

			TOTAL_W++;
        } 
        // total number of words in document i
        ndsum[m] = N;
    }

	    
    theta = new double*[M];
    for (m = 0; m < M; m++) {
        theta[m] = new double[K];
    }
	
    phi = new double*[K];
    for (k = 0; k < K; k++) {
        phi[k] = new double[V];
    }

	theta_ak = new double *[A];
	for(a =0; a<A; a++){
		theta_ak[a] = new double[K];
	}

	theta_ck = new double *[C];
	for(c =0; c<C; c++){
		theta_ck[c] = new double[K];
	}
	
	topic_doc_kd = new double *[K];
	for(k=0; k<K; k++){
		topic_doc_kd[k] = new double[M];
	}

	topic_author_ka = new double *[K];
	for(k=0; k<K; k++){
		topic_author_ka[k] = new double[A];
	}

	topic_conf = new double *[K];
	for(k=0; k<K; k++){
		topic_conf[k] = new double[C];
	}
	topic = new double [K];
	test_topic_given_doc = new double *[M];
	for(m =0;m<M;m++){
		test_topic_given_doc[m] = new double[K];
		for(k = 0; k<K ;k++){
			test_topic_given_doc[m][k]= 0.0;
		}
	}

	test_doc_given_topic = new double *[K];
	for(k =0;k<K;k++){
		test_doc_given_topic[k] = new double[M];
		for(m = 0; m<M ;m++){
			test_doc_given_topic[k][m]= 0.0;
		}
	}



    return 0;
}

int model::init_estc() {
    // estimating the model from a previously estimated one
    int m, n, w, k;

//    p = new double[K];

    // load moel, i.e., read z and ptrndata
    if (load_model(model_name)) {
		printf("Fail to load word-topic assignmetn file of the model!\n");
		return 1;
    }

    nw = new int*[V];
    for (w = 0; w < V; w++) {
        nw[w] = new int[K];
        for (k = 0; k < K; k++) {
    	    nw[w][k] = 0;
        }
    }
	
    nd = new int*[M];
    for (m = 0; m < M; m++) {
        nd[m] = new int[K];
        for (k = 0; k < K; k++) {
    	    nd[m][k] = 0;
        }
    }
	
    nwsum = new int[K];
    for (k = 0; k < K; k++) {
	nwsum[k] = 0;
    }
    
    ndsum = new int[M];
    for (m = 0; m < M; m++) {
	ndsum[m] = 0;
    }

    for (m = 0; m < ptrndata->M; m++) {
	int N = ptrndata->docs[m]->length;

	// assign values for nw, nd, nwsum, and ndsum	
        for (n = 0; n < N; n++) {
    	    int w = ptrndata->docs[m]->words[n];
    	    int topic = z[m][n];
    	    
    	    // number of instances of word i assigned to topic j
    	    nw[w][topic] += 1;
    	    // number of words in document i assigned to topic j
    	    nd[m][topic] += 1;
    	    // total number of words assigned to topic j
    	    nwsum[topic] += 1;
        } 
        // total number of words in document i
        ndsum[m] = N;      
    }
	
    theta = new double*[M];
    for (m = 0; m < M; m++) {
        theta[m] = new double[K];
    }
	
    phi = new double*[K];
    for (k = 0; k < K; k++) {
        phi[k] = new double[V];
    }    

    return 0;        
}

/*
 * Run the Gibbs sampler for niters iterations, continuing from liter.
 *
 * Every word of every document is resampled once per iteration. When
 * savestep > 0 an intermediate model is written every savestep iterations;
 * the final model is always written at the end. The id maps are only loaded
 * when top-word listings are requested (twords > 0).
 */
void model::estimate() {
    if (twords > 0) {
        // the listings need the id -> name maps
        dataset::read_wordmap(dir + wordmapfile, &id2word);
        dataset::read_authormap(dir + authormapfile, &id2author);
        dataset::read_id2confmap(dir + confmapfile, &id2conf);
    }

    printf("Sampling %d iterations!\n", niters);

    const int start_iter = liter;
    liter = start_iter + 1;
    while (liter <= niters + start_iter) {
        printf("Iteration %d ...\n", liter);

        // resample z[doc][pos]; the value packs author and topic together
        for (int doc = 0; doc < M; ++doc) {
            for (int pos = 0; pos < ptrndata->docs[doc]->length; ++pos) {
                z[doc][pos] = sampling(doc, pos);
            }
        }

        if (savestep > 0 && liter % savestep == 0) {
            printf("Saving the model at iteration %d ...\n", liter);
            compute_theta();
            compute_theta_ak();
            compute_theta_ck();
            compute_phi();
            compute_topic_doc_kd();
            compute_topic_author_ka();
            compute_topic_conf();
            compute_topic();
            compute_test_topic_given_doc();
            compute_test_doc_given_topic();
            save_model(utils::generate_model_name(liter), false);
        }

        ++liter;
    }

    printf("Gibbs sampling completed!\n");
    printf("Saving the final model!\n");
    compute_theta();
    compute_theta_ak();
    compute_theta_ck();
    compute_phi();
    compute_topic_doc_kd();
    compute_topic_author_ka();
    compute_topic_conf();
    compute_topic();
    compute_test_topic_given_doc();
    compute_test_doc_given_topic();

    // the loop leaves liter one past the last completed iteration
    liter--;
    save_model(utils::generate_model_name(-1), true);
}

int model::sampling(int m, int n) {
    // remove z_i from the count variables
    int ka = z[m][n];
	int author = (int)(ka/K);
	//author = ptrndata->docs[m]->authors[tmp];
	int topic = ka%K;


    int w = ptrndata->docs[m]->words[n];
	int cid = ptrndata->docs[m]->confid;
    nw[w][topic] -= 1;
    nd[m][topic] -= 1;
	na[author][topic] -= 1;
	nc[cid][topic] -= 1;

    nwsum[topic] -= 1;
    ndsum[m] -= 1;
	naksum[author] -= 1;
	nkasum[topic] -= 1;
	nkcsum[topic] -= 1;
	ncksum[cid] -= 1;

    double Vbeta = V * beta;
	double Cmu = C * mu;
    double Kalpha = K * alpha; 
	// do multinomial sampling via cumulative method
	int k, a, aid, acount;
	acount = this->ptrndata->docs[m]->authorcount;
	int pnum = K * acount;
	double * pp = new double[pnum];

	double confprob = 1.0;
	double topicauthorprob = 0.0;
	double wordtopicprob = 0.0;
	for(a=0; a<acount; a++){
		for(k=0; k<K; k++){
			confprob = (nc[cid][k] + mu) / (nkcsum[k] + Cmu);			
			aid = this->ptrndata->docs[m]->authors[a];
			
			topicauthorprob = (na[aid][k] + alpha)/(naksum[aid] + Kalpha);
			wordtopicprob = (nw[w][k] + beta) / (nwsum[k] + Vbeta);			

			pp[a*K + k] = wordtopicprob * topicauthorprob * confprob;
			
		}
	}
    
    // cumulate multinomial parameters
    for (k = 1; k < pnum; k++) {
		pp[k] += pp[k - 1];
    }
	if(pp[pnum-1]==0)
		printf("sum0 ");
    // scaled sample because of unnormalized p[]
    //double u = ((double)random() / RAND_MAX) * p[K - 1];
	double u = ((double)rand() / RAND_MAX) * pp[pnum - 1];
    
    for (ka = 0; ka < pnum; ka++) {
		if (pp[ka] >= u) {
			break;
		}
    }
	int tmp = (int)(ka/K);
	if(tmp>=pnum)
		printf("over ");
	author = this->ptrndata->docs[m]->authors[tmp];
	topic = ka%K;	
    
    // add newly estimated z_i to count variables
    nw[w][topic] += 1;
    nd[m][topic] += 1;
	na[author][topic] += 1;
	nc[cid][topic] += 1;

    nwsum[topic] += 1;
    ndsum[m] += 1;    
    naksum[author] += 1;
	nkasum[topic] += 1;
	nkcsum[topic] += 1;
	ncksum[cid] +=1;

	ka = author * K + topic;
	delete pp;

    return ka;
}

void model::compute_theta_ak() {
    for (int a = 0; a < A; a++) {
		for (int k = 0; k < K; k++) {
			theta_ak[a][k] = (na[a][k] + alpha) / (naksum[a] + K * alpha);
		}
    }
}

void model::compute_topic_author_ka() {
    for (int k = 0; k < K; k++) {
		for (int a = 0; a < A; a++) {
			topic_author_ka[k][a] = (na[a][k] + alpha) / (nkasum[k] + A * alpha);
		}
    }
}

// Fill theta[m][k] with the smoothed share of topic k in document m:
// (nd + alpha) / (ndsum + K * alpha).
void model::compute_theta() {
    for (int doc = 0; doc < M; ++doc) {
        int topic = 0;
        while (topic < K) {
            theta[doc][topic] = (nd[doc][topic] + alpha) / (ndsum[doc] + K * alpha);
            ++topic;
        }
    }
}
// Fill topic_doc_kd[k][m] with the smoothed share of document m among the
// words assigned to topic k: (nd + alpha) / (nwsum + M * alpha).
void model::compute_topic_doc_kd() {
    for (int topic = 0; topic < K; ++topic) {
        int doc = 0;
        while (doc < M) {
            topic_doc_kd[topic][doc] = (nd[doc][topic] + alpha) / (nwsum[topic] + M * alpha);
            ++doc;
        }
    }
}


void model::compute_phi() {
    for (int k = 0; k < K; k++) {
		for (int w = 0; w < V; w++) {
			phi[k][w] = (nw[w][k] + beta) / (nwsum[k] + V * beta);
			//if(phi[k][w]<0)
			//	printf("ad");
		}
    }
}

void model::compute_topic_conf() {
    for (int k = 0; k < K; k++) {
		for (int c = 0; c < C; c++) {
			topic_conf[k][c] = (nc[c][k] + mu) / (nkcsum[k] + C * mu);
			//if(phi[k][w]<0)
			//	printf("ad");
		}
    }
}
void model::compute_theta_ck() {
    for (int c = 0; c < C; c++) {
		for (int k = 0; k < K; k++) {
			theta_ck[c][k] = (nc[c][k] + mu) / (ncksum[c] + K * mu);
		}
    }
}

void model::compute_topic() {
	double wsum = 0;
	for (int m = 0; m < M; m++) {
			int N = ptrndata->docs[m]->length;
			wsum +=N;
	}
	for (int k = 0; k < K; k++) {
			topic[k] = nwsum[k]/wsum;
	}
}
/*
 * Fill test_topic_given_doc[m][k] with the average of theta_ak[a][k] over
 * the authors a of document m.
 *
 * Each entry is reset before accumulating: estimate() calls this at every
 * save step and again for the final model, and without the reset every call
 * added another copy of the average on top of the previous result.
 * Requires theta_ak to be up to date (see compute_theta_ak()).
 */
void model::compute_test_topic_given_doc() {
    for (int m = 0; m < M; m++) {
        const int account = ptrndata->docs[m]->authorcount;
        for (int k = 0; k < K; k++) {
            double acc = 0.0;
            for (int a = 0; a < account; a++) {
                const int aid = ptrndata->docs[m]->authors[a];
                acc += theta_ak[aid][k] * ((double)1 / (double)account);
            }
            test_topic_given_doc[m][k] = acc;
        }
    }
}

// Fills test_doc_given_topic[k][m] with a per-topic score for every training
// document, normalised so that each row k sums to 1 over the documents.
//
// The unnormalised score of document m under topic k is
//   exp( mean_a log topic_author_ka[k][a]
//      + log topic_conf[k][conf(m)]
//      + mean_w log phi[k][w] )
// i.e. the geometric mean of the author terms, times the conference term,
// times the geometric mean of the word terms.
void model::compute_test_doc_given_topic() {
    for (int k = 0; k < K; k++) {
        double sum = 0.0;
        for (int m = 0; m < M; m++) {
            // Log-sums start at 0 (the log of an empty product).
            double author_log = 0.0;
            const int account = ptrndata->docs[m]->authorcount;
            for (int a = 0; a < account; a++) {
                const int aid = ptrndata->docs[m]->authors[a];
                author_log += log(this->topic_author_ka[k][aid]);
            }

            const int confid = ptrndata->docs[m]->confid;
            const double conf_log = log(this->topic_conf[k][confid]);

            double word_log = 0.0;
            const int wordcount = ptrndata->docs[m]->length;
            for (int w = 0; w < wordcount; w++) {
                const int w_index = ptrndata->docs[m]->words[w];
                word_log += log(this->phi[k][w_index]);
            }

            // An empty author or word list contributes nothing instead of
            // dividing by zero.
            const double author_term = (account > 0) ? author_log / account : 0.0;
            const double word_term = (wordcount > 0) ? word_log / wordcount : 0.0;

            const double doc_proc = exp(author_term + conf_log + word_term);
            sum += doc_proc;
            test_doc_given_topic[k][m] = doc_proc;
        }

        // Normalise over documents; leave the row untouched if there is no
        // mass to normalise by.
        if (sum > 0.0) {
            for (int m = 0; m < M; m++) {
                test_doc_given_topic[k][m] = test_doc_given_topic[k][m] / sum;
            }
        }
    }
}








int model::init_inf() {
    // estimating the model from a previously estimated one
    int m, n, w, k, a, c;

//    p = new double[K];

    // load moel, i.e., read z and ptrndata
    if (load_model(model_name)) {
		printf("Fail to load word-topic assignmetn file of the model!\n");
		return 1;
    }

    nw = new int*[V];
    for (w = 0; w < V; w++) {
        nw[w] = new int[K];
        for (k = 0; k < K; k++) {
    	    nw[w][k] = 0;
        }
    }
	
    nd = new int*[M];
    for (m = 0; m < M; m++) {
        nd[m] = new int[K];
        for (k = 0; k < K; k++) {
    	    nd[m][k] = 0;
        }
    }

	na = new int*[A];
	for(a = 0; a<A; a++){
		na[a] = new int[K];
		for(k=0; k<K; k++){
			na[a][k] = 0;
		}
	}

	nc = new int*[C];
	for(w=0; w<C; w++){
		nc[w] = new int[K];
		for(k=0; k<K; k++){
			nc[w][k] = 0;
		}
	}

	nkcsum = new int[K];
	for(k=0; k<K; k++){
		nkcsum[k] = 0;
	}
	
    nwsum = new int[K];
    for (k = 0; k < K; k++) {
	nwsum[k] = 0;
    }
    
    ndsum = new int[M];
    for (m = 0; m < M; m++) {
	ndsum[m] = 0;
    }

	naksum = new int[A];
	for(a = 0; a < A; a++){
		naksum[a] = 0;
	}

	ncksum = new int[C];
	for(c = 0; c < C; c++){
		ncksum[c] = 0;
	}

	nkasum = new int[K];
	for(k=0; k<K; k++){
		nkasum[k] = 0;
	}

	TOTAL_W = 0;

    for (m = 0; m < ptrndata->M; m++) {
	int N = ptrndata->docs[m]->length;

	// assign values for nw, nd, nwsum, and ndsum	
        for (n = 0; n < N; n++) {
    	    int w = ptrndata->docs[m]->words[n];
			int ka = z[m][n];
			int author, topic;
			author = (int)(ka/K);
			topic = ka%K;
    	    
    	    
    	    // number of instances of word i assigned to topic j
    	    nw[w][topic] += 1;
    	    // number of words in document i assigned to topic j
    	    nd[m][topic] += 1;
    	    // total number of words assigned to topic j
    	    nwsum[topic] += 1;

			na[author][topic] += 1;
			naksum[author] += 1;
			nkasum[topic] += 1;

			nc[ptrndata->docs[m]->confid][topic] += 1;
			nkcsum[topic] += 1;
			ncksum[ptrndata->docs[m]->confid] += 1;

			TOTAL_W++;
        } 
        // total number of words in document i
        ndsum[m] = N;      
    }
    
    // read new data for inference
    pnewdata = new dataset;
    if (withrawstrs) {
	if (pnewdata->read_newdata_withrawstrs(dir + dfile, dir + wordmapfile)) {
    	    printf("Fail to read new data!\n");
    	    return 1;
	}    
    } else {
	if (pnewdata->read_newdata(dir + dfile, dir + wordmapfile, dir + authormapfile, dir + confmapfile)) {
    	    printf("Fail to read new data!\n");
    	    return 1;
	}    
    }
    
    newM = pnewdata->M;
    newV = pnewdata->V;
	newA = pnewdata->A;
	newC = pnewdata->C;
    
    newnw = new int*[newV];
    for (w = 0; w < newV; w++) {
        newnw[w] = new int[K];
        for (k = 0; k < K; k++) {
    	    newnw[w][k] = 0;
        }
    }
	
    newnd = new int*[newM];
    for (m = 0; m < newM; m++) {
        newnd[m] = new int[K];
        for (k = 0; k < K; k++) {
    	    newnd[m][k] = 0;
        }
    }
	
	newna = new int*[newA];
	for(a = 0; a<newA; a++){
		newna[a] = new int[K];
		for(k=0; k<K; k++){
			newna[a][k] = 0;
		}
	}

	newnc = new int*[newC];
	for(w=0; w<newC; w++){
		newnc[w] = new int[K];
		for(k=0; k<K; k++){
			newnc[w][k] = 0;
		}
	}

	newnkcsum = new int[K];
	for(k=0; k<K; k++){
		newnkcsum[k] = 0;
	}

	newncksum = new int[C];
	for(c=0; c<C; c++){
		newncksum[c] = 0;
	}

    newnwsum = new int[K];
    for (k = 0; k < K; k++) {
	newnwsum[k] = 0;
    }
    
    newndsum = new int[newM];
    for (m = 0; m < newM; m++) {
	newndsum[m] = 0;
    }

	newnaksum = new int[newA];
	for(a = 0; a < newA; a++){
		newnaksum[a] = 0;
	}

	newnkasum = new int[K];
	for(k=0; k<K; k++){
		newnkasum[k] = 0;
	}


    //srandom(time(0)); // initialize for random number generation
	srand(time(0)); // initialize for random number generation
    newz = new int*[newM];
    for (m = 0; m < pnewdata->M; m++) {
		int N = pnewdata->docs[m]->length;
		newz[m] = new int[N];

		int curA = pnewdata->_docs[m]->authorcount;
	// assign values for nw, nd, nwsum, and ndsum	
        for (n = 0; n < N; n++) {
    	    int w = pnewdata->docs[m]->words[n];
    	    int _w = pnewdata->_docs[m]->words[n];
    	    //int topic = (int)(((double)random() / RAND_MAX) * K);
			int topic = (int)(((double)rand() / RAND_MAX) * K);
			if(topic == K)
				topic = K-1;
			int tmp = (int)(((double)rand() / RAND_MAX) * curA);
			if(tmp==curA)
				tmp = curA-1;
			int _author = pnewdata->_docs[m]->authors[tmp];

    	    newz[m][n] = _author*K + topic;
    	    
    	    // number of instances of word i assigned to topic j
    	    newnw[_w][topic] += 1;
    	    // number of words in document i assigned to topic j
    	    newnd[m][topic] += 1;
    	    // total number of words assigned to topic j
    	    newnwsum[topic] += 1;

			newnc[pnewdata->_docs[m]->confid][topic] += 1;
			newnkcsum[topic] += 1;
			newncksum[pnewdata->_docs[m]->confid] += 1;

			newna[_author][topic] += 1;
			newnaksum[_author] += 1;
			newnkasum[topic] += 1;
        } 
        // total number of words in document i
        newndsum[m] = N;      
    }    
    
    newtheta = new double*[newM];
    for (m = 0; m < newM; m++) {
        newtheta[m] = new double[K];
    }
	
    newphi = new double*[K];
    for (k = 0; k < K; k++) {
        newphi[k] = new double[newV];
    }    
    
	newtheta_ak = new double *[newA];
	for(a =0; a<newA; a++){
		newtheta_ak[a] = new double[K];
	}

	newtheta_ck = new double*[newC];
	for(c=0; c<newC; c++){
		newtheta_ck[K] = new double[K];
	}

	newtopic_author_ka = new double *[K];
	for(k=0; k<K; k++){
		newtopic_author_ka[k] = new double[newA];
	}

	newtopic_conf = new double *[K];
	for(k=0; k<K; k++){
		newtopic_conf[k] = new double[newC];
	}

    return 0;        
}

// Runs niters sweeps of Gibbs sampling over every token of the new data,
// then computes the new-data parameter tables and saves the result through
// save_inf_model(dfile).
void model::inference() {
    // The id -> string maps are only loaded when twords > 0.
    if (twords > 0) {
        dataset::read_wordmap(dir + wordmapfile, &id2word);
        dataset::read_authormap(dir + authormapfile, &id2author);
        dataset::read_id2confmap(dir + confmapfile, &id2conf);
    }

    printf("Sampling %d iterations for inference!\n", niters);

    inf_liter = 1;
    while (inf_liter <= niters) {
        printf("Iteration %d ...\n", inf_liter);

        // resample the assignment of every token of every new document
        for (int doc = 0; doc < newM; ++doc) {
            for (int pos = 0; pos < pnewdata->docs[doc]->length; ++pos) {
                newz[doc][pos] = inf_sampling(doc, pos);
            }
        }

        inf_liter++;
    }

    printf("Gibbs sampling for inference completed!\n");
    printf("Saving the inference outputs!\n");

    // compute_newtheta() (doc -> topic) and compute_newtheta_ck()
    // (conf -> topic) are intentionally not called here.
    compute_newtheta_ak();
    compute_newphi();
    compute_newtopic_author_ka();
    compute_newtopic_conf();

    // the loop leaves inf_liter one past the last completed iteration
    inf_liter--;
    save_inf_model(dfile);
}

// Resamples the (author, topic) assignment of token n of new document m.
//
// The current assignment newz[m][n] (encoded as author * K + topic) is
// removed from the new-data counters, a new pair is drawn from the
// unnormalised conditional
//   p(a, k) = word-topic term * author-topic term * conference-topic term
// over the document's authors and all K topics, and the counters are
// updated with the draw.
//
// Returns the new assignment encoded as author * K + topic. The caller is
// expected to store it in newz[m][n].
int model::inf_sampling(int m, int n) {
    int ka = newz[m][n];

    const int acount = pnewdata->docs[m]->authorcount;
    const int pnum = K * acount;
    if (pnum <= 0) {
        // nothing to sample from; keep the current assignment untouched
        return ka;
    }

    int _author = ka / K;
    int topic = ka % K;

    const int w = pnewdata->docs[m]->words[n];
    const int _w = pnewdata->_docs[m]->words[n];
    const int _cid = pnewdata->_docs[m]->confid;
    const int cid = pnewdata->docs[m]->confid;

    // remove z_i from the count variables
    newnw[_w][topic] -= 1;
    newnd[m][topic] -= 1;
    newna[_author][topic] -= 1;
    newnc[_cid][topic] -= 1;

    newnwsum[topic] -= 1;
    newndsum[m] -= 1;
    newnaksum[_author] -= 1;
    newnkasum[topic] -= 1;
    newnkcsum[topic] -= 1;
    newncksum[_cid] -= 1;

    const double Vbeta = V * beta;
    const double Kalpha = K * alpha;
    const double Cmu = C * mu;

    // do multinomial sampling via cumulative method;
    // pp[a * K + k] holds the weight of (a-th author of the document, topic k)
    std::vector<double> pp(pnum);
    for (int a = 0; a < acount; a++) {
        const int _aid = pnewdata->_docs[m]->authors[a];
        for (int k = 0; k < K; k++) {
            const double confprob =
                (nc[cid][k] + newnc[_cid][k] + mu) / (nkcsum[k] + newnkcsum[k] + Cmu);
            const double topicauthorprob =
                (newna[_aid][k] + alpha) / (newnaksum[_aid] + Kalpha);
            const double wordtopicprob =
                (nw[w][k] + newnw[_w][k] + beta) / (nwsum[k] + newnwsum[k] + Vbeta);

            pp[a * K + k] = wordtopicprob * topicauthorprob * confprob;
        }
    }

    // cumulate multinomial parameters
    for (int i = 1; i < pnum; i++) {
        pp[i] += pp[i - 1];
    }

    // scaled sample because of unnormalized pp[]
    const double u = ((double)rand() / RAND_MAX) * pp[pnum - 1];

    for (ka = 0; ka < pnum; ka++) {
        if (pp[ka] >= u) {
            break;
        }
    }
    if (ka >= pnum) {
        // No bucket matched (e.g. NaN weights); clamp to the last bucket so
        // the author lookup below stays in range.
        printf("over ");
        ka = pnum - 1;
    }

    _author = pnewdata->_docs[m]->authors[ka / K];
    topic = ka % K;

    // add newly estimated z_i to count variables
    newnw[_w][topic] += 1;
    newnd[m][topic] += 1;
    newna[_author][topic] += 1;
    newnc[_cid][topic] += 1;

    newnwsum[topic] += 1;
    newndsum[m] += 1;
    newnaksum[_author] += 1;
    newnkasum[topic] += 1;
    newnkcsum[topic] += 1;
    newncksum[_cid] += 1;

    return _author * K + topic;
}

// Returns the sum, over the new documents, of the per-document perplexity
//   exp( - (1 / Nd) * sum_n log p(w_n) )
// where
//   p(w) = sum_a sum_k (1 / acount) * newtheta_ak[a][k] * newphi[k][w]
// runs over the document's authors a and all topics k.
//
// Documents with no words are skipped (they would otherwise contribute 0/0).
double model::compute_perplexity(){
    double perplexity = 0.0;

    for (int m = 0; m < newM; m++) {
        const int Nd = pnewdata->docs[m]->length;
        if (Nd <= 0) {
            continue;
        }

        const int acount = pnewdata->docs[m]->authorcount;
        double log_likelihood = 0.0;

        for (int n = 0; n < Nd; n++) {
            const int w = pnewdata->_docs[m]->words[n];

            // probability of this single word; starts from zero for every
            // word so earlier words do not leak into it
            double p = 0.0;
            for (int a = 0; a < acount; a++) {
                const int aid = pnewdata->_docs[m]->authors[a];
                for (int k = 0; k < K; k++) {
                    p += 1.0/acount * newtheta_ak[aid][k]*newphi[k][w];
                }
            }

            if (p == 0)
                printf("fds");
            if (isnan(p))
                printf("nan ");
            log_likelihood += log(p);
        }

        perplexity += exp(-log_likelihood / Nd);
    }

    return perplexity;
}

// Fills newtheta_ak with the smoothed per-author topic proportions of the
// new data:
//   newtheta_ak[a][k] = (newna[a][k] + alpha) / (newnaksum[a] + K * alpha)
void model::compute_newtheta_ak() {
    for (int author = 0; author < newA; ++author) {
        const int *topic_counts = newna[author];
        double *out_row = newtheta_ak[author];
        const int author_total = newnaksum[author];
        for (int topic = 0; topic < K; ++topic) {
            out_row[topic] =
                (topic_counts[topic] + alpha) / (author_total + K * alpha);
        }
    }
}

// Fills newtopic_author_ka with the smoothed per-topic author proportions of
// the new data:
//   newtopic_author_ka[k][a] = (newna[a][k] + alpha) / (newnkasum[k] + newA * alpha)
void model::compute_newtopic_author_ka() {
    for (int topic = 0; topic < K; ++topic) {
        double *out_row = newtopic_author_ka[topic];
        const int topic_total = newnkasum[topic];
        int author = 0;
        while (author < newA) {
            out_row[author] =
                (newna[author][topic] + alpha) / (topic_total + newA * alpha);
            ++author;
        }
    }
}


// Fills newtheta with the smoothed per-document topic proportions of the
// new data:
//   newtheta[m][k] = (newnd[m][k] + alpha) / (newndsum[m] + K * alpha)
void model::compute_newtheta() {
    for (int doc = 0; doc < newM; ++doc) {
        const int *topic_counts = newnd[doc];
        double *out_row = newtheta[doc];
        const int doc_total = newndsum[doc];
        for (int topic = 0; topic < K; ++topic) {
            out_row[topic] =
                (topic_counts[topic] + alpha) / (doc_total + K * alpha);
        }
    }
}

void model::compute_newphi() {
    map<int, int>::iterator it;
    for (int k = 0; k < K; k++) {
	for (int w = 0; w < newV; w++) {
	    it = pnewdata->_id2id.find(w);
	    if (it != pnewdata->_id2id.end()) {
		newphi[k][w] = (nw[it->second][k] + newnw[w][k] + beta) / (nwsum[k] + newnwsum[k] + V * beta);
	    }
	}
    }
}

// Fills newtopic_conf with the smoothed per-topic conference proportions of
// the new data:
//   newtopic_conf[k][c] = (newnc[c][k] + mu) / (newnkcsum[k] + newC * mu)
void model::compute_newtopic_conf(){
    for (int topic = 0; topic < K; ++topic) {
        double *out_row = newtopic_conf[topic];
        const int topic_total = newnkcsum[topic];
        int conf = 0;
        while (conf < newC) {
            out_row[conf] =
                (newnc[conf][topic] + mu) / (topic_total + newC * mu);
            ++conf;
        }
    }
}


/*
void model::citation_suggestion(){
	int m, r, n;
	int k, a, wid, cid, rid, aid;
	int i, citecount;
	int cor=0, err=0;
	double sum = 0.0; 
	double denominator = 0.0;
	double numerator = 1.0;
	double prob;
	double total_prob = 0.0;
	map<int, int> mapcites;
	int total_cite = 0;
	
	FILE * fout = fopen("citation_suggestion.out", "w");
    if (!fout) {
		printf("Cannot open file citation_suggestion.out to save!\n");
		return ;
    }
	
	for (m = 0; m < M; m++) {
		cid = ptrndata->docs[m]->confid;		
		citecount = ptrndata->docs[m]->citationcount;
		total_cite += citecount;

		mapcites.clear();
		printf("*** original citations for document %d:\n", m);
		for(i=0; i<citecount; i++){
			rid = ptrndata->docs[m]->citations[i];
			printf("\t%d\t%s\n", rid, ptrndata->docs[rid]->title.c_str());
			mapcites.insert(pair<int, int>(rid, 1));
			fprintf(fout, "%d ", rid);
		}
		fprintf(fout, "\n");

		vector<pair<int, double> > cite_probs;
		pair<int, double> cite_prob;

		for(r = 0; r < M; r++){
			if(r==m)
				continue;
			prob = 1.0;
			for (n = 0; n < ptrndata->docs[m]->length; n++) {
				wid = ptrndata->docs[m]->words[n];

				sum = 0.0;
				for(a = 0; a<ptrndata->docs[r]->authorcount; a++){								
					for(k=0; k<K; k++){
						aid = ptrndata->docs[r]->authors[a];
						double p_k_a = (ns0xz[aid][k] + alpha ) / (ns0xzsum[aid] + K * alpha);
						//double p_k_a = theta_ak[a][k];
						sum += p_k_a * phi[k][wid] * topic_conf[k][cid];
					}			
				}
				sum /= ptrndata->docs[r]->authorcount;
								
				prob *= sum;
			}
			
			cite_prob.first = r;
			cite_prob.second = prob;
			cite_probs.push_back(cite_prob);
			total_prob += prob;
		}
		for(r=0; r<cite_probs.size(); r++){
			cite_probs[r].second /= total_prob;
		}
		// quick sort to sort word-topic probability
		//utils::quicksort(words_probs, 0, words_probs.size() - 1);
		std::sort(cite_probs.begin(), cite_probs.end(), PairIDGreater);
		printf("suggested citations for document %d:\n", m);
		for(i=0; (i<20 && i<M-1); i++){//i<citecount
			printf("  %d\t%.10f\t%s\n", cite_probs[i].first, cite_probs[i].second, 
				ptrndata->docs[cite_probs[i].first]->title.c_str());

			fprintf(fout, "%d ", cite_probs[i].first);
		}
		fprintf(fout, "\n\n");

		map<int, int>::iterator it;
		for (i = 0; i < citecount; i++) {
			it = mapcites.find(cite_probs[i].first);
			if(it != mapcites.end()){
				cor++;
			} else {
				err++;
			}
		}
	}

	double precision = (double)cor/(double)(cor + err);
	double recall = (double)cor/(double)total_cite;
	double f1 = 2.0*precision*recall/(precision + recall);

	printf("citation suggestion performance:\n");
	printf("\tprecision:%f\n", precision);
	printf("\trecall:%f\n", recall);
	printf("\tF1-measure:%f\n", f1);

	return;
}
*/

void model::conference_suggestion(){
	int m, n;
	int k, a, wid, aid;
	int i, c;
	int cor=0, err=0;
	double sum = 0.0; 
	double sum2 = 0.0;
	double denominator = 0.0;
	double numerator = 1.0;
	double prob_nu, prob_de, prob;
	int total_conf = 0;
	mapid2conf::iterator confit;
	
	FILE * fout = fopen("conference_suggestion.out", "w");
    if (!fout) {
		printf("Cannot open file conference_suggestion.out to save!\n");
		return ;
    }   

	total_conf = M;
	for (m = 0; m < M; m++) {
		prob = 1.0;

		confit = id2conf.find(ptrndata->docs[m]->confid);
		printf("*** published conference for document %d: is\n %d, %s\n", m, ptrndata->docs[m]->confid, confit->second.c_str());
		fprintf(fout, "%d\n", ptrndata->docs[m]->confid);

		vector<pair<int, double> > conf_probs;
		pair<int, double> conf_prob;

		for(c = 0; c < C; c++){	
			prob_nu = 0.0;
			prob_de = 0.0;
			for(a = 0; a<ptrndata->docs[m]->authorcount; a++){
				aid = ptrndata->docs[m]->authors[a];
				numerator = 1.0;
				denominator = 1.0;
				for (n = 0; n < ptrndata->docs[m]->length; n++) {
					wid = ptrndata->docs[m]->words[n];
					sum = 0.0;
					sum2 = 0.0;
					for(k=0; k<K; k++){
						double p_k_a = theta_ak[aid][k];
						//theta_ak[a][k] = (na[a][k] + ns0xz[a][k] + ns1xz[a][k] + alpha + alpha_gamma) 
						//	/ (naksum[a] + ns0xzsum[a] + ns1xzsum[a]+ K * alpha + K*alpha_gamma);
						sum += p_k_a * phi[k][wid] * topic_conf[k][c];
						sum2 += p_k_a * phi[k][wid];
					}
					numerator *= sum;
					denominator *= sum2;
				}

				prob_nu += numerator;
				prob_de += denominator;
			}

			if(prob_de == 0)
				printf("why zero??\n");
			prob = prob_nu / prob_de;
			
			conf_prob.first = c;
			conf_prob.second = prob;
			conf_probs.push_back(conf_prob);
		}
		// quick sort to sort word-topic probability
		//utils::quicksort(words_probs, 0, words_probs.size() - 1);
		std::sort(conf_probs.begin(), conf_probs.end(), PairIDGreater);
		printf("suggested conferences for document %d:\n", m);
		for(i=0; (i<20 && i<conf_probs.size()); i++){
			printf("  %d\t%.10f\t%s\n", conf_probs[i].first, conf_probs[i].second, id2conf[conf_probs[i].first].c_str());
			fprintf(fout, "%d ", conf_probs[i].first);
		}
		fprintf(fout, "\n\n");

		map<int, int>::iterator it;
		if(conf_probs[0].first == ptrndata->docs[m]->confid){			
			cor++;
		} else {
			err++;
		}		
	}

	double precision = (double)cor/(double)(cor + err);
	double recall = (double)cor/(double)total_conf;
	double f1 = 2.0*precision*recall/(precision + recall);

	printf("conference suggestion performance:\n");
	printf("\tprecision:%f\n", precision);
	printf("\trecall:%f\n", recall);
	printf("\tF1-measure:%f\n", f1);

	fclose(fout);
	return;
}
