/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.3 (MAY 2004)
 *
 * Copyright (C) 2000-2004 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* evolqrnaversion.c
 * 
 *
 * ER, Sun May 16 11:54:20 CDT 2004 [STL, at home with Coro and my Mother]
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <time.h>

#include "funcs.h"
#include "evolfuncs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"

/* Function: EvolScoreScanFast()
 *
 * Date:     ER, Sun May 16 15:29:44 CDT 2004  [St. Louis at home with Coro]
 *
 * Purpose:  Scanning version of the scoring, similar to the one implemented
 *           in the failed "ncranscan" program.
 *
 *           For each position (j) we keep the score of the window [j-win+1,j].
 *
 *           If the original algorithm is order L^n, this version becomes
 *           L * w^{n-1}; the gain with respect to the traditional
 *           window-by-window scoring is ~ w/x.
 *
 *           The difference with score_scan() is that here everything is
 *           performed in the (j,d) coordinate system. Before, the HMM parts
 *           were done in the (start,i) system, which is clumsy and not
 *           efficient. While the original HMM parts were L^3 in time, with
 *           this system they are reduced to L^2, therefore the corresponding
 *           scanning version goes from Lw^2 to Lw.
 *
 *           The function works in two passes:
 *             1. for every end position j, call EvolScoreWithModelsScanFast()
 *                for l = 0..lfmax on the forward strand and (unless `ones')
 *                for l = 0..lbmax on the reverse-complemented strand;
 *             2. for every j that closes a full window (or the last position
 *                of the alignment), print the banner, optional alignment and
 *                the posteriors, and optionally the regression info.
 *
 * Args:     ofp                       - stream for banners, alignments and scores
 *           printqfp, printqfile      - forwarded to PrintQfile()/EvolScoreWithModels()
 *           regressfp, regressionfile - regression output; written only when
 *                                       regressionfile is not NULL
 *           format                    - only compared against kSquid/kSelex to decide
 *                                       whether a given structure may be printed
 *           sqinfoX/Y, isegX/Y        - the two aligned sequences (integer coded)
 *           iseqX/Y                   - forwarded to EvolScoreWithModels()
 *           freqX, freqY              - handed to the model construction, and to
 *                                       BaseComp() for each reported window
 *           leg, win, slide           - alignment length, window size, slide;
 *                                       win is clamped to leg
 *           id_ali, id_win_sd         - used for the z-score (id - id_ali)/id_win_sd
 *           id_win_mean               - only printed in verbose mode
 *           time                      - passed by value; replaced locally when a
 *                                       window is rescored with its own identity
 *           The remaining arguments are passed through to the model
 *           construction and scoring routines.
 *
 * Note:     The only possible shuffle in this scanning version is of the whole
 *           alignment; otherwise the stepwise calculation fails. To do a careful
 *           window-by-window shuffling revert to the original qrna (--noscan).
 *
 * Returns:  void
 */
void
EvolScoreScanFast(FILE *ofp, 
		  FILE *printqfp,  char *printqfile, 
		  FILE *regressfp, char *regressionfile, 
		  int format, 
		  SQINFO sqinfoX, int *isegX, int *iseqX, double *freqX,
		  SQINFO sqinfoY, int *isegY, int *iseqY, double *freqY,
		  char *aliss,
		  int Lw, int leg, int win, int slide,
		  double id_ali, double id_win_mean, double id_win_sd,
		  double                ***cfg_node,
		  double                 **hexa,
		  double                  *codon_joint,
		  struct psubs_s          *pam_star,
		  struct psubs_s          *codprob_star,
		  struct psubs_s          *mutpxy_star,
		  struct psubs_s          *mut5pxy_star,
		  struct psubs_s          *riboprob_star,
		  struct pnonsubs_s       *pair5prob_star,
		  struct psubs_s          *mutpxy_rna_unpaired_star,
		  struct psubs_s          *mut5pxy_rna_unpaired_star,
		  struct psubs_s          *mut5pxy_rna_loop_star,
		  struct psubs_s          *mutpxy_rna_paired_star,
		  struct psubs_s          *mut5pxy_rna_paired_star,
		  struct dos_s             dos, 
		  struct emodel_s         *emodel, 
		  struct dpdscanfast_s     *dpdscan, 
		  struct dpd_s             *dpd, 
		  struct dpf_s             *dpf, 
		  struct rnascfgscanfast_s *mxscan, 
		  struct rnascfg_s         *mx, 
		  struct scores_s          *sc, 
		  struct ali_s             *ali,
		  struct scanfast_s        *scanfast,
		  int add_codon, int add_hexamer, 
		  int alignment, int use_ribo_approx, int cyk, int changefreq, int changefreqoverall, int evd,
		  int fastintloop, int is_fix_time, int logodds, int use_ribo_oldrna, int pedantic, 
		  int shuffle, int sre_shuffle, int con_shuffle, struct three_times_s time, 
		  int traceback, int verbose, int ones, int parse, int rnass, int doends, int N, double k, int shift, int *histo_sc)
{
  struct three_divergence_s  rnadiv;
  int                        j, jmod;
  int                        dfmax, dbmax;
  int                        l, lfmax, lbmax;
  int                        start, end;          /* first and last position of analysis                  */
  int                        lenX, lenY;          /* len of seq's  without gaps                           */
  int                        fstX, fstY;          /* mapping of startX and startY to the seq without gaps */
  int                       *segX, *segY;         /* sequences actually scored (original or shuffled)     */
  int                       *segrvX = NULL;       /* reverse complements; allocated only if !ones         */
  int                       *segrvY = NULL;
  int                       *segshX = NULL;       /* shuffled copies; allocated only if shuffling         */
  int                       *segshY = NULL;
  int                        fwindows = 0;
  int                        bwindows = 0;
  double                     id, gap, mut;
  double                     id_zsc;
  double                     id_zsc_min = 2.0;    /* |z-score| above which a window uses its own identity */
  int                        use_win_id = FALSE;

  /* Select the RNA divergence table used to map identity to time.
   */
  if      (use_ribo_approx) rnadiv = rnadiv_approx;
  else if (use_ribo_oldrna) rnadiv = rnadiv_oldrna;
  else                      rnadiv = rnadiv_ribosum;

  /* Remember that in this scanning version, the only possible shuffle is of the
   * whole alignment. Otherwise the stepwise calculation fails.
   *
   * To do a careful window-by-window shuffling revert to the original qrna (--noscan)
   */
  if (shuffle || sre_shuffle || con_shuffle) 
    {
      AllocIntSeqs(leg, &segshX, &segshY);

      DupIntSeq(isegX, segshX, leg-1, leg-1);
      DupIntSeq(isegY, segshY, leg-1, leg-1);  
      
      /* The shuffles are not exclusive: each requested one is applied in turn.
       */
      if (shuffle)     Shuffle2IntSequences(segshX, segshY, leg, leg-1, leg-1, verbose);
      if (sre_shuffle) QRNAIntShuffle(segshX, segshY, leg);
      if (con_shuffle) QRNAIntConservedShuffle(segshX, segshY, leg);
      
      segX = segshX;
      segY = segshY;
    }
  else 
    {
      segX = isegX;
      segY = isegY;
    }
  
  /* Reverse-complement the whole alignment
   */
  if (!ones) {
    AllocIntSeqs(leg, &segrvX, &segrvY);    
    RevComp(segrvX, segX, leg);
    RevComp(segrvY, segY, leg);
  }
  
  if (leg < win) win = leg;  /* if alignment is smaller than window, score the whole alignment at once */
  
  /* Use the base-composition, length and time to construct the models
   */
  EvolConstructModels_phase2(ofp, win, cfg_node, hexa, codon_joint, freqX, freqY, emodel, time,
			     pam_star, codprob_star, mutpxy_star, mut5pxy_star, riboprob_star, 
			     pair5prob_star, 
			     mutpxy_rna_unpaired_star, mut5pxy_rna_unpaired_star, mut5pxy_rna_loop_star, 
			     mutpxy_rna_paired_star, mut5pxy_rna_paired_star, 
			     add_codon, add_hexamer, changefreq, changefreqoverall, logodds, use_ribo_oldrna, pedantic, verbose);

  /*
   *  PASS 1: for every end position j in [0,leg-1], fill the scanning scores
   *          for l = 0..lmax on each strand.
   */
  for (j = 0; j < leg; j ++) {
    
    jmod = j % win;   /* leg > 0 inside the loop, and win > 0 is assumed from the caller */

    dfmax = IndexForwardWindow (leg, win, slide, j);
    dbmax = IndexBackwardWindow(leg, win, slide, j);
   
    lfmax = dfmax + 1;
    lbmax = dbmax + 1;
   
    /* 
     *  FORWARD strand 
     */
    for (l = 0; l <= lfmax; l++) 
      fwindows += EvolScoreWithModelsScanFast(ofp, sqinfoX, segX, sqinfoY, segY, aliss, leg, win, slide, j, jmod, l, lfmax, emodel, dpdscan,
					      mxscan, sc, ali, scanfast, alignment, cyk, doends, fastintloop, logodds, ones, parse, rnass, FALSE, 
					      traceback, verbose);   
  
    /* 
     *  BACKWARD strand 
     */
    if (!ones) 
      for (l = 0; l <= lbmax; l++) 
	bwindows += EvolScoreWithModelsScanFast(ofp, sqinfoX, segrvX, sqinfoY, segrvY, aliss, leg, win, slide, j, jmod, l, lbmax, emodel, dpdscan,
						mxscan, sc, ali, scanfast, alignment, cyk, doends, fastintloop, logodds, ones, parse, rnass, TRUE, 
						traceback, verbose);  
   
  }/* for j < leg */
  
  /* Check that we calculated the same number of scores Forwards and Backwards
   */
  if (!ones && fwindows != bwindows) Die ("EvolScoreScanFast(): wrong calculation of full windows [forward=%d, backwards=%d]\n", fwindows, bwindows); 

  /* PASS 2: for each full scanning window, give some statistics 
   *
   *         and calculate POSTERIORS for the three functions
   */
  for (j = win-1; j < leg; j ++) {

    dfmax = IndexForwardWindow (leg, win, slide, j);

    /* Report only at positions that close a full window, plus the last
     * position of the alignment if it closes a (possibly shorter) window.
     */
    if (dfmax == win - 1 || (j == leg-1 && dfmax >= 0))
      {	
	end   = j;
	start = end - dfmax;
	
	PercIdSeqs(segX, segY, end, dfmax, &id, &gap, &mut);
	if (id_win_sd > 0) id_zsc = (id - id_ali) / id_win_sd;
	else               id_zsc = 0.0;
	
	/* NOTE(review): use_win_id is never reset to FALSE, so after the first
	 * outlier window every later window is also rescored with its own
	 * identity-derived time -- confirm this is intended.
	 */
	if (!is_fix_time && (id_zsc > id_zsc_min || id_zsc < -id_zsc_min)) use_win_id = TRUE;

	if (!is_fix_time && use_win_id) {
	  /* NOTE(review): freqX/freqY here are whatever the BaseComp() calls
	   * below left from the previously reported window -- confirm.
	   */
	  time = TimeIdCorrelation3Models(othdiv, coddiv, rnadiv, id); 
	  EvolConstructModels_phase2(ofp, win, cfg_node, hexa, codon_joint, 
				     freqX, freqY, emodel, time, 
				     pam_star, codprob_star, 
				     mutpxy_star, mut5pxy_star, riboprob_star, 
				     pair5prob_star, 
				     mutpxy_rna_unpaired_star, mut5pxy_rna_unpaired_star, mut5pxy_rna_loop_star, 
				     mutpxy_rna_paired_star, mut5pxy_rna_paired_star, 
				     add_codon, add_hexamer, changefreq, changefreqoverall, logodds, use_ribo_oldrna, pedantic, verbose);
	}

	if (verbose) printf("id_ali %f id_win %f id_win_mean %f id_zsc = %f\n", id_ali, id, id_win_mean, id_zsc);
	
	/* Composition of this window, reported in the banner.
	 */
	BaseComp(ofp, segX, end, dfmax, freqX);
	BaseComp(ofp, segY, end, dfmax, freqY);
	
	lenX = LenNoGaps(segX, end, dfmax); /* len of seqX without gaps */
	lenY = LenNoGaps(segY, end, dfmax); /* len of seqY without gaps */
	
	fstX = PosNoGaps(segX, start);
	fstY = PosNoGaps(segY, start);
	
	/* print scan banner 
	 */
	PrintScanBanner(ofp, start, end, lenX, lenY, fstX, fstY, freqX, freqY, id, gap, mut, use_win_id, time);
	
	/* print alignment if asked for it
	 */
	if (alignment) {
	  FillAliStruct(segX, segY, end, dfmax, ali);
	  PrintAlign(ofp, sqinfoX, sqinfoY, 0, dfmax+1, ali);
	}

	PrintQfile(printqfp, printqfile, sqinfoX, segX, sqinfoY, segY, 0, dfmax+1, start);
	
	/* Compare F(seq)[j] with F(seqrv)[j] and calculate POSTERIORS 
	 */
	fprintf(ofp, "LOCAL_DIAG_VITERBI -- ");
	RNAbanner(ofp, cyk);
	if (!use_win_id)
	  PosteriorScoresScanFast(ofp, scanfast, leg, j, win, slide, doends, ones); 
	else {    
	  /* The scanning scores were computed with the alignment-wide models,
	   * so this window is rescored from scratch with the rebuilt ones.
	   *
	   * the "given" RNA secondary structure (with gaps) if any goes to sqinfoX.ss and sqinfoY.ss
	   */
	  if ((format == kSquid || format == kSelex) && (sqinfoX.flags & SQINFO_SS))
	    if (verbose) PrintCtSeqs(ofp, &sqinfoX, isegX, &sqinfoY, isegY, start, end-start+1, aliss);
	  
	  EvolScoreWithModels(ofp, printqfp, printqfile, regressfp, regressionfile, 
			      sqinfoX, isegX, iseqX, sqinfoY, isegY, iseqY, aliss, start, 
			      end-start+1, Lw, dos, emodel, dpd, dpf, mx, sc, ali, 
			      alignment, cyk, doends, fastintloop, logodds, ones, parse, rnass, 
			      shuffle, sre_shuffle, con_shuffle, FALSE, traceback, verbose, use_win_id, time);
	}
	
	if (evd) FillScoreHistograms (stdout, sc, N, k, shift, histo_sc, ones);
	
	/* Regression test info.
	 */
	if (regressionfile != NULL) {
	  PrintScanBanner(regressfp, start, end, lenX, lenY, fstX, fstY, freqX, freqY, id, gap, mut, use_win_id, time);
	  fprintf(regressfp, "+> %f %f %f\n", scanfast->sc->oth[j],    scanfast->sc->cod[j],    scanfast->sc->rna[j]);
	  fprintf(regressfp, "-> %f %f %f\n", scanfast->sc->othrev[j], scanfast->sc->codrev[j], scanfast->sc->rnarev[j]);
	}
      } 
  }
  
  /* The working copies are NULL unless they were allocated above, and
   * free(NULL) is a no-op, so no need to repeat the allocation conditions.
   */
  free(segrvX); 
  free(segrvY);
  free(segshX); 
  free(segshY);
 
}

/* Function: EvolScoreWindow()
 *
 * Date:     ER,  Sun May 16 15:28:56 CDT 2004 [St. Louis at home with Coro]
 *
 * Purpose:  Scores with qrna in the traditional way of moving along the given
 *           alignment with a window (w) and sliding a number of positions (x) at a time.
 *           Each scoring window is assigned a 2D score and a winner model.
 *
 *           The number of scoring windows is  (L-w)/x, for an alignment of length L.
 * 
 *           If the original algorithm is order L^n,
 *
 *           this version becomes           w^n * (L-w)/x 
 *
 *           Model construction:
 *             - if neither `changefreqwin' nor `twindow' is set, the models are
 *               built once, before the loop;
 *             - otherwise they are rebuilt for every window, after the window
 *               frequencies (and, unless `is_fix_time', the window time) have
 *               been recalculated;
 *             - in addition, unless `is_fix_time', a window whose identity
 *               z-score exceeds id_zsc_min in absolute value switches the
 *               function to identity-derived times (see use_win_id below).
 *
 * Args:     ofp                       - stream for output
 *           printqfp, printqfile,
 *           regressfp, regressionfile - forwarded to EvolScoreWithModels()
 *           format                    - only compared against kSquid/kSelex to decide
 *                                       whether a given structure may be printed
 *           sqinfoX/Y, isegX/Y,
 *           iseqX/Y                   - the two aligned sequences
 *           freqX, freqY              - single-nt frequencies; OVERWRITTEN per window
 *                                       when changefreqwin or twindow is set
 *           leg, win, slide           - alignment length, window size, slide;
 *                                       slide must be > 0 (see NOTE in the loop)
 *           id_ali, id_win_sd         - used for the z-score (id - id_ali)/id_win_sd
 *           id_win_mean               - only printed in verbose mode
 *           time                      - passed by value; replaced locally when a
 *                                       window time is re-estimated
 *           shtoo                     - if set (and !shuffle), each window is scored a
 *                                       second time with con_shuffle = TRUE
 *           The remaining arguments are passed through to the model
 *           construction and scoring routines.
 *
 * Returns: void
 */
void
EvolScoreWindow(FILE *ofp, 
		FILE *printqfp,  char *printqfile, 
		FILE *regressfp, char *regressionfile, 
		int format, 
		SQINFO sqinfoX, int *isegX, int *iseqX, double *freqX,
		SQINFO sqinfoY, int *isegY, int *iseqY, double *freqY,
		char *aliss,
		int Lw, int leg, int win, int slide,
		double id_ali, double id_win_mean, double id_win_sd,
		double            ***cfg_node,
		double             **hexa,
		double              *codon_joint,
		struct psubs_s      *pam_star,
		struct psubs_s      *codprob_star,
		struct psubs_s      *mutpxy_star,
		struct psubs_s      *mut5pxy_star,
		struct psubs_s      *riboprob_star,
		struct pnonsubs_s   *pair5prob_star,
		struct psubs_s      *mutpxy_rna_unpaired_star,
		struct psubs_s      *mut5pxy_rna_unpaired_star,
		struct psubs_s      *mut5pxy_rna_loop_star,
		struct psubs_s      *mutpxy_rna_paired_star,
		struct psubs_s      *mut5pxy_rna_paired_star,
		struct dos_s         d, 
		struct emodel_s     *emodel, 
		struct dpd_s        *dpd, 
		struct dpf_s        *dpf, 
		struct rnascfg_s    *mx, 
		struct scores_s     *sc, 
		struct ali_s        *ali,
		int add_codon, int add_hexamer, int alignment, int use_ribo_approx, 
		int cyk, int changefreq, int changefreqoverall, int changefreqwin, int evd, 
		int fastintloop, int is_fix_time, int logodds, int use_ribo_oldrna, int pedantic, 
		int shuffle, int sre_shuffle, int con_shuffle, int sweep, struct three_times_s  time, 
		int traceback, int verbose, int ones, int parse, int rnass, int doends, int shtoo, int twindow, int N, double k, int shift, int *histo_sc)
{
  struct three_divergence_s  rnadiv;
  double  id, gap, mut;
  double  id_zsc;
  double  id_zsc_min = 10.0;     /* |z-score| above which a window uses its own identity */
  int     use_win_id = FALSE;
  int     retime;                /* TRUE if the time is re-estimated for this window     */
  int     i;
  int     pos;                   /* first position of the current window                 */
  int     dis;                   /* length of the current window                         */
  
  /* Select the RNA divergence table used to map identity to time.
   */
  if      (use_ribo_approx) rnadiv = rnadiv_approx;
  else if (use_ribo_oldrna) rnadiv = rnadiv_oldrna;
  else                      rnadiv = rnadiv_ribosum;

  /* Use the base-composition, time and length to construct the models.
   * With per-window frequencies or times this is done inside the loop instead.
   */
  if (!changefreqwin && ! twindow)
    EvolConstructModels_phase2(ofp, win, cfg_node, hexa, codon_joint, 
			       freqX, freqY, emodel, time, 
			       pam_star, codprob_star, 
			       mutpxy_star, mut5pxy_star, riboprob_star, 
			       pair5prob_star, 	     
			       mutpxy_rna_unpaired_star, mut5pxy_rna_unpaired_star, mut5pxy_rna_loop_star, 
			       mutpxy_rna_paired_star, mut5pxy_rna_paired_star, 
			       add_codon, add_hexamer, changefreq, changefreqoverall, logodds, use_ribo_oldrna, pedantic, verbose);
  
  /* Calculate scores from pos to pos+dis-1
   */
  pos = 0;
  while (pos < leg) {
    
    /* A full window while at least win+1 positions remain; otherwise the
     * last window takes everything that is left.
     */
    dis = (pos<leg-win)? win : leg-pos;
    
    if(changefreqwin || twindow) {
      /* calculate single-nt frequencies for window: [pos, pos+dis-1]
       */
      for (i = 0; i < 4; i++)         /* initialization         */
	{
	  freqX[i] = 0.0;   
	  freqY[i] = 0.0;   
	}
      BaseComp(ofp, isegX, pos+dis-1, dis-1, freqX);    /* freqs for seqX         */
      BaseComp(ofp, isegY, pos+dis-1, dis-1, freqY);    /* freqs for seqY         */
    }
    
    PercIdSeqs(isegX+pos, isegY+pos, dis-1, dis-1, &id, &gap, &mut);

    if (id_win_sd > 0) id_zsc = (id - id_ali) / id_win_sd;
    else               id_zsc = 0.0;

    /* NOTE(review): use_win_id is never reset to FALSE, so after the first
     * outlier window every later window also gets an identity-derived time
     * (which keeps the models from going stale) -- confirm this is intended.
     */
    if (!is_fix_time && (id_zsc > id_zsc_min || id_zsc < -id_zsc_min)) use_win_id = TRUE;
    
    if (verbose) printf("id_ali %f id_win %f id_win_mean %f id_zsc = %f\n", id_ali, id, id_win_mean, id_zsc);

    /* Re-estimate the divergence time from the window identity, if allowed.
     */
    retime = (!is_fix_time && (twindow || use_win_id));
    if (retime) {
      time = TimeIdCorrelation3Models(othdiv, coddiv, rnadiv, id);  
      fprintf(ofp, "Divergence time (variable by window): %f %f %f\n", time.oth, time.cod, time.rna);
    }

    /* Rebuild the models whenever anything they depend on is per-window.
     * changefreqwin/twindow must trigger a rebuild on their own: in those
     * modes the models are NOT built before the loop, so gating the rebuild
     * on the time re-estimation alone would leave them unbuilt (fixed time)
     * or built from another window's frequencies.
     */
    if (retime || changefreqwin || twindow)
      EvolConstructModels_phase2(ofp, win, cfg_node, hexa, codon_joint, 
				 freqX, freqY, emodel, time, 
				 pam_star, codprob_star, 
				 mutpxy_star, mut5pxy_star, riboprob_star, 
				 pair5prob_star, 			     
				 mutpxy_rna_unpaired_star, mut5pxy_rna_unpaired_star, mut5pxy_rna_loop_star, 
				 mutpxy_rna_paired_star, mut5pxy_rna_paired_star, 
				 add_codon, add_hexamer, changefreq, changefreqoverall, logodds, use_ribo_oldrna, pedantic, verbose);
    
    /* the "given" RNA secondary structure (with gaps) if any goes to sqinfoX.ss and sqinfoY.ss
     */
    if ((format == kSquid || format == kSelex) && (sqinfoX.flags & SQINFO_SS))
      if (verbose) PrintCtSeqs(ofp, &sqinfoX, isegX, &sqinfoY, isegY, pos, dis, aliss);
    
    EvolScoreWithModels(ofp, printqfp, printqfile, regressfp, regressionfile, 
			sqinfoX, isegX, iseqX, sqinfoY, isegY, iseqY, aliss, pos, 
			dis, Lw, d, emodel, dpd, dpf,mx, sc, ali, 
			alignment, cyk, doends, fastintloop, logodds, ones, parse, rnass, 
			shuffle, sre_shuffle, con_shuffle, sweep, traceback, verbose, use_win_id, time);
    
    /* Score the same window a second time with con_shuffle forced on.
     */
    if (!shuffle && shtoo) 
      EvolScoreWithModels(ofp, printqfp, printqfile, regressfp, regressionfile, 
			  sqinfoX, isegX, iseqX, sqinfoY, isegY, iseqY, aliss, pos, 
			  dis, Lw, d, emodel, dpd, dpf, mx, sc, ali, 
			  alignment, cyk, doends, fastintloop, logodds, ones, parse, rnass, 
			  FALSE, FALSE, TRUE, sweep, traceback, verbose, use_win_id, time);
    
    
    if (evd) FillScoreHistograms (stdout, sc, N, k, shift, histo_sc, ones);

    /* Slide the window, or finish if this was the last one.
     * NOTE(review): a slide of 0 would never leave this loop; the caller is
     * assumed to guarantee slide > 0.
     */
    pos = (pos<leg-win)? pos+slide : leg;
  }

}

