/*
  pdb2dhl, pdb2epk:  Convert PDB files to dihedral angle/ECEPPAK input file format
  Copyright (C) 2002  Farokh Jamalyaria

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License
  as published by the Free Software Foundation; either version 2
  of the License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

  Author's contact information:
  Farokh Jamalyaria, farokhj@yahoo.com
  9 Commons Lobby G222
  College Station, TX 77840
*/

/* Adapted from general PDB manipulation functions originally written
   by Farokh Jamalyaria for MMANIP, another program.
   Updated for pdb2dhl (by adding and removing functions from original
   file, Pdb_mgr_imp.cpp).
*/

#include "Pdb_dhl1.h"
#include "Pdb_dhl2.h"
#include "Pdb_mgr_imp.h"
#include "Format.h"
#include<cmath>

/* Seven-decimal approximation of pi. */
#define PI 3.1415927

/* File-scope Format object, invoked as sig3(value) by the angle-writing
   code further down before each number is written out.
   NOTE(review): the constructor argument 3 presumably selects three digits
   of precision -- confirm against Format.h. */
Format sig3(3);

/* Reads a PDB file into memory.
   Remember to strip the file with stp-pdb.pl prior to invoking pdb2dhl.

   filename: path of the PDB file to read.
   memfile:  caller-allocated array of Pdb_line records; record i receives
             the i-th line read.  Its capacity is not visible here, so the
             caller must make it large enough for the whole file.

   Side effects: the file-level counter pdb_rows is reset and then holds the
   number of records read (0 when the file cannot be opened).  The file-level
   scratch buffer `line` is allocated for the duration of the read and
   released before returning.
*/
void
read_file(char* filename,
	  Pdb_line* memfile){

  pdb_rows = 0;

  ifstream f_in(filename);
  if(!f_in) return;   /* nothing to read; leave pdb_rows at 0 */

  line = new char[PDB_LINE_SIZE];

  int i=0;
  while(memfile[i].read_file(f_in, line)){
    ++pdb_rows;
    ++i;
  }

  /* Allocated with new[], so it must be released with delete[]. */
  delete[] line;

  f_in.close();
}

/*********************************************/
/* PDB file to dihedral angle representation */
/* conversion functions. (ECEPPAK format)    */
/*********************************************/

/* Emits the N-terminus end-group token into pdb2dhl's sequence file
   (*.seq), the intermediate file used to build an ECEPPAK input file
   (see prt_seq(...)).

   f_out2:  open sequence-file stream.
   Nendgrp: user's N-terminus specification.
              "0" or "H2" -> writes "H2N"
              "H3+"       -> writes "H3N"
            Any other value writes nothing.

   Only these few end groups are handled; support another one by adding
   a further branch in the same form.
*/
void prt_seq_Nterm(ofstream& f_out2,
		   char* Nendgrp){

  const bool wants_h2n =
    (strcmp(Nendgrp,"0")==0) || (strcmp(Nendgrp,"H2")==0);
  const bool wants_h3n = (strcmp(Nendgrp,"H3+")==0);

  if(wants_h2n){
    f_out2 << "H2N";
  }else if(wants_h3n){
    f_out2 << "H3N";
  }
}

/* Emits the C-terminus end-group token into the sequence file (*.seq).

   f_out2:  open sequence-file stream.
   Cendgrp: user's C-terminus specification.  "COOH", "COO" and "0" all
            produce a line break followed by "CXH"; any other value writes
            nothing.

   As with the N-terminus writer, add a branch here to support further
   end groups.
*/
void prt_seq_Cterm(ofstream& f_out2,
		   char* Cendgrp){

  const bool is_cooh = (strcmp(Cendgrp,"COOH")==0);
  const bool is_coo  = (strcmp(Cendgrp,"COO")==0);
  const bool is_none = (strcmp(Cendgrp,"0")==0);

  if(!(is_cooh || is_coo || is_none))
    return;

  f_out2 << endl << "CXH";
}

/* Emits the N-terminus end group's dihedral line into pdb2dhl's input
   file (*.inp), the intermediate file used for further processing.

   f_out:   open *.inp stream.
   Nendgrp: user's N-terminus specification.  "0" and "H2" produce one
            line holding the value 0.000 (three leading blanks); every
            other value, including "H3+", writes nothing.

   Only a few end groups are handled; add a similar branch to support
   others.
*/
void prt_inp_Nterm(ofstream& f_out,
		   char* Nendgrp){

  const bool is_none = (strcmp(Nendgrp,"0")==0);
  const bool is_h2   = (strcmp(Nendgrp,"H2")==0);

  if(!(is_none || is_h2))
    return;

  f_out << "   " << "0.000" << endl;
}

/* Emits the C-terminus end group's dihedral line into the input file
   (*.inp).

   f_out:   open *.inp stream.
   Cendgrp: user's C-terminus specification.  "COOH", "COO" and "0" produce
            one line holding the value 0.000 (three leading blanks); any
            other value writes nothing.
*/
void prt_inp_Cterm(ofstream& f_out,
		   char* Cendgrp){

  const bool is_cooh = (strcmp(Cendgrp,"COOH")==0);
  const bool is_coo  = (strcmp(Cendgrp,"COO")==0);
  const bool is_none = (strcmp(Cendgrp,"0")==0);

  if(!(is_cooh || is_coo || is_none))
    return;

  f_out << "   " << "0.000" << endl;
}

/* Creates the sequence file, an intermediate file named with a
   .seq file extension that is used to build the ECEPPAK input file.

   The sequence file lists the residues in their PDB file sequence,
   so the user must ensure that the PDB file lists residues in
   their spatially connected order.

   An example of such a file is out.seq, created
   from 1GCN.pdb. Compare it with ecp.inp, 1GCN.pdb's ECEPPAK input
   file.  These files are in the Examples directory.

   memfile:  the PDB records (pdb_rows of them), as filled by read_file().
   dir:      'f' lists residues first-to-last; any other value lists them
             last-to-first.
   filename: base name of the output file.  ".seq" is appended IN PLACE, so
             the caller's buffer must have room for four more characters
             and holds the extended name on return.
   Nendgrp,
   Cendgrp:  end-group specifications forwarded to prt_seq_Nterm() and
             prt_seq_Cterm().

   A new residue is recognised whenever the residue number of a row differs
   from that of the previous row.
*/
void prt_seq(Pdb_line* memfile,
	     char dir,
	     char* filename,
	     char* Nendgrp,
	     char* Cendgrp){

  /* Append the extension with strcat: formatting a buffer into itself
     with sprintf has overlapping source and destination, which is
     undefined behaviour. */
  strcat(filename,".seq");
  ofstream f_out2(filename);

  prt_seq_Nterm(f_out2, Nendgrp);

  /* First pass: count the residues so the index table is sized by the
     number of residues actually present, not by the last residue number
     (the two differ when numbering does not start at 1 or has gaps). */
  int nres = 0;
  for(int i=0; i<pdb_rows; i++){
    if(i == 0 || memfile[i-1].get_resnum() != memfile[i].get_resnum())
      ++nres;
  }

  /* Second pass: remember the first row of every residue. */
  int* first_row = new int[nres > 0 ? nres : 1];
  int ctr = 0;
  for(int i=0; i<pdb_rows; i++){
    if(i == 0 || memfile[i-1].get_resnum() != memfile[i].get_resnum())
      first_row[ctr++] = i;
  }

  /* Now print the sequence (forward or backward).
     Ensure that there are only 19 residues
     per line, since ECEPPAK reads only first 78 chars.
  */

  f_out2 << endl;
  if(ctr > 0){
    if(dir == 'f'){
      f_out2 << memfile[first_row[0]].get_resname();
      for(int j=1; j<ctr; j++){
	f_out2 << " " << memfile[first_row[j]].get_resname();
	if((j+1) % 19 == 0) f_out2 << endl;
      }
    }else{
      f_out2 << memfile[first_row[ctr-1]].get_resname();
      for(int j=ctr-2; j >=0; j--){
	f_out2 << " " << memfile[first_row[j]].get_resname();
	/* ctr-j is the number of residues printed so far */
	if((ctr-j) % 19 == 0) f_out2 << endl;
      }
    }
  }

  delete[] first_row;

  prt_seq_Cterm(f_out2, Cendgrp);

}

/* Process the polypeptide, converting from Cartesian-coordinate
   representation to dihedral-angle representation.

   An intermediate file, suffixed with ".inp", is created. This file
   contains dihedral angles and is used to create the ECEPPAK input
   file.

   memfile:  the PDB records, as filled by read_file().
   dir:      'f' walks the records first-to-last; any other value walks
             them last-to-first.
   filename: base name of the output file; "<filename>.inp" is written.
             The caller's buffer is not modified.
   Nendgrp,
   Cendgrp:  end-group specifications forwarded to the terminus helpers.
*/
void pdb_to_dhl(Pdb_line* memfile,
		char dir,
		char* filename,
		char* Nendgrp,
		char* Cendgrp){

  /* For each of first 3 AAs, create
     a map and fill with <atom name, index>.
  */
  map<string, int> m0;
  map<string, int> m1;
  map<string, int> m2;

  /* Keys for the maps: */
  string m0key;
  string m1key;
  string m2key;

  int idx;

  /* Build the output name in a string so that a long filename cannot
     overrun a fixed-size buffer. */
  const string ofileinp = string(filename) + ".inp";
  ofstream f_out(ofileinp.c_str());

  fill_first_3(memfile, dir, m0, m1, m2, m0key, m1key, m2key, idx);

  /* If going forward through file, m0 holds 1st AA's info,
     m1 holds 2nd AA's, and m2 3rd AA's.
     If backward, m0 holds last AA's, m1 2nd-to-last, and m2
     3rd-to-last.
  */

  /* Standard dihedral angles: phi, psi, omega.

     Compute std. dihedral data for 1st or last AA,
     using user's end-group specification. Then compute
     chi dihedrals. Dump 1st or last AA's data to file.
     Then compute std. and chi dihedrals for 2nd or 2nd-to-last
     AA. Then start looping.
  */

  /* Scratch storage shared by all the helpers.  Automatic arrays need no
     new[]/delete[] pairing; they are zero-filled so that an unrecognised
     end group cannot cause indeterminate values to be written. */
  double ppo[3] = {0.0, 0.0, 0.0};      /* phi, psi, omega */
  int chicnt = 0;                       /* number of valid entries in chi */
  double chi[7] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
  double x[4] = {0.0, 0.0, 0.0, 0.0};   /* coordinates of the four atoms */
  double y[4] = {0.0, 0.0, 0.0, 0.0};   /* defining the current dihedral */
  double z[4] = {0.0, 0.0, 0.0, 0.0};
  char resname[8] = "";                 /* filled via strcpy in chi_dhl() */

  /* Write dihedral angle(s) for N-term. end grp */
  prt_inp_Nterm(f_out, Nendgrp);

  /* Compute chi dihedrals for 1st or last AA */
  chi_dhl(memfile,m0,chi,&chicnt,x,y,z,resname);
  /* Compute ppo dihedrals for 1st or last AA */
  ppo_Nterm(memfile,m0,m1,m2,ppo,x,y,z,Nendgrp);
  write_dhl_line(ppo,chi,chicnt,f_out);
  /* Compute ppo and chi dihedrals for 2nd or 2nd-to-last AA */
  ppo_dhl(memfile,m0,m1,m2,ppo,x,y,z);
  chi_dhl(memfile,m1,chi,&chicnt,x,y,z,resname);
  write_dhl_line(ppo,chi,chicnt,f_out);

  /* Loop over amino acids from start to end or
     from end to start, depending on dir value, computing and
     writing angles. Then break out, compute and write info for the
     final AA (similar to work for initial AA).
  */

  lp_comp(memfile,dir,m0,m1,m2,ppo,chi,&chicnt,x,y,z,idx,resname,f_out);

  /* Compute final AA's ppo and chi angles */

  ppo_Cterm(memfile,m0,m1,m2,ppo,x,y,z,Cendgrp);
  chi_dhl(memfile,m2,chi,&chicnt,x,y,z,resname);
  write_dhl_line(ppo,chi,chicnt,f_out);

  /* Write dihedral angle(s) for C-term. end group */
  prt_inp_Cterm(f_out, Cendgrp);

  f_out.close();
}

/* Computes the phi, psi and omega dihedral angles of the N-terminus
   amino acid by dispatching on the user's end-group specification:

     "0"           -> NnoH()  (no hydrogens specified in the ORIGINAL PDB)
     "H3+" or "H2" -> NH3()

   Any other Nendgrp value leaves ppo untouched.  All remaining arguments
   are forwarded unchanged to the selected routine.
*/
void ppo_Nterm(Pdb_line* memfile,
	       map<string, int>& m0, map<string, int>& m1, map<string, int>& m2,
	       double* ppo,
	       double* x,double* y,double* z,
	       char* Nendgrp){

  const bool no_hydrogens = (strcmp(Nendgrp,"0")==0);
  const bool has_hydrogens =
    (strcmp(Nendgrp,"H3+")==0) || (strcmp(Nendgrp,"H2")==0);

  if(no_hydrogens){
    NnoH(memfile,m0,m1,m2,ppo,x,y,z);
  }else if(has_hydrogens){
    NH3(memfile,m0,m1,m2,ppo,x,y,z);
  }
}

/* Computes the phi, psi and omega dihedral angles of the C-terminus
   amino acid.  The end groups "COOH", "COO" and "0" are all handled by
   COOH(); any other Cendgrp value leaves ppo untouched.  All remaining
   arguments are forwarded unchanged.
*/
void ppo_Cterm(Pdb_line* memfile,
	       map<string, int>& m0, map<string, int>& m1, map<string, int>& m2,
	       double* ppo,
	       double* x,double* y,double* z,
	       char* Cendgrp){

  const bool is_cooh = (strcmp(Cendgrp,"COOH")==0);
  const bool is_coo  = (strcmp(Cendgrp,"COO")==0);
  const bool is_none = (strcmp(Cendgrp,"0")==0);

  if(!(is_cooh || is_coo || is_none))
    return;

  COOH(memfile,m0,m1,m2,ppo,x,y,z);
}

/* Associate the maps with the first three or the last three
   residues of the Pdb_line array.
*/
void fill_first_3(Pdb_line* memfile,
		  char dir,
		  map<string, int>& m0, map<string, int>& m1, map<string, int>& m2,
		  string& m0key, string& m1key, string& m2key,
		  int& idx){

  int change = 0;
  int delta, resnum;

  if (dir == 'f') {idx = 0; delta = 1;}
  else {idx = pdb_rows - 1; delta = -1;}

  /* For each of the 3 residues, fill its map with
     the residue's (atom name)(altloc) values and their
     respective indices in the Pdb_line array.
  */
  while(change < 3){
    ++change;
    switch(change){
    case 1:
      resnum = memfile[idx].get_resnum();
      for(;;idx += delta){
	if(resnum != memfile[idx].get_resnum())
	  break;
	m0key = memfile[idx].get_atname();
	m0key += memfile[idx].get_altloc();
	m0[m0key] = idx;
      }
      break;
    case 2:
      resnum = memfile[idx].get_resnum();
      for(;;idx += delta){
	if(resnum != memfile[idx].get_resnum())
	  break;
	m1key = memfile[idx].get_atname();
	m1key += memfile[idx].get_altloc();
	m1[m1key] = idx;
      }
      break;
    case 3:
      resnum = memfile[idx].get_resnum();
      for(;;idx += delta){
	if(resnum != memfile[idx].get_resnum())
	  break;
	m2key = memfile[idx].get_atname();
	m2key += memfile[idx].get_altloc();
	m2[m2key] = idx;
      }
      break;
    default:
      break;
    }
  }
}

/* Helper for write_dhl_line(): writes one angle preceded by the number of
   blanks that keeps decimal points aligned in *.inp.  The padding shrinks
   by one blank for each extra character in front of the decimal point
   (additional digit or minus sign):

        0 <= v <   10   three blanks
       10 <= v <  100   two blanks
      100 <= v          one blank
      -10 <  v <    0   two blanks
     -100 <  v <= -10   one blank
             v <= -100  none

   A value of exactly -10.0 belongs to the one-blank band; it used to fall
   through to the no-padding case and shift its column.  A NaN compares
   false everywhere and is written without padding.
*/
static void write_aligned_angle_(ofstream& f_out, double angle){

  const char* pad;

  if(angle >= 100.0)       pad = " ";
  else if(angle >= 10.0)   pad = "  ";
  else if(angle >= 0.0)    pad = "   ";
  else if(angle > -10.0)   pad = "  ";
  else if(angle > -100.0)  pad = " ";
  else                     pad = "";

  f_out << pad;
  write_file(f_out, sig3(angle));
}

/* Write a residue's dihedral angles (one line) to the intermediate file
   suffixed with ".inp".  The numbers are formatted exactly as
   they should be inside the $GEOM area of an ECEPPAK input file.

   ppo:    the three backbone angles, written first (ppo[0..2]).
   chi:    side-chain angles; only the first chicnt entries are written.
   chicnt: number of valid entries in chi (may be 0).
   f_out:  open *.inp stream; the line is terminated with endl.
*/
void write_dhl_line(double* ppo,
		    double* chi,
		    int chicnt,
		    ofstream& f_out){

  for(int i=0; i<3; i++)
    write_aligned_angle_(f_out, ppo[i]);

  for(int i=0; i<chicnt; i++)
    write_aligned_angle_(f_out, chi[i]);

  f_out << endl;
}

/* This function moves through the (non-terminal) residues of the polypeptide,
   computing and writing dihedral angles.

   On entry m0, m1 and m2 describe three consecutive residues and idx is
   the row where the next residue begins.  Each pass slides that
   three-residue window along by one residue, then computes and writes the
   angles of the window's middle residue (m1).  The loop ends when idx
   leaves [0, pdb_rows); at that point m2 describes the final residue,
   whose angles are left for the caller to compute.

   dir:    'f' advances idx by +1 per row; any other value by -1.
   chicnt: receives the number of chi angles of the residue last processed.
   idx:    updated in place; out of range on return.

   Side effect: the variables `first` and `last` (declared outside this
   function) are set to the residue numbers of the first and last rows.
*/
void lp_comp(Pdb_line* memfile,
	     char dir,
	     map<string, int>& m0,map<string, int>& m1,map<string, int>& m2,
	     double* ppo,
	     double* chi,
	     int* chicnt,
	     double* x,double* y,double* z,
	     int& idx,
	     char* resname,
	     ofstream& f_out){

  /* idx is already at first line of next residue */
  const int delta = (dir == 'f') ? 1 : -1;
  string key;

  first = memfile[0].get_resnum();
  last = memfile[(pdb_rows-1)].get_resnum();

  while(idx >= 0 && idx < pdb_rows){

    /* To shift the frame over by one,
       copy m1 into m0, m2 into m1, and... */
    m0 = m1;
    m1 = m2;

    /* ...fill m2 with data for next AA in sequence.  Start from an empty
       map: otherwise atom keys of the previous residue that the new one
       lacks would survive and point at the wrong residue's rows. */
    m2.clear();

    const int resnum = memfile[idx].get_resnum();
    for(; idx >= 0 && idx < pdb_rows; idx += delta){
      if(resnum != memfile[idx].get_resnum())
	break;
      key = memfile[idx].get_atname();
      key += memfile[idx].get_altloc();
      m2[key] = idx;
    }

    /* Compute std. dihedrals for m1's AA */
    ppo_dhl(memfile, m0, m1, m2, ppo, x, y, z);
    /* Compute chi dihedrals for m1's AA */
    chi_dhl(memfile, m1, chi, chicnt, x, y, z, resname);
    /* Dump to file */
    write_dhl_line(ppo, chi, *chicnt, f_out);
  }
}

/* Computes phi, psi, and omega dihedrals.
 */
void ppo_dhl(Pdb_line* memfile,
	     map<string, int>& m0, map<string, int>& m1, map<string, int>& m2,
	     double* ppo,
	     double* x, double* y, double* z){

  /* phi calculation */
  x[0] = memfile[m0[" C   "]].get_x(); y[0] = memfile[m0[" C   "]].get_y();
  z[0] = memfile[m0[" C   "]].get_z();

  x[1] = memfile[m1[" N   "]].get_x(); y[1] = memfile[m1[" N   "]].get_y();
  z[1] = memfile[m1[" N   "]].get_z();

  x[2] = memfile[m1[" CA  "]].get_x(); y[2] = memfile[m1[" CA  "]].get_y();
  z[2] = memfile[m1[" CA  "]].get_z();

  x[3] = memfile[m1[" C   "]].get_x(); y[3] = memfile[m1[" C   "]].get_y();
  z[3] = memfile[m1[" C   "]].get_z();

  ppo[0] = dihedral_angle(x,y,z);

  /* psi calculation */
  x[0] = memfile[m1[" N   "]].get_x(); y[0] = memfile[m1[" N   "]].get_y();
  z[0] = memfile[m1[" N   "]].get_z();

  x[1] = memfile[m1[" CA  "]].get_x(); y[1] = memfile[m1[" CA  "]].get_y();
  z[1] = memfile[m1[" CA  "]].get_z();

  x[2] = memfile[m1[" C   "]].get_x(); y[2] = memfile[m1[" C   "]].get_y();
  z[2] = memfile[m1[" C   "]].get_z();

  x[3] = memfile[m2[" N   "]].get_x(); y[3] = memfile[m2[" N   "]].get_y();
  z[3] = memfile[m2[" N   "]].get_z();

  ppo[1] = dihedral_angle(x,y,z);

  /* omega calculation */
  x[0] = memfile[m1[" CA  "]].get_x(); y[0] = memfile[m1[" CA  "]].get_y();
  z[0] = memfile[m1[" CA  "]].get_z();

  x[1] = memfile[m1[" C   "]].get_x(); y[1] = memfile[m1[" C   "]].get_y();
  z[1] = memfile[m1[" C   "]].get_z();

  x[2] = memfile[m2[" N   "]].get_x(); y[2] = memfile[m2[" N   "]].get_y();
  z[2] = memfile[m2[" N   "]].get_z();

  x[3] = memfile[m2[" CA  "]].get_x(); y[3] = memfile[m2[" CA  "]].get_y();
  z[3] = memfile[m2[" CA  "]].get_z();

  ppo[2] = dihedral_angle(x,y,z);

}

//------------------------------------------------------------------
// Dr. Thomas Richard Ioerger's (my research advisor)
// dihedral computation code:
// (Originally implemented for triads.cpp, a program written by him.)

// Matrix-vector product C = A * B for a 3x3 matrix A and a 3-vector B.
// Each C[row] is zeroed and then accumulated in place, left to right,
// so C should not alias B.

void mult_mat_vec(double A[][3],double B[],double C[])
{
  int row = 0;

  while (row < 3)
  {
    C[row] = 0.0;
    C[row] += A[row][0]*B[0];
    C[row] += A[row][1]*B[1];
    C[row] += A[row][2]*B[2];
    ++row;
  }
}

// Helper for dihedral_angle(): rotates the coordinate pair (u, v) by
// -angle within its plane:
//     u' =  cos(angle)*u + sin(angle)*v
//     v' = -sin(angle)*u + cos(angle)*v
// A point at polar angle `angle` in the (u, v) plane lands on the +u axis.
static void dhl_rotate_pair_(double angle, double& u, double& v)
{
  const double c = std::cos(angle);
  const double s = std::sin(angle);
  const double u_rot =  c*u + s*v;
  const double v_rot = -s*u + c*v;
  u = u_rot;
  v = v_rot;
}

// Dihedral angle, in degrees within [-180, 180], defined by the four points
// i0....i1-----i2....i3 whose coordinates are (x[k], y[k], z[k]), k = 0..3.
//
// Method:
//   translate i1 to origin
//   rotate i2 to Z-axis
//   project i0 and i3 onto XY-plane
//   compute difference angle (i3 minus i0)
//
// The x, y, z arrays are used as work space and hold the transformed
// coordinates on return; callers must reload them before the next call.
//
// Polar angles are taken with atan2 rather than asin(y/sqrt(x*x+y*y)) plus
// a quadrant fix-up.  The two agree wherever the latter is defined, but the
// asin form divides 0 by 0 (giving NaN) when a point lies on the axis of
// rotation, e.g. whenever the i1-i2 bond is already parallel to Z.
// atan2(0, 0) is 0, so those inputs now give a finite result.

double dihedral_angle(double x[4],double y[4],double z[4])
{
  // i1 sits at the origin after the translation, so only these three
  // points need rotating.
  static const int moved[3] = {0, 2, 3};
  int n;
  double a,b;

  x[0] -= x[1]; x[2] -= x[1]; x[3] -= x[1]; x[1] = 0;
  y[0] -= y[1]; y[2] -= y[1]; y[3] -= y[1]; y[1] = 0;
  z[0] -= z[1]; z[2] -= z[1]; z[3] -= z[1]; z[1] = 0;

  // rotate i2 around Z-axis to XZ-plane (make angle in XY-plane 0)

  a = std::atan2(y[2], x[2]);
  for (n=0 ; n<3 ; n++)
    dhl_rotate_pair_(a, x[moved[n]], y[moved[n]]);

  // rotate i2 around Y-axis to Z-axis (make angle in XZ-plane 0)

  a = std::atan2(x[2], z[2]);
  for (n=0 ; n<3 ; n++)
    dhl_rotate_pair_(a, z[moved[n]], x[moved[n]]);

  // i2 is on Z-axis, project i0 and i3 onto XY-plane to compute angles

  const double rad_to_deg = 180.0/std::acos(-1.0);
  a = std::atan2(y[0], x[0])*rad_to_deg;
  b = std::atan2(y[3], x[3])*rad_to_deg;

  a = b-a;
  if (a<-180) a = a+360;
  if (a>180) a = a-360;
  return a;
}

//----------------------------------------------------------------------

/* Computes the chi dihedrals of a residue.

   m:       map describing the residue (key -> row index in memfile).
   chi:     receives the chi angles.
   chicnt:  receives the number of chi angles; passed on to the
            per-residue routine, which is expected to set it.
   x, y, z: four-element work arrays handed to the per-residue routine.
   resname: caller-supplied buffer; the residue name is copied into it
            with strcpy, so it must be large enough for that name plus
            the terminating NUL.

   The residue name is read from the row of the " CA  " entry of m (every
   amino acid should have a 'CA').  The matching chi_xxx routine is then
   called; GLY and PRO routines receive only chicnt.  A name that matches
   none of the twenty handled here calls nothing, leaving chi and *chicnt
   as they were.
*/
void chi_dhl(Pdb_line* memfile,
	     map<string, int>& m,
	     double* chi,
	     int* chicnt,
	     double* x, double* y, double* z,
	     char* resname){

  strcpy(resname, memfile[m[" CA  "]].get_resname());

  /* The names are mutually exclusive, so at most one branch runs. */
  if     (!strcmp(resname,"ALA")) chi_ala(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"GLY")) chi_gly(chicnt);
  else if(!strcmp(resname,"VAL")) chi_val(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"TRP")) chi_trp(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"SER")) chi_ser(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"THR")) chi_thr(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"LYS")) chi_lys(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"LEU")) chi_leu(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"ILE")) chi_ile(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"ASN")) chi_asn(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"ARG")) chi_arg(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"GLN")) chi_gln(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"MET")) chi_met(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"PRO")) chi_pro(chicnt);
  else if(!strcmp(resname,"GLU")) chi_glu(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"PHE")) chi_phe(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"TYR")) chi_tyr(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"CYS")) chi_cys(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"HIS")) chi_his(memfile,m,chi,chicnt,x,y,z);
  else if(!strcmp(resname,"ASP")) chi_asp(memfile,m,chi,chicnt,x,y,z);

}













