#ifndef DOCKRMSD_H #define DOCKRMSD_H #include #include #include #include #include #include /* DockRMSD: an open-source tool for atom mapping and RMSD calculation of symmetric molecules through graph isomorphism Written by Eric Bell v1.0 written 5/2/2019 latest update (v1.1) written 8/26/2019 To compile, use: gcc DockRMSD.c -o DockRMSD -lm -O3 */ const int MAXBONDS=6; //Maximum number of bonds allowable on a single atom const int MAXLINELENGTH=150; //Maximum length (in characters) of a line in a mol2 file const double MAXMAPCOUNT=0; //Maximum amount of possible mappings before symmetry heuristic is used const int MAXDEPTH=2; const char* header="######################################################################\n" "# DockRMSD (v1.1): docking pose distance calculation #\n" "# #\n" "# If you use DockRMSD in your work, please cite: #\n" "# #\n" "# Bell, E.W., Zhang, Y. DockRMSD: an open-source tool for atom #\n" "# mapping and RMSD calculation of symmetric molecules through graph #\n" "# isomorphism. Journal of Cheminformatics, 11:40 (2019). #\n" "######################################################################\n"; int grabAtomCount(FILE* mol2,int hflag); int arrayIdentity(char** arr1,char** arr2, int arrlen); int inArray(int n, int* arr, int arrlen); void readMol2(char** atoms, double** coords, char*** bonds, int* nums, FILE* mol2, int atomcount, int hflag); int generalizeBonds(char*** bonds, int atomcount); char** buildTree(int depth, int index, char** atoms, char*** bonds, char* prestring, int prevind, int atomcount); double searchAssigns(int atomcount, int** allcands, int candcounts[], int* assign, char*** tempbond, char*** querybond, double** querycoord, double** tempcoord, int* bestassign); double assignAtoms(char** tempatom, char*** tempbond, char** queryatom, char*** querybond, double** querycoord, double** tempcoord, int* querynums, int* tempnums, int atomcount, int simpleflag); int validateBonds(int* atomassign, int proposedatom, int assignpos, char*** querybond, char*** tempbond, double querydists[], int atomcount); //DockRMSD performs DockRMSD's RMSD calculation and returns the resulting value //fname1 and fname2 are the file names of the .mol2 files to be compared //hflag has a value of 1 when hydrogens are to be used, 0 otherwise //corrflag has a value of 1 when one to one correspondence is to be used, 0 otherwise //verbflag has a value of 1 when the output of the standalone program is to be printed, otherwise 0 double DockRMSD(char* fname1, char* fname2, int hflag, int corrflag, int verbflag){ //Check if all inputs are given and valid FILE* query=fopen(fname1,"r"); FILE* template=fopen(fname2,"r"); if (query==NULL){ printf("Query file %s not found!\n",fname1); return -1.0; } if (template==NULL){ printf("Template file %s not found!\n",fname2); return -1.0; } if (verbflag){ printf("%s\n",header); } //Count how many atoms are in the query and template int querycount=grabAtomCount(query,hflag); int tempcount=grabAtomCount(template,hflag); if (querycount!=tempcount){ printf("%s\n","Error: Query and template don't have the same atom count!"); return -2.0; } if (querycount==0){ printf("%s\n","Error: Query file has no atoms!"); return -2.0; } if (tempcount==0){ printf("%s\n","Error: Template file has no atoms!"); return -2.0; } //Initialize pointer arrays and fill them with information from the mol2 files char** queryatoms=malloc(sizeof(char*)*querycount); double** querycoords=malloc(sizeof(double*)*querycount); char*** querybonds=malloc(sizeof(char**)*querycount); char** tempatoms=malloc(sizeof(char*)*tempcount); double** tempcoords=malloc(sizeof(double*)*tempcount); char*** tempbonds=malloc(sizeof(char**)*tempcount); int* querynums=malloc(sizeof(int)*querycount); int* tempnums=malloc(sizeof(int)*tempcount); int i; int j; for(i=0;i 1 && line[strlen(line)-2]=='\r'){ //For windows line endings line[strlen(line)-2]='\n'; line[strlen(line)-1]='\0'; } if (!strcmp(line,"@ATOM\n")){ countflag=1; continue; } if (!strcmp(line,"@BOND\n")){ countflag=0; break; } if (countflag && strlen(line)>1){ char* token=strtok(line," \t"); int i; for (i=0;i<5;i++){ token=strtok(NULL," \t"); } if (hflag || strcmp(token,"H")){ atomcount++; } } } if(ferror(mol2)){ printf("Error %d while reading in file.\n",ferror(mol2)); } rewind(mol2); //resets the file pointer for use in other functions return atomcount; } //Fills atoms, coords, and bonds with information contained within a mol2 file void readMol2(char** atoms, double** coords, char*** bonds, int* nums, FILE* mol2, int atomcount, int hflag){ char line[MAXLINELENGTH]; int atomnums[atomcount]; //Keeps track of all non-H atom numbers for bond reading int i=0; int sectionflag=0; //Value is 1 when reading atoms, 2 when reading bonds, 0 before atoms, >2 after bonds while(fgets(line,MAXLINELENGTH,mol2)!=NULL){ if (strlen(line) < 2){ //Skip blank lines continue; } if (strlen(line) > 1 && line[strlen(line)-2]=='\r'){ //Handling windows line endings line[strlen(line)-2]='\n'; line[strlen(line)-1]='\0'; } if (!strcmp(line,"@ATOM\n") || (sectionflag && line[0]=='@')){ sectionflag++; } else if (sectionflag==1){ //Reading in atoms and coordinates double coord[3]; int j=0; char* parts=strtok(line," \t"); int atomnum=atoi(parts); parts=strtok(NULL," \t"); for (j=0;j<3;j++){ parts=strtok(NULL," \t"); coord[j]=atof(parts); } parts=strtok(NULL," \t"); if (hflag || strcmp("H",parts)){ char* element=strtok(parts,"."); strcpy(*(atoms+i),element); atomnums[i]=atomnum; for (j=0;j<3;j++){ *(*(coords+i)+j)=coord[j]; } i++; } } else if (sectionflag==2){ //Reading in bonding network char* parts=strtok(line," \t"); parts=strtok(NULL," \t"); int from=inArray(atoi(parts),atomnums,atomcount)-1; parts=strtok(NULL," \t"); int to=inArray(atoi(parts),atomnums,atomcount)-1; parts=strtok(NULL," \t"); parts=strtok(parts,"\n"); if (from >= 0 && to >= 0){ strcpy(*(*(bonds+to)+from),parts); strcpy(*(*(bonds+from)+to),parts); } } } for(i=0;i*(*(dists+index)+j+1)){ int tempind=*(*(allcands+index)+j); double tempdist=*(*(dists+index)+j); *(*(allcands+index)+j)=*(*(allcands+index)+j+1); *(*(dists+index)+j)=*(*(dists+index)+j+1); *(*(allcands+index)+j+1)=tempind; *(*(dists+index)+j+1)=tempdist; } } } } /* for (i=0;i0 && histinds[index]==candcounts[history[index]]){ histinds[index]=0; *(assign+history[index])=-1; for (i=0;i bestTotal){ //Dead end elimination check break; } if (!inArray(*(*(allcands+history[index])+i),assign,atomcount) && validateBonds(assign,*(*(allcands+history[index])+i),history[index],querybond,tempbond,querydists,atomcount)){ //Feasibility check foundflag=1; *(assign+history[index])=*(*(allcands+history[index])+i); histinds[index]=i+1; runningTotal+=*(*(dists+history[index])+i); index++; break; } } if (!foundflag){ //This occurs if none of the remaining possibilities can be mapped if (index==0){ break; } else { histinds[index]=0; *(assign+history[index])=-1; for (i=0;i=0 && !strcmp(*(*(tempbond+proposedatom)+assignatom),"")){ return 0; } } return 1; } //Returns the lowest RMSD of all possible mappings for query atoms with template indices given the two molecules' bonding network double assignAtoms(char** tempatom, char*** tempbond, char** queryatom, char*** querybond, double** querycoord, double** tempcoord, int* querynums, int* tempnums, int atomcount, int simpleflag){ int** allcands=malloc(sizeof(int*)*atomcount); //List of all atoms in the template that could feasibly be each query atom int candcounts[atomcount]; //Number of atoms in the template that could feasibly be each query atom int i; //Iterate through each query atom and determine which template atoms correspond to the query for(i=0;i Second file, * indicates correspondence is not one-to-one):\n"); for (i=0;i %s%3d ",*(queryatom+i),*(querynums+i),*(tempatom+*(bestassign+i)),*(tempnums+*(bestassign+i))); if (*(querynums+i)==*(tempnums+*(bestassign+i))){ printf("\n"); } else { printf("*\n"); } } } free(assign); free(bestassign); return bestrmsd; } #endif //DOCKRMSD_H