#!/usr/bin/env python

# Program to convert MDL V2000 molfile (.mol) to PC GAMESS input file
#   The only important thing is ability to symmetrize nearly symmetric structures
#   via -s keyword with optional parameters that determine symmetrization
#   Requires SYMMOL symmetrizer 
#
#   Is tested with few simple symmetries and seems to behave OK with Cs symmetry
#   For higher symmetry, axis rearrangment is handeled by SYMMOL and is not communicated to PC GAMESS
#
# Dr. Kalju Kahn, UCSB, May 2007

import sys, os
import re

#
# Define common atoms and their charges (MOLDEN mol files print only one letter :(  )
#
def element_charge(elem):
   if   elem == "H":
      chrg = 1
   elif elem == "He":
      chrg = 2
   elif elem == "Li":
      chrg = 3
   elif elem == "Be":
      chrg = 4
   elif elem == "B":
      chrg = 5
   elif elem == "C":
      chrg = 6
   elif elem == "N":
      chrg = 7
   elif elem == "O":
      chrg = 8
   elif elem == "F":
      chrg = 9
   elif elem == "Ne":
      chrg = 10
   elif elem == "Na":
      chrg = 11
   elif elem == "Mg":
      chrg = 12
   elif elem == "Al":
      chrg = 13
   elif elem == "Si":
      chrg = 14
   elif elem == "P":
      chrg = 15
   elif elem == "S":
      chrg = 16
   elif elem == "Cl":
      chrg = 17
   elif elem == "Ar":
      chrg = 18
   elif elem == "K":
      chrg = 19
   elif elem == "Ca":
      chrg = 20
   elif elem == "Fe":
      chrg = 26
   elif elem == "Zn":
      chrg = 30
   elif elem == "Br":
      chrg = 35
   else:
      print "==> UNRECOGNIZED ELEMENT: NEED TO ASSIGN CHARGES MANUALLY"
      chrg = Z
   return chrg

#
# Name and open various necessary files
#
inp_file_name = sys.argv[ 1 ]

geo_file_name = inp_file_name.split('.')[0] + ".geo"
smt_file_name = inp_file_name.split('.')[0] + ".smt"
sym_file_name = inp_file_name.split('.')[0] + ".sym"
uni_file_name = inp_file_name.split('.')[0] + ".uni"

try:
   inp_file = open( inp_file_name )
except IOError:
   print >> sys.stderr, "Input molfile could not be opened"
   sys.exit( 1 )

try:
   geo_file = open( geo_file_name, 'w' )
except IOError:
   print >> sys.stderr, "Failed to open the geometry output file"

try:
   smt_file = open( smt_file_name, 'w' )
except IOError:
   print >> sys.stderr, "Failed to open the temporary symmetry file"

#
#  Read the input file once to figure out where is the geometry (use V2000 molfile label)
#
line_counter = 0
records = inp_file.readlines()
for rec in records:
    line_counter = line_counter +1
    field = rec.split()
    if len(field) > 0:
       if re.search ( "V2000", rec ):
          counts_lin = line_counter
          n_atoms = int(field[0])

#
# Read the input file second time and create non-symmetrized files for PC GAMESS and SYMMOL
#
if (n_atoms > 0):
   print >> geo_file,  " $contrl scftyp=XXX runtyp=XXX coord=cart $end"
   print >> geo_file,  " $data"
   print >> smt_file,  "#   GEOMETRY BEFORE SYMMETRIZATION"
   print >> smt_file, "   1.00000  1.00000  1.00000 90.00000 90.00000 90.00000"
   print >> smt_file, "   1  1   .300000   .300000"
   geo_ini = counts_lin
   geo_end = counts_lin + n_atoms
   inp_file.seek(0)
   lines = inp_file.readlines()
   for i in range (geo_ini , geo_end):
      atom_field = lines[i].split()
      x_coor     = float(atom_field[0])
      y_coor     = float(atom_field[1])
      z_coor     = float(atom_field[2])
      elem       = atom_field[3]
      chrg       = element_charge(elem)
      print >> geo_file, "%2s     %2s   % .6f   % .6f   % .6f" % (elem,  chrg , x_coor, y_coor, z_coor)
      print >> smt_file, "%2s     %1s % .5f % .5f % .5f" % (elem,  1, x_coor, y_coor, z_coor)
   print >> geo_file, " $end"
inp_file.close()
geo_file.close()
smt_file.close()

#
# Run the SYMMOL to symmetrize if possible
#
print "                                                  "
print " ==>  TRYING TO SYMMETRIZE THE GEOMETRY"
os.system("symmol <" + smt_file_name)

try:
   out_file = open( "symmol.out", 'r' )
except IOError:
   print >> sys.stderr, "Failed to open the temporary symmetry file"

try:
   sym_file = open( sym_file_name, 'w' )
except IOError:
   print >> sys.stderr, "Failed to open the all atom symmetry output file"

try:
   uni_file = open( uni_file_name, 'w' )
except IOError:
   print >> sys.stderr, "Failed to open the unique atom symmetry output file"

#
# Now analyze the symmol.out to find where the geometry 
#
ini_lin = 0
line_counter = 0
records = out_file.readlines()
for rec in records:
    line_counter = line_counter +1
    field = rec.split()
    if len(field) > 0:
       if re.search ( "SYMMETRIZED ORTHOGONAL COORDINATES", rec ):
          ini_lin = line_counter
       elif re.search ( "Atom defining the asymmetric", rec ):
          end_lin = line_counter

#
# Now analyze the last line of the symmol.out to see what do about atom order; this works only with Cs symmetry
#
xyz_switch = "none"
if re.search ( "[Cs ]", rec):    
   symm_el = "Cs"  
   if re.search ( "-x,y,z", rec):
      xyz_switch = "xz"
   if re.search ( "x,-y,z", rec):
      xyz_switch = "yz"

#
#   Write output:
#      sym_file receives all atoms (use coord=cart)
#      uni_fiel receives unique atoms (use coord=unique)

#
if (ini_lin > 0):
   print " ==> SYMMETRIZATION SUCCESSFUL"
   print >> sym_file,  " $contrl scftyp=XXX runtyp=XXX coord=cart $end"
   print >> sym_file,  " $data"
   print >> sym_file,  "GEOMETRY AFTER SYMMETRIZATION"
   print >> uni_file,  " $contrl scftyp=XXX runtyp=XXX coord=unique $end"
   print >> uni_file,  " $data"
   print >> uni_file, "GEOMETRY AFTER SYMMETRIZATION"
   if symm_el == "Cs":
      print >> uni_file, "Cs"
      print >> uni_file, " "
   out_ini = ini_lin 
   out_end = end_lin - 2 
   out_file.seek(0)
   lines = out_file.readlines()
   for i in range (out_ini , out_end):
      atom_field = lines[i].split()
      elem       = atom_field[0]
      chrg       = element_charge(elem)
      x_coor     = float(atom_field[2])
      y_coor     = float(atom_field[3])
      z_coor     = float(atom_field[4])
      if len(atom_field) == 9:
         if xyz_switch == "none":
            print >> uni_file, "%2s     %2s   % .6f   % .6f   % .6f" % (elem,  chrg , x_coor, y_coor, z_coor)
         elif xyz_switch == "xz":
            print >> uni_file, "%2s     %2s   % .6f   % .6f   % .6f" % (elem,  chrg , z_coor, y_coor, x_coor)
         elif xyz_switch == "yz":
            print >> uni_file, "%2s     %2s   % .6f   % .6f   % .6f" % (elem,  chrg , x_coor, z_coor, y_coor)
      print >> sym_file, "%2s     %2s   % .6f   % .6f   % .6f" % (elem,  chrg , x_coor, y_coor, z_coor)
   print >> uni_file, "$end"
   print >> sym_file, "$end"
else:
   print " ==> STRUCTURE CANNOT BE SYMMETRIZED"
