#!/usr/bin/env python

#
# Program to extract converged geometry, MOs, and VECTORS from PUNCH file
# after geometry optimization
#
# Dr. Kalju Kahn, UC Santa Barbara, May 2007
# Private development version.  No warranties whatsoever.  
#
import sys
import re
import os

# Check if explicit input is given; if not assume PUNCH
if len(sys.argv ) == 1 :
   inp_file_name = "PUNCH"
elif len(sys.argv ) == 2 :
   inp_file_name = sys.argv[ 1 ]
else:
   sys.exit ( "Usage: parse_punch.py punchfile" )

# Make sure the line terminators are Unix style (code by Fredrik Lundh)
def run(program, *args):
    pid = os.fork()
    if not pid:
        os.execvp(program, (program,) +  args)
    return os.wait()[0]

run("dos2unix", inp_file_name)

# Give unique file names to new files
geo_file_name = inp_file_name.split('.')[0] + ".geo"
mos_file_name = inp_file_name.split('.')[0] + ".mos"
hes_file_name = inp_file_name.split('.')[0] + ".hes"
new_file_name = inp_file_name.split('.')[0] + ".in2"


# Open files
try:
   inpfile = open( inp_file_name )
except IOError:
   print >> sys.stderr, "Input file could not be opened"
   sys.exit( 1 )

try:
   geo_file = open( geo_file_name, 'w' )
except IOError:
   print >> sys.stderr, "Failed to open the geometry output file"
   sys.exit( 1 )
try:
   mos_file = open( mos_file_name, 'w' )
except IOError:
   print >> sys.stderr, "Failed to open the MO output file"
   sys.exit( 1 )

try:
   hes_file = open( hes_file_name, 'w' )
except IOError:
   print >> sys.stderr, "Failed to open the Hessian output file"
   sys.exit( 1 )

try:
   new_file = open( new_file_name, 'w' )
except IOError:
   print >> sys.stderr, "Failed to open the seed for the next input file"
   sys.exit( 1 )

# Analyze the content of the PUNCH file
geo_ini = 0
geo_end = 0
mos_ini = 0
mos_end = 0
hes_ini = 0
hes_end = 0
line_counter = 0
records = inpfile.readlines()
for rec in records:
    line_counter = line_counter +1 
    field = rec.split()
    if len(field) > 0:
       if field[0] == "$DATA":
          comm = line_counter
          symm = line_counter + 1
       elif field[0] == "-----":
          if re.search ( "RESULTS FROM SUCCESSFUL"  , rec ):
             print rec
             geo_ini = line_counter + 4
       elif field[0] == "---":
          if re.search ( "OPTIMIZED ", rec ):
             print rec
             geo_end = line_counter - 1 
             mos_ini = line_counter + 1
       elif field[0] == "POPULATION":
             print rec
             mos_end = line_counter - 1
       elif field[0] == "$HESS":
             hes_ini = line_counter -1
       elif field[0] == "$DIPDR":
             hes_end = line_counter - 1       

# Create a rudimentary header and append important bits to new files 
print >> new_file, " $contrl scftyp=XXX runtyp=XXX $end"
print >> new_file, " $system mwords=XXX timlim=XXX $end"
print >> new_file, " $guess guess=moread norb=XXX $end"
print >> new_file, " $basis gbasis=XXX $end"
print >> new_file, " $data"
inpfile.seek(0)
lines = inpfile.readlines()
print >> new_file, lines[comm],
print >> new_file, lines[symm],
if lines[symm].split()[0] != "C1":
   print >> new_file, " "

if (geo_ini > 5):
   print "LOOKS LIKE A SUCCESSFUL OPTIMIZATION JOB"
   for i in range (geo_ini, geo_end):
      print >> geo_file, lines[i],
      print >> new_file, lines[i],
   print >> new_file, " $end"
else:
   print "DOES NOT LOOK LIKE A SUCCESSFUL OPTIMIZATION JOB"
if (mos_ini > 5):
   print "LOOKS LIKE MO COEFFICIENTS ARE PRESENT"
   for i in range (mos_ini, mos_end):
      print >> mos_file, lines[i],
      print >> new_file, lines[i],
else:
   print "DOES NOT LOOK LIKE A SUCCESSFUL JOB"
if (hes_ini > 5):
   print "LOOKS LIKE HESSIAN ELEMENTS ARE PRESENT" 
   for i in range (hes_ini, hes_end):
      print >> hes_file, lines[i],
      print >> new_file, lines[i],
   print "Created files ", geo_file_name, mos_file_name, hes_file_name, new_file_name
else:
   print "DOES NOT LOOK LIKE A SUCCESSFUL JOB"

# Would be nice to extract number of MO's from the PUNCH file ...
