#!/usr/bin/env python

# Author:  Sam Clark, CPFD Software, LLC
# Date:    January 15, 2018
# Purpose: A script to run standardized Barracuda timing problems.

from __future__ import print_function
from __future__ import division
import sys
import os
import argparse
import logging
import platform
import psutil
import subprocess
import datetime
import shutil
import re

# The cpuinfo module is not installed by default with Anaconda.  If necessary,
# install it using the pip module.  This requires an active internet connection.
# Found at: https://stackoverflow.com/questions/12332975/installing-python-module-within-code
# Slightly modified because the 'cpuinfo' module comes from a package called
# 'py-cpuinfo', so the originally posted function doesn't quite work.
import importlib
try:
    cpuinfo = importlib.import_module('cpuinfo')
except ImportError:
    print("cpuinfo module not found.  Attempting to install it with pip.")
    print("Note: this requires an internet connection.")
    # pip.main() is not part of pip's supported API (it was removed in
    # pip 10), so run pip as a child process of the current interpreter.
    # check_call raises CalledProcessError if the installation fails, which
    # reports the real cause instead of a second, confusing ImportError.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'py-cpuinfo'])
    # Python 3 caches import path lookups; refresh them so the freshly
    # installed package is found (the function does not exist on Python 2).
    if hasattr(importlib, 'invalidate_caches'):
        importlib.invalidate_caches()
    cpuinfo = importlib.import_module('cpuinfo')

# Function to convert raw "bytes" into human-readable MB, GB, etc.
# Found at: https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
# Slightly modified based on my preferences for spacing and prefixes
def sizeof_fmt(num, suffix='B'):
    """Format a raw quantity as a human-readable string such as "1.5 kB".

    The value is divided by 1024 until its magnitude drops below 1024, and
    the prefix matching the number of divisions is attached.  Anything still
    too large after the 'Z' prefix is reported with the 'Y' prefix.

    num    -- the quantity to format (e.g. a byte count)
    suffix -- unit text appended after the prefix, 'B' by default
    """
    prefixes = ('', 'k', 'M', 'G', 'T', 'P', 'E', 'Z')
    value = num
    position = 0
    while position < len(prefixes):
        if abs(value) < 1024.0:
            return "%3.1f %s%s" % (value, prefixes[position], suffix)
        value = value / 1024.0
        position += 1
    return "%.1f %s%s" % (value, 'Y', suffix)

# Command-line interface.  RawDescriptionHelpFormatter keeps the line breaks
# of the description text below exactly as written.
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description='''
A script to run the standard Barracuda timing test.

An example of typical usage of this script is:

RUN_TIMING_PROBLEM.py

''')

# The run-type tokens listed in the help text are the ones this script tests
# for when it dispatches the timing runs: 'c', 'o', 'cg' and 'og'.
parser.add_argument("-r", "--run", required=False, nargs="+", default=['c', 'cg', 'og'],
        help="Timing test(s) to run: c=CPU serial, o=CPU OpenMP parallel, "
             "cg=CPU serial + GPU parallel, og=CPU OpenMP parallel + GPU parallel, "
             "default = c cg og")

# type=int makes argparse reject a non-numeric core count with a usage error
# instead of passing it through to the solver command line.
parser.add_argument("-n", "--numcores", required=False, type=int,
        help="Number of cores to use for CPU parallel, default = N - 1")

parser.add_argument("-d", "--gpudevices", required=False, nargs="+", default=['all'],
        help="GPU device(s) to use, as numbered by Barracuda, default = all")

parser.add_argument("-l", "--listgpus", action="store_true",
        help="List GPU devices, as numbered by Barracuda, and exit")

parser.add_argument("-H", "--hostname", action="store_true",
        help="Record system hostname in results file, default = False")

parser.add_argument("-D", "--debug", action="store_true",
        help="Show debugging log messages")

args = parser.parse_args()

# Configure logging once; the --debug flag only lowers the level threshold
# from INFO to DEBUG, the message layout is the same either way.
logging.basicConfig(
    format="%(levelname)-10s %(asctime)s %(message)s",
    level=logging.DEBUG if args.debug else logging.INFO
)

log = logging.getLogger('barracuda_timing_problem')

myOS = platform.system()

# --listgpus: print Barracuda's own GPU device table and stop without
# running any timing tests.
if args.listgpus:
    log.info("Listing available GPU devices")

    if myOS in ('Linux', 'Windows'):
        # universal_newlines=True makes check_output return text rather than
        # bytes, so Python 3 prints the table itself instead of a b'...'
        # literal.  As before, the command goes through the shell on Windows
        # only.
        print(subprocess.check_output(["cpfd.x.17.2.0", "-gpu", "-dlist"],
                                      shell=(myOS == 'Windows'),
                                      universal_newlines=True))
        exitCode = 0
    else:
        # Report the failure as an error and through the exit status, the
        # same way the unsupported-OS case is handled before the timing runs.
        log.error("Error! Could not determine OS")
        exitCode = 1

    log.info("Exiting")
    sys.exit(exitCode)

log.info("Collecting system hardware information")

# The results file is named after the moment the script started and receives
# both the system description and the timing of every run.
ts = datetime.datetime.now()
dateTimeStamp = ts.strftime("%Y-%m-%d_%H%M%S")
logFile = dateTimeStamp + '_barracuda_timing_results.txt'
f = open(logFile, 'w')

f.writelines([
    "\n",
    "================================\n",
    "Timing Run Info\n",
    "================================\n",
])

# The hostname is only recorded when explicitly requested with --hostname
if args.hostname:
    f.writelines(["Hostname: ", platform.node(), "\n"])

# Same start time as in the file name, in a more readable layout
dateTimeStamp = ts.strftime("%Y-%m-%d %H:%M:%S")
f.writelines(["Timing run started: ", dateTimeStamp, "\n"])
f.writelines(["Run command: ", " ".join(map(str, sys.argv)), "\n"])

f.writelines([
    "\n",
    "================================\n",
    "Operating System\n",
    "================================\n",
    platform.platform(), "\n",
])

f.write("\n")
f.write("================================\n")
f.write("CPU Information\n")
f.write("================================\n")

myCPU = cpuinfo.get_cpu_info()

# Newer py-cpuinfo releases report the processor name under 'brand_raw'
# rather than 'brand'.  Try both, and fall back to a placeholder so that a
# missing key cannot abort the script with "None + str".
cpuBrand = myCPU.get('brand_raw') or myCPU.get('brand') or 'Unknown CPU'
f.write(cpuBrand + "\n")

cpuCores = myCPU.get('count')
f.write("Number of cores: " + str(cpuCores) + "\n")

# Core count for the CPU-parallel runs: the --numcores value when given,
# otherwise all cores but one.  Never go below one core, which the plain
# "N - 1" rule would do on a single-core machine.
if args.numcores:
    useCores = args.numcores
else:
    useCores = max(1, int(cpuCores) - 1)

log.info("Using " + str(useCores) + " CPU cores for CPU parallel")

f.writelines([
    "\n",
    "================================\n",
    "RAM Information\n",
    "================================\n",
])

# Total and currently available system memory as reported by psutil,
# formatted with sizeof_fmt
myRAM = psutil.virtual_memory()

f.writelines([
    "Total: ", sizeof_fmt(myRAM.total), "\n",
    "Avail: ", sizeof_fmt(myRAM.available), "\n",
])

f.write("\n")
f.write("================================\n")
f.write("GPU Information\n")
f.write("================================\n")

f.write("\n")
f.write("--------------------------------\n")
f.write("From nvidia-smi\n")
f.write("--------------------------------\n")

# Pick the nvidia-smi executable for this OS
if myOS == 'Linux':
    nvidiaSmiExe = "nvidia-smi"
elif myOS == 'Windows':
    nvidiaSmiExe = "C:\\Program Files\\NVIDIA Corporation\\NVSMI\\nvidia-smi.exe"
    # Fall back to a PATH lookup when the fixed location does not exist
    if not os.path.isfile(nvidiaSmiExe):
        nvidiaSmiExe = "nvidia-smi"
else:
    nvidiaSmiExe = None

if nvidiaSmiExe is None:
    log.error("Error! Could not determine OS")
    gpuList = 'Could not determine OS'
    nvidiasmi = 'Could not determine OS'
else:
    # universal_newlines=True returns text instead of bytes, so the output
    # can be concatenated with "\n" below on Python 3 as well as Python 2.
    gpuList = subprocess.check_output([nvidiaSmiExe, "-L"], universal_newlines=True)
    nvidiasmi = subprocess.check_output([nvidiaSmiExe], universal_newlines=True)

f.write(gpuList + "\n")
f.write(nvidiasmi + "\n")

f.write("\n")
f.write("--------------------------------\n")
f.write("From Barracuda\n")
f.write("--------------------------------\n")

# Ask the Barracuda solver for its own GPU device table.  The text is kept
# in gpuList because the GPU selection further down parses it.
if myOS in ('Linux', 'Windows'):
    # universal_newlines=True returns text instead of bytes, so the output
    # can be written to the results file on Python 3 as well as Python 2.
    # As before, the command goes through the shell on Windows only.
    gpuList = subprocess.check_output(["cpfd.x.17.2.0", "-gpu", "-dlist"],
                                      shell=(myOS == 'Windows'),
                                      universal_newlines=True)
else:
    log.error("Error! Could not determine OS")
    sys.exit(1)

f.write(gpuList + "\n")

# Automatically detect which GPUs on the system are capable of running the
# current timing test problem, and use all of them (assuming that the 'all'
# option is in effect for args.gpudevices).

# The regular expression and splitting commands below assume that the Barracuda
# GPU table looks like this (from 17.2.0):
#
# There are 4 CUDA devices.
#           4 are CUDA FERMI (compute 2.0) or higher devices.
#           4 are available for computation.
# 
# +--------------------------------------------------------------------------------------------------------------------------------+
# | Barracuda Capable GPUs available for selection                                                                                 |
# +--------------------------------------------------------------------------------------------------------------------------------+
# | Dev# | busID | Compute | Clock    | Cores | Memory Usage     | Bandwidth | Compute Mode       | Name                           |
# +------+-------+---------+----------+-------+------------------+-----------+--------------------+--------------------------------+
# |    0 |     6 |     6.1 | 1.58 GHz |  5760 |  0.16 / 11.90 GB |  5.7 GB/s | DEFAULT            | TITAN Xp                       |
# |    1 |     5 |     3.5 | 0.75 GHz |  2880 |  0.07 / 11.17 GB |  5.6 GB/s | DEFAULT            | Tesla K40c                     |
# |    2 |     3 |     3.5 | 0.77 GHz |  2304 |  0.11 /  7.93 GB |  5.5 GB/s | DEFAULT            | Quadro K5200                   |
# |    3 |     4 |     5.2 | 1.08 GHz |  4608 |  0.11 / 11.92 GB |  5.7 GB/s | DEFAULT            | GeForce GTX TITAN X            |
# +------+-------+---------+----------+-------+------------------+-----------+--------------------+--------------------------------+
# 
# Tue Jan 16 09:21:27 2018

# For the moment, our timing problem is about 5 GB.  We only want to run tests
# on cards that are at least this big.
minGpuSize = 5


def _parseGpuTable(listing):
    """Return (device ID, name, total memory in GB) for each GPU table row.

    listing is the text printed by the Barracuda "-gpu -dlist" command.  The
    data rows are taken from the second-to-last section obtained by splitting
    the listing on its "+-----+" border lines.  Each row is split on the '|'
    column separators, so columns that contain spaces cannot shift the other
    fields.  An empty list is returned when no table is found.
    """
    # On Python 3, check_output returns bytes unless text mode was requested
    if isinstance(listing, bytes) and not isinstance(listing, str):
        listing = listing.decode('utf-8', 'replace')

    # Raw string avoids invalid-escape warnings; \r? tolerates CRLF endings
    sections = re.split(r'\+.*\+\r?\n', listing)
    if len(sections) < 2:
        return []

    rows = []
    for gpuLine in sections[-2].splitlines():
        fields = [field.strip() for field in gpuLine.split('|')]
        try:
            # Column 6 is "used / total GB"; keep the total
            totalGB = float(fields[6].split('/')[1].split()[0])
            rows.append((fields[1], " ".join(fields[9].split()), totalGB))
        except (IndexError, ValueError):
            log.warning("Skipping unrecognized GPU table line: " + gpuLine)
    return rows


gpusToUse = []
gpuRows = _parseGpuTable(gpuList)

if not gpuRows:
    log.warning("No GPU devices found in the Barracuda GPU table")

for gpuID, gpuName, gpuSize in gpuRows:
    if args.gpudevices == ['all']:
        # "At least this big": a card of exactly minGpuSize GB qualifies
        if gpuSize >= minGpuSize:
            gpusToUse.append([gpuID, gpuName])
            log.info("GPU identified for timing test: " + gpuID + " " + gpuName)
        else:
            log.info("GPU too small for timing test (will not be used): " + gpuID + " " + gpuName)

    elif gpuID in args.gpudevices:
        gpusToUse.append([gpuID, gpuName])
        log.info("GPU identified for timing test: " + gpuID + " " + gpuName)

# Point out explicitly requested devices that are not in the table
if args.gpudevices != ['all']:
    foundIDs = [gpu[0] for gpu in gpusToUse]
    for requestedID in args.gpudevices:
        if requestedID not in foundIDs:
            log.warning("Requested GPU device not found (will not be used): " + str(requestedID))

log.info("Starting selected timing runs")

def startRun(runName, runDir, runCommand):
    """Run one timing problem and record its timing in the results file.

    runName    -- label used in log messages and as the section heading
    runDir     -- directory the solver runs in; deleted if it already exists
                  and re-created as a fresh copy of the 'base' directory
    runCommand -- solver command line as a list of arguments

    The recorded run time spans from just before runDir is re-created until
    the solver returns, so it includes the directory copy.

    Raises subprocess.CalledProcessError if the solver exits with a non-zero
    status.  The original working directory is restored in that case too.
    """
    log.info("Starting " + runName + " timing run")

    f.write("\n")
    f.write("================================\n")
    f.write(runName + " Run\n")
    f.write("================================\n")

    f.write(" ".join(str(x) for x in runCommand) + "\n")

    startTime = datetime.datetime.now()
    f.write("Start time: " + startTime.strftime("%Y-%m-%d %H:%M:%S") + "\n")
    # Flush so the header of a run is on disk even if the solver fails
    f.flush()

    parentDir = os.getcwd()

    # Always start from a pristine copy of the base project
    if os.path.exists(runDir):
        shutil.rmtree(runDir)

    shutil.copytree('base', runDir)

    os.chdir(runDir)
    try:
        # check_output captures the solver's stdout; it is discarded
        if myOS == 'Linux':
            subprocess.check_output(runCommand)
        elif myOS == 'Windows':
            subprocess.check_output(runCommand, shell=True)
        else:
            log.error("Error! Could not determine OS")
    finally:
        # Return to the starting directory even when the solver failed
        os.chdir(parentDir)

    endTime = datetime.datetime.now()
    f.write("End time: " + endTime.strftime("%Y-%m-%d %H:%M:%S") + "\n")

    runTime = endTime - startTime
    f.write("Run time: " + str(runTime.total_seconds()) + "\n")
    f.flush()

    log.info("Finished running " + runName)

# Dispatch the timing runs selected with --run

if 'c' in args.run:
    startRun("CPU Serial", "cpu_serial_no_gpu", ["cpfd.x.17.2.0", "timing_run.prj"])

if 'o' in args.run:
    # Pass -omp with the chosen core count, as the CPU Parallel + GPU run
    # below does; without it this command is identical to the serial run.
    startRun("CPU Parallel", "cpu_parallel_no_gpu",
             ["cpfd.x.17.2.0", "-omp", str(useCores), "timing_run.prj"])

# The GPU runs are repeated for every selected device.  Each device reuses
# the same run directory, which startRun re-creates on every call.
for gpu in gpusToUse:
    gpuID = gpu[0]

    if 'cg' in args.run:
        startRun("CPU Serial + GPU Parallel", "cpu_serial_gpu_parallel",
                 ["cpfd.x.17.2.0", "-gpu", "-device", str(gpuID), "timing_run.prj"])

    if 'og' in args.run:
        startRun("CPU Parallel + GPU Parallel", "cpu_parallel_gpu_parallel",
                 ["cpfd.x.17.2.0", "-gpu", "-device", str(gpuID), "-omp", str(useCores), "timing_run.prj"])

log.info("All timing runs are finished")

# Nothing writes to the results file after this point
f.close()
