#!/usr/bin/env python

"""
GIP provider for htcondor-CE.
"""

# we cannot change the name of the script.
# pylint: disable=invalid-name


from __future__ import print_function
import sys
from datetime import datetime
import subprocess
from collections import defaultdict
import signal
import htcondor
import classad as ca

# LDIF template for the GLUE2 ComputingService entry.  main() fills it with
# str.format(); placeholders: bind_dn, central_manager, num_endpoints,
# num_shares, site_name.
SERVICE_LDIF = """dn: {bind_dn}
GLUE2ServiceID: {central_manager}
objectClass: GLUE2Entity
objectClass: GLUE2Service
objectClass: GLUE2ComputingService
GLUE2EntityName: Computing Service {central_manager}
GLUE2ServiceCapability: executionmanagement.jobexecution
GLUE2ServiceType: org.opensciencegrid.htcondorce
GLUE2ServiceQualityLevel: production
GLUE2ServiceComplexity: endpointType={num_endpoints}, share={num_shares}, resource=1
GLUE2ServiceAdminDomainForeignKey: {site_name}
"""


# LDIF template for the GLUE2 ComputingManager entry (the batch system).
# Placeholders: central_manager, bind_dn, version, total_cores.
MANAGER_LDIF = """dn: GLUE2ManagerID={central_manager}_Manager,{bind_dn}
objectClass: GLUE2Entity
objectClass: GLUE2Manager
objectClass: GLUE2ComputingManager
GLUE2ManagerID: {central_manager}_Manager
GLUE2ManagerProductName: HTCondor
GLUE2ManagerProductVersion: {version}
GLUE2ComputingManagerTotalLogicalCPUs: {total_cores}
GLUE2ManagerServiceForeignKey: {central_manager}
GLUE2ComputingManagerComputingServiceForeignKey: {central_manager}
"""


# LDIF template for one GLUE2 ExecutionEnvironment entry; main() prints one
# per distinct machine topology.  Placeholders: central_manager, resource,
# bind_dn, memory (used for both main and virtual memory size), os, name,
# version, arch, cpu, instances.
RESOURCE_LDIF = """dn: GLUE2ResourceID={central_manager}_{resource},{bind_dn}
objectClass: GLUE2Entity
objectClass: GLUE2Resource
objectClass: GLUE2ExecutionEnvironment
GLUE2ResourceID: {central_manager}_{resource}
GLUE2ExecutionEnvironmentMainMemorySize: {memory}
GLUE2ExecutionEnvironmentVirtualMemorySize: {memory}
GLUE2ExecutionEnvironmentOSFamily: {os}
GLUE2ExecutionEnvironmentOSName: {name}
GLUE2ExecutionEnvironmentOSVersion: {version}
GLUE2ExecutionEnvironmentCPUMultiplicity: singlecpu-multicore
GLUE2ExecutionEnvironmentPlatform: {arch}
GLUE2ExecutionEnvironmentLogicalCPUs: {cpu}
GLUE2ExecutionEnvironmentConnectivityIn: TRUE
GLUE2ExecutionEnvironmentConnectivityOut: TRUE
GLUE2ExecutionEnvironmentTotalInstances: {instances}
GLUE2ResourceManagerForeignKey:  {central_manager}_Manager
GLUE2ExecutionEnvironmentComputingManagerForeignKey:  {central_manager}_Manager
"""


# LDIF template for one GLUE2 ComputingEndpoint entry; main() prints one per
# CE schedd it can reach.  The endpoint URL hard-codes port 9619.
# Placeholders: name, bind_dn, version, state, state_info, start_time,
# issuer, central_manager.
ENDPOINT_LDIF = """dn: GLUE2EndpointID={name}_HTCondorCE,{bind_dn}
objectClass: GLUE2Entity
objectClass: GLUE2Endpoint
objectClass: GLUE2ComputingEndpoint
GLUE2EndpointID: {name}_HTCondorCE
GLUE2EndpointCapability: executionmanagement.jobexecution
GLUE2EndpointInterfaceName: org.opensciencegrid.htcondorce
GLUE2EndpointImplementor: HTCondor
GLUE2EndpointImplementationName: HTCondor
GLUE2EndpointImplementationVersion: {version}
GLUE2EndpointURL: condor://{name}:9619
GLUE2EndpointQualityLevel: production
GLUE2EndpointServingState: production
GLUE2EndpointHealthState: {state}
GLUE2EndpointHealthStateInfo: {state_info}
GLUE2EndpointStartTime: {start_time}
GLUE2EndpointIssuerCA: {issuer}
GLUE2EndpointDowntimeInfo: See the GOC DB for downtimes: https://goc.egi.eu/
GLUE2EndpointServiceForeignKey: {central_manager}
GLUE2ComputingEndpointComputingServiceForeignKey: {central_manager}
"""

# LDIF template for one GLUE2 Benchmark entry attached to a resource.  Unlike
# the other templates it spells out the service/group DN suffix literally
# instead of taking a bind_dn placeholder.
# Placeholders: central_manager, spec_type, resource, spec_value.
SPEC_LDIF = """dn: GLUE2BenchmarkID={central_manager}_{spec_type},GLUE2ResourceID={central_manager}_{resource},GLUE2ServiceID={central_manager},GLUE2GroupID=resource,o=glue
GLUE2BenchmarkExecutionEnvironmentForeignKey: {central_manager}_{resource}
GLUE2BenchmarkID: {central_manager}_{spec_type}
GLUE2BenchmarkType: {spec_type}
objectClass: GLUE2Entity
objectClass: GLUE2Benchmark
GLUE2BenchmarkValue: {spec_value}
GLUE2BenchmarkComputingManagerForeignKey: {central_manager}_Manager
GLUE2EntityName: Benchmark {spec_type}
"""

# LDIF template for one GLUE2 ComputingShare entry; main() prints one per
# (CE, VO) pair.  {resource_keys} receives pre-formatted, newline-terminated
# foreign-key lines, one pair per resource.
# Placeholders: shareid, bind_dn, total_vo_jobs, idle_vo_jobs,
# running_vo_jobs, central_manager, endpointid, resource_keys.
SHARE_LDIF = """dn: GLUE2ShareID={shareid},{bind_dn}
objectClass: GLUE2Entity
objectClass: GLUE2Share
objectClass: GLUE2ComputingShare
GLUE2ShareID: {shareid}
GLUE2ComputingShareServingState: production
GLUE2ComputingShareTotalJobs: {total_vo_jobs}
GLUE2ComputingShareWaitingJobs: {idle_vo_jobs}
GLUE2ComputingShareRunningJobs: {running_vo_jobs}
GLUE2ComputingShareComputingServiceForeignKey: {central_manager}
GLUE2ComputingShareComputingEndpointForeignKey: {endpointid}
GLUE2ShareServiceForeignKey: {central_manager}
GLUE2ShareEndpointForeignKey: {endpointid}
{resource_keys}
"""


# LDIF template holding two entries for a share: a GLUE2 MappingPolicy and a
# GLUE2 AccessPolicy (ID suffixed with "_access"), both with rule "vo:<vo>".
# Placeholders: policyid, shareid, bind_dn, vo, endpointid.
POLICY_LDIF = """dn: GLUE2PolicyID={policyid},GLUE2ShareID={shareid},{bind_dn}
objectClass: GLUE2Entity
objectClass: GLUE2Policy
objectClass: GLUE2MappingPolicy
GLUE2PolicyID: {policyid}
GLUE2PolicyScheme: org.glite.standard
GLUE2PolicyRule: vo:{vo}
GLUE2MappingPolicyShareForeignKey: {shareid}

dn: GLUE2PolicyID={policyid}_access,GLUE2ShareID={shareid},{bind_dn}
objectClass: GLUE2Entity
objectClass: GLUE2Policy
objectClass: GLUE2AccessPolicy
GLUE2PolicyID: {policyid}_access
GLUE2PolicyScheme: org.glite.standard
GLUE2PolicyRule: vo:{vo}
GLUE2AccessPolicyEndpointForeignKey: {endpointid}
"""


class TimeoutError(Exception):
    """
    Exception raised by the SIGALRM handler to abort an operation that
    exceeded its time limit.  It adds nothing to Exception.
    """


# Both parameters are required by the signal-handler calling convention but
# are not used here.
# pylint: disable=unused-argument
def handler(signum, frame):
    """
    Signal handler for the timeout alarm: always raises TimeoutError with
    the message "TimeoutError".
    """
    timeout_error = TimeoutError("TimeoutError")
    raise timeout_error

def _read_timeout(default=10):
    """
    Return the GLUE_PROVIDER_TIMEOUT setting as a whole number of seconds.

    signal.alarm() and the '%i' placeholders in the timeout messages both
    need an integer, so the configured value is converted with int().  When
    the parameter is unset, or cannot be converted (a warning is written to
    stderr in that case), ``default`` is returned instead.
    """
    raw_value = htcondor.param.get('GLUE_PROVIDER_TIMEOUT')
    if not raw_value:
        return default
    try:
        return int(raw_value)
    except (TypeError, ValueError):
        sys.stderr.write("Warning: invalid GLUE_PROVIDER_TIMEOUT %r, using %i s\n"
                         % (raw_value, default))
        return default


def _read_benchmarks():
    """
    Return the benchmark types and values configured in HTCONDORCE_SPEC.

    The parameter is parsed as a ClassAd.  An empty dict is returned when it
    is unset or empty, so the caller can always iterate over ``.items()``.
    """
    raw_specs = htcondor.param.get('HTCONDORCE_SPEC')
    if not raw_specs:
        return {}
    return ca.ClassAd(raw_specs) or {}


def _collect_topologies(coll):
    """
    Query the collector for startd ads and summarise them.

    Returns a ``(total_cores, topologies)`` pair: ``total_cores`` maps each
    machine name to its DetectedCpus value (first ad seen wins), and
    ``topologies`` maps an (arch, opsys, opsys name, opsys major version,
    cpus, memory) tuple to the number of ads that share it.  Ads without a
    Machine attribute are skipped silently; ads missing any other requested
    attribute are reported on stderr and skipped.
    """
    total_cores = {}
    topologies = {}
    projection = ['Arch', 'OpSys', 'OpSysMajorVer', 'OpSysName',
                  'DetectedCpus', 'DetectedMemory', 'Machine']
    for machine_ad in coll.query(htcondor.AdTypes.Startd, 'State=!="Owner"', projection):
        if not machine_ad.get('Machine'):
            continue  # skip malformed ads where we can't provide additional information

        try:
            if machine_ad['Machine'] not in total_cores:
                total_cores[machine_ad['Machine']] = machine_ad['DetectedCpus']

            machine = (
                machine_ad['Arch'].lower(),
                machine_ad['OpSys'].lower(),
                machine_ad['OpSysName'],
                machine_ad['OpSysMajorVer'],
                machine_ad['DetectedCpus'],
                machine_ad['DetectedMemory']
            )
            topologies[machine] = topologies.get(machine, 0) + 1
        except KeyError as exc:
            # Newline-terminated so consecutive warnings do not run together.
            msg = "Malformed machine ad: Missing '{0}' attribute for {1}\n"\
                   .format(exc, machine_ad['Machine'])
            sys.stderr.write(msg)
    return total_cores, topologies


def _arm_alarm(time_out):
    """Install the timeout handler and schedule SIGALRM in time_out seconds."""
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(time_out)


def _disarm_alarm():
    """Cancel any pending alarm and ignore SIGALRM again."""
    # alarm(0) cancels the pending alarm instead of merely ignoring it when
    # it eventually fires.
    signal.alarm(0)
    signal.signal(signal.SIGALRM, signal.SIG_IGN)


def _ping_ce_schedd(ce_schedd_ad, ce_host, time_out):
    """
    Ping the CE schedd with READ authorization under a time limit.

    Returns a ``(state, state_info)`` pair for the endpoint health fields,
    or None when the ping timed out (a message is written to stderr).
    """
    _arm_alarm(time_out)
    try:
        if htcondor.SecMan().ping(ce_schedd_ad, "READ")['AuthorizationSucceeded']:
            state = 'ok'
        else:
            state = 'warning'
        # NOTE(review): the info text says "successful" even for 'warning';
        # kept as-is because consumers may depend on the published text.
        return state, 'Authorization ping successful'
    except (KeyError, RuntimeError):
        return 'critical', 'Authorization ping failed'
    except TimeoutError:
        sys.stderr.write("Ping to CE schedd on %s timed out after %i s.\n"
                         % (ce_host, time_out))
        return None
    finally:
        _disarm_alarm()


def _count_vo_jobs(query, ce_host, time_out):
    """
    Tally the jobs yielded by ``query`` per VO under a time limit.

    Jobs lacking a JobStatus or x509userproxyvoname value are ignored.
    JobStatus 1 is counted as idle and 2 as running; every counted job adds
    to the total.  Returns ``(total, idle, running)`` dictionaries keyed by
    VO name, or None when iteration timed out (a message is written to
    stderr).
    """
    total_vo_jobs = defaultdict(int)
    idle_vo_jobs = defaultdict(int)
    running_vo_jobs = defaultdict(int)

    _arm_alarm(time_out)
    try:
        for job in query:
            status = job.get("JobStatus")
            voname = job.get("x509userproxyvoname")
            if not status or not voname:
                continue
            total_vo_jobs[voname] += 1
            if status == 1:
                idle_vo_jobs[voname] += 1
            elif status == 2:
                running_vo_jobs[voname] += 1
    except TimeoutError:
        # The format arguments must be a tuple; "% ce_host, time_out" would
        # bind only ce_host to the format string and raise TypeError.
        sys.stderr.write("CE schedd on %s timed out after %i s.\n" % (ce_host, time_out))
        return None
    finally:
        _disarm_alarm()
    return total_vo_jobs, idle_vo_jobs, running_vo_jobs


def _host_cert_issuer():
    """
    Return the issuer of /etc/grid-security/hostcert.pem in RFC2253 form,
    as printed by openssl with the leading 'issuer=' removed.
    """
    cmd = ['/usr/bin/openssl', 'x509', '-noout', '-issuer', '-nameopt', 'RFC2253', '-in',
           '/etc/grid-security/hostcert.pem']
    # universal_newlines=True yields text on both Python 2 and Python 3, so
    # the str-based replace() below works on either.
    cmd_proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                universal_newlines=True)
    return cmd_proc.communicate()[0].replace('issuer=', '').strip()


def main():
    """
    Main provider routine.

    Reads the site configuration from HTCondor, queries the pool collector
    and every schedd advertising HAS_HTCONDOR_CE, and prints GLUE2 LDIF to
    stdout in this order: one resource entry (plus its benchmarks) per
    machine topology, the manager entry, the service entry, and then for
    each reachable CE its endpoint entry followed by a share and policy
    entry per VO.  CEs that cannot be located, queried, or that time out are
    reported on stderr and skipped.

    Exits with status 1 when HTCONDORCE_VONames or HTCONDORCE_SiteName is
    not set.
    """

    # Get hostname of the batch system central manager
    central_manager = htcondor.param.get('COLLECTOR_HOST').split(' ')[0]

    # Get VO Names
    vonames = htcondor.param.get('HTCONDORCE_VONames')
    if not vonames:
        sys.stderr.write("Error: HTCONDORCE_VONames not set\n")
        sys.exit(1)
    vonames = [voname.strip() for voname in vonames.split(',')]

    # Get Site Name
    site_name = htcondor.param.get('HTCONDORCE_SiteName')
    if not site_name:
        sys.stderr.write("Error: HTCONDORCE_SiteName: not set\n")
        sys.exit(1)

    # Get the timeout value (always an int, see _read_timeout)
    time_out = _read_timeout()

    # This is the bind DN for all entries
    bind_dn = "GLUE2ServiceID=%s,GLUE2GroupID=resource,o=glue" % (central_manager)

    # Query collector for the number of CPUs and the machine topologies
    coll = htcondor.Collector()
    total_cores, topologies = _collect_topologies(coll)

    # The benchmark configuration does not depend on the resource, so parse
    # it once; it is only needed when there is at least one resource.
    specs = _read_benchmarks() if topologies else {}

    # Print the entries for the GLUE2 Resources and their benchmarks
    resources = []
    for topology, instances in topologies.items():
        arch, opsys, os_name, os_version, cpus, memory = topology
        resource = '{0}_{1}_{2}_{3}_{4}_{5}'.format(*topology)
        resources.append(resource)

        print(RESOURCE_LDIF.format(
            central_manager=central_manager,
            resource=resource,
            arch=arch,
            os=opsys,
            name=os_name,
            version=os_version,
            memory=memory,
            cpu=cpus,
            bind_dn=bind_dn,
            instances=instances,
        ))

        for spec_type, spec_value in specs.items():
            spec_type = spec_type.strip().replace('_', '-')
            print(SPEC_LDIF.format(
                central_manager=central_manager,
                resource=resource,
                spec_type=spec_type,
                spec_value=spec_value
            ))

    coll_ad = coll.query(htcondor.AdTypes.Collector)[0]  # the pool collector ad
    version = coll_ad['CondorVersion'].split()[1]

    # Print the entry for the GLUE2 Manager
    print(MANAGER_LDIF.format(
        central_manager=central_manager,
        bind_dn=bind_dn,
        version=version,
        total_cores=sum(total_cores.values()),
    ))

    ce_batch_schedd_ads = coll.query(
        htcondor.AdTypes.Schedd,
        'HAS_HTCONDOR_CE =?= True',
        ['Machine']
    )

    # Print the entry for the GLUE2 Service
    print(SERVICE_LDIF.format(
        central_manager=central_manager,
        bind_dn=bind_dn,
        num_endpoints=len(ce_batch_schedd_ads),
        num_shares=len(vonames),
        site_name=site_name,
    ))

    # Every share references every resource, so build the key lines once.
    resource_keys = ''.join(
        'GLUE2ComputingShareExecutionEnvironmentForeignKey: {0}_{1}\n'
        'GLUE2ShareResourceForeignKey: {0}_{1}\n'.format(central_manager, resource)
        for resource in resources
    )

    for ce_batch_schedd_ad in ce_batch_schedd_ads:
        ce_host = ce_batch_schedd_ad['Machine']
        # find the CE using the default CE port
        ce_collector = htcondor.Collector(ce_host + ':9619')
        try:
            ce_schedd_ad = ce_collector.query(
                htcondor.AdTypes.Schedd,
                'Name =?= "{0}"'.format(ce_host)
            )[0]
        except (RuntimeError, IndexError):
            sys.stderr.write("Unable to locate CE schedd on %s\n" % ce_host)
            continue
        except EnvironmentError:
            sys.stderr.write("Failed communication with CE collector on %s\n" % ce_host)
            continue

        health = _ping_ce_schedd(ce_schedd_ad, ce_host, time_out)
        if health is None:
            continue
        state, state_info = health

        ce_schedd = htcondor.Schedd(ce_schedd_ad)
        try:
            query = ce_schedd.xquery(projection=["JobStatus", "x509userproxyvoname"])
        except RuntimeError as exc:
            sys.stderr.write("%s: %s\n" % (exc, ce_host))
            continue

        job_counts = _count_vo_jobs(query, ce_host, time_out)
        if job_counts is None:
            continue
        total_vo_jobs, idle_vo_jobs, running_vo_jobs = job_counts

        issuer = _host_cert_issuer()

        name = ce_schedd_ad['Name']
        # The format string ends in 'Z' (UTC), so convert the epoch value to
        # UTC rather than to local time.
        start_time = datetime.utcfromtimestamp(
            int(ce_schedd_ad['DaemonStartTime'])
        ).strftime('%Y-%m-%dT%H:%M:%SZ')

        print(ENDPOINT_LDIF.format(
            name=name,
            bind_dn=bind_dn,
            version=version,
            state=state,
            state_info=state_info,
            start_time=start_time,
            issuer=issuer,
            central_manager=central_manager,
        ))

        endpointid = "%s_HTCondorCE" % (ce_host)
        for voname in vonames:
            shareid = "%s_%s_share" % (ce_host, voname)
            print(SHARE_LDIF.format(
                shareid=shareid,
                bind_dn=bind_dn,
                total_vo_jobs=total_vo_jobs.get(voname, 0),
                idle_vo_jobs=idle_vo_jobs.get(voname, 0),
                running_vo_jobs=running_vo_jobs.get(voname, 0),
                central_manager=central_manager,
                endpointid=endpointid,
                resource_keys=resource_keys,
            ))

            policyid = "%s_%s_policy" % (ce_host, voname)
            print(POLICY_LDIF.format(
                policyid=policyid,
                shareid=shareid,
                bind_dn=bind_dn,
                vo=voname,
                endpointid=endpointid
            ))

if __name__ == '__main__':
    main()
