vacjson-cgi

#!/usr/bin/python
#
#  vacjson-cgi - Vac CRR JSON CGI script
#
#  Andrew McNab, University of Manchester.
#  Copyright (c) 2015-9. All rights reserved.
#
#  Redistribution and use in source and binary forms, with or
#  without modification, are permitted provided that the following
#  conditions are met:
#
#    o Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer. 
#    o Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution. 
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
#  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
#  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
#  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
#  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
#  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
#  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
#  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
#  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#  POSSIBILITY OF SUCH DAMAGE.
#
#  Contacts: Andrew.McNab@cern.ch  http://www.gridpp.ac.uk/vacproject/
#

# First module installed with:
#
# pip install elasticsearch
#
import elasticsearch

import os
import cgi
import sys
import stat
import time
import json
import string
import pprint

import vacmon

# When true, each query function pretty-prints the query it sent and the raw
# Elasticsearch response inside <pre> tags on standard output
esDebug   = False
# Passed as the timeout argument of every elasticsearch.Elasticsearch client
# created in this script (presumably seconds - confirm against elasticsearch-py)
esTimeout = 120
# Width of the look-back window in seconds (86400 = 24 hours): only messages
# whose time_received is at or after int(time.time()) - timeSpan are queried
timeSpan  = 86400

def htmlHeader(title):
  """Write the CGI response headers and the opening of the HTML page.

  Emits the 200 status line, the text/html content type, the blank line
  that ends the headers, and then the document head (with title inserted
  verbatim into the <title> element) followed by the opening <body> tag.
  """
  sys.stdout.write('Status: 200 OK\n'
                   'Content-Type: text/html\n'
                   '\n')
  sys.stdout.write('<html><head><title>' + title + '</title>\n'
                   + '</head><body>\n')
  # Disabled: the contents of /var/www/vacmon/vacjson-header.html used to be
  # printed here

def htmlFooter():
  """Write the closing </body> and </html> tags of the page."""
  # Disabled: the contents of /var/www/vacmon/vacjson-footer.html used to be
  # printed here, before the closing tags
  sys.stdout.write('</body>\n'
                   '</html>\n')

def listSites():
  """Print an HTML page with one link per site seen in the look-back window.

  Queries the 'factories' index for documents whose time_received lies
  within the last timeSpan seconds, aggregates them by the 'site' field in
  ascending order, and prints a link to /<site>/ for each bucket.

  Exceptions raised by the Elasticsearch client are not caught here.
  """
  es = elasticsearch.Elasticsearch(timeout = esTimeout)

  # Built as a dictionary so the client serialises it, rather than as
  # hand-written JSON text
  sitesSearch = {
    'query': {
      'bool': {
        'filter': [
          { 'range': { 'time_received': { 'from': int(time.time()) - timeSpan } } }
        ]
      }
    },
    'aggs': {
      'sites': {
        'terms': {
          'field': 'site',
          'size' : 99999,
          'order': { '_term': 'asc' }
        }
      }
    }
  }

  ss = es.search(
          index       = 'factories',
          size        = 0,
          body        = sitesSearch)

  # Only start the 200 response once the query has succeeded: a failure
  # then propagates before any output has been written, instead of
  # truncating a page that was already announced as OK
  htmlHeader('VacJson')

  if esDebug:
      print('<pre>')
      pprint.pprint(sitesSearch)
      print('')
      pprint.pprint(ss)
      print('</pre>')

  for bucket in ss['aggregations']['sites']['buckets']:
    # Site names come from the index, not from this script, so escape them
    # (including double quotes) before placing them in markup and in href
    site = cgi.escape(str(bucket['key']), True)
    print('<a href="/%s/">%s</a><br>' % (site, site))

  htmlFooter()

def listSpaces(siteName):
  """Print an HTML page with one link per space of siteName.

  Queries the 'factories' index for documents of the given site whose
  time_received lies within the last timeSpan seconds, aggregates them by
  the 'space' field in ascending order, and prints a link to
  /<site>/<space>/crr.json for each bucket.

  siteName is the site name as it appeared in the request URI.

  Exceptions raised by the Elasticsearch client are not caught here.
  """
  es = elasticsearch.Elasticsearch(timeout = esTimeout)

  # Built as a dictionary so siteName is serialised as a JSON string value
  # by the client instead of being spliced into JSON text with %s
  spacesSearch = {
    'query': {
      'bool': {
        'filter': [
          { 'range': { 'time_received': { 'from': int(time.time()) - timeSpan } } },
          { 'match': { 'site': siteName } }
        ]
      }
    },
    'aggs': {
      'spaces': {
        'terms': {
          'field': 'space',
          'size' : 99999,
          'order': { '_term': 'asc' }
        }
      }
    }
  }

  ss = es.search(
          index       = 'factories',
          size        = 0,
          body        = spacesSearch)

  # Only start the 200 response once the query has succeeded: a failure
  # then propagates before any output has been written, instead of
  # truncating a page that was already announced as OK
  htmlHeader('VacJson')

  if esDebug:
      print('<pre>')
      pprint.pprint(spacesSearch)
      print('')
      pprint.pprint(ss)
      print('</pre>')

  # Escape both names (including double quotes) before they are placed in
  # markup and in the href attribute
  site = cgi.escape(siteName, True)

  for bucket in ss['aggregations']['spaces']['buckets']:
    space = cgi.escape(str(bucket['key']), True)
    print('<a href="/%s/%s/crr.json">%s/%s</a><br>' % (site, space, site, space))

  htmlFooter()

# Errors that mean an optional field is missing or has an unexpected shape in
# a search hit. Used instead of a bare except so that SystemExit and
# KeyboardInterrupt are never swallowed.
_crrFieldErrors = (KeyError, IndexError, TypeError, ValueError)

def _crrLatestPerTerm(es, indexName, aggName, termField, docvalueFields, siteName, spaceName):
  # Search indexName for messages about one space received in the last
  # timeSpan seconds, bucket them by termField in ascending order, and keep
  # the requested docvalue fields of the newest message in each bucket.
  # Returns the raw search response.
  search = {
    'query': {
      'bool': {
        'filter': [
          { 'range': { 'time_received': { 'from': int(time.time()) - timeSpan } } },
          { 'match': { 'site':  siteName } },
          { 'match': { 'space': spaceName } }
        ]
      }
    },
    'aggs': {
      aggName: {
        'terms': {
          'field': termField,
          'size' : 99999,
          'order': { '_term': 'asc' }
        },
        'aggs': {
          'most_recent_message': {
            'top_hits': {
              'size': 1,
              'sort': [ { 'time_received': { 'order': 'desc' } } ],
              'docvalue_fields': docvalueFields
            }
          }
        }
      }
    }
  }

  response = es.search(
          index       = indexName,
          size        = 0,
          body        = search)

  if esDebug:
      print('<pre>')
      pprint.pprint(search)
      print('')
      pprint.pprint(response)
      print('</pre>')

  return response

def _crrLatestFields(bucket):
  # Return the 'fields' dictionary of the single top hit kept for a bucket
  return bucket['most_recent_message']['hits']['hits'][0]['fields']

def _crrSummariseFactories(buckets):
  # Add up max_hs06 and max_processors over the newest message of every
  # factory and pick the daemon_version of the most recently heard factory.
  # Returns (totalHS06, totalProcessors, daemonVersion); daemonVersion is '?'
  # when there are no buckets.
  totalHS06       = 0
  totalProcessors = 0

  daemonVersion   = '?'
  timeReceived    = 0

  for bucket in buckets:
    fields = _crrLatestFields(bucket)

    # time_received and daemon_version are required: if either is missing the
    # exception propagates to the caller
    if fields['time_received'][0] > timeReceived:
      daemonVersion = str(fields['daemon_version'][0])
      timeReceived  = fields['time_received'][0]

    # The capacity figures are optional: a factory without them adds nothing
    try:
      totalHS06       += fields['max_hs06'][0]
    except _crrFieldErrors:
      pass

    try:
      totalProcessors += fields['max_processors'][0]
    except _crrFieldErrors:
      pass

  return totalHS06, totalProcessors, daemonVersion

def _crrBuildShares(buckets):
  # Build the CRR 'shares' dictionary (queues), one entry per machinetype
  # bucket. Every attribute other than 'type' is optional and is left out
  # when the newest message does not supply the fields it needs.
  shares = {}

  for bucket in buckets:
    share = { 'type' : 'machinetype' }
    shares[str(bucket['key'])] = share

    try:
      fields = _crrLatestFields(bucket)
    except _crrFieldErrors:
      # No usable hit: every lookup below then fails and is skipped
      fields = {}

    try:
      # The VO is taken to be the text between the first and second '/'
      share['assigned_vos'] = [ str(fields['fqan'][0]).split('/')[1] ]
    except _crrFieldErrors:
      pass

    try:
      share['max_walltime_minutes'] = fields['max_wallclock_seconds'][0] / 60
    except _crrFieldErrors:
      pass

    try:
      # GB = 10^9 bytes; not GB = 1024^3. Is this what we want???
      share['max_main_memory_GB'] = ( fields['max_processors'][0]
                                      * fields['bytes_per_processor'][0] ) / 1000000000
    except _crrFieldErrors:
      pass

  return shares

def _crrFlavourAndVersion(daemonVersion):
  # Split a daemon version string such as 'Vac 1.2.3' into (flavour, version).
  # 'Vac <v>' and 'Vcycle <v>' map to their uk.ac.gridpp.* flavours. Anything
  # else uses its first word as the flavour and the whole string as the
  # version. A string with no second word, or no words at all, no longer
  # raises IndexError.
  words = daemonVersion.split()

  if daemonVersion.startswith('Vac ') and len(words) > 1:
    return 'uk.ac.gridpp.vac', words[1]

  if daemonVersion.startswith('Vcycle ') and len(words) > 1:
    return 'uk.ac.gridpp.vcycle', words[1]

  if words:
    return words[0], daemonVersion

  return '?', daemonVersion

def saveCrrJson(siteName, spaceName):
  """Build the CRR JSON for one space and cache it on disk.

  Queries the 'factories' and 'machinetypes' indexes for messages about
  siteName/spaceName received within the last timeSpan seconds, assembles
  a { "computingresources" : { spaceName : ... } } document from the
  newest message of each factory and machinetype, and writes it to
  /var/www/vacjson/<siteName>/<spaceName>/crr.json using
  vacmon.vacutils.createFile.

  Failures to create the directory or write the file are reported on
  standard error and not raised. Exceptions from the Elasticsearch client,
  or from a factory hit lacking time_received or daemon_version, propagate
  to the caller. Returns None.
  """
  es = elasticsearch.Elasticsearch(timeout = esTimeout)

  fs = _crrLatestPerTerm(es, 'factories', 'factories', 'factory',
                         [ 'time_received', 'max_processors', 'max_hs06', 'mem_total_kb', 'daemon_version' ],
                         siteName, spaceName)

  ms = _crrLatestPerTerm(es, 'machinetypes', 'machinetypes', 'machinetype',
                         [ 'time_received', 'max_processors', 'max_wallclock_seconds', 'bytes_per_processor', 'fqan', 'factory' ],
                         siteName, spaceName)

  # Now assemble info for the CRR JSON

  totalHS06, totalProcessors, daemonVersion = _crrSummariseFactories(fs['aggregations']['factories']['buckets'])

  # Shares (queues) from machinetypes

  shares = _crrBuildShares(ms['aggregations']['machinetypes']['buckets'])

  flavour, version = _crrFlavourAndVersion(daemonVersion)

  # Now begin building the JSON for one space ("resource"), as a Python dictionary

  resJSON = { 'publication_time': int(time.time()) }

  resJSON['shares']               = shares
  # NOTE: the original author queried whether this key should have a capital S
  resJSON['site']                 = siteName
  resJSON['type']                 = 'vacuum'
  resJSON['status']               = 'production'
  resJSON['number_logical_cpus']  = totalProcessors

  if totalHS06 > 0:
    resJSON['capacity_HS06']      = int(totalHS06)

  resJSON['accesspoints']         = { spaceName : { "endpoint_urls" : [ spaceName ] } }

  resJSON['accesspoints'][spaceName]['flavour'] = flavour
  resJSON['accesspoints'][spaceName]['version'] = version

  resJSON['jobmanager']         = flavour
  resJSON['jobmanager_version'] = version

  spaceDir = '/var/www/vacjson/' + siteName + '/' + spaceName

  # Only create the directory when it is missing: os.makedirs raises if it
  # already exists, which used to write an error to the log on every request
  # after the first
  if not os.path.isdir(spaceDir):
    try:
      os.makedirs(spaceDir)
    except Exception as e:
      sys.stderr.write(str(e) + '\n')

  crrJSON = { "computingresources" : { spaceName : resJSON } }

  try:
    # Mode is rw-r--r--; the final argument is presumably the directory for
    # createFile's temporary file - confirm against vacmon.vacutils
    vacmon.vacutils.createFile(spaceDir + '/crr.json',
                             json.dumps(crrJSON, indent=2), stat.S_IWUSR + stat.S_IRUSR + stat.S_IRGRP + stat.S_IROTH, '/var/www/tmp')
  except Exception as e:
    sys.stderr.write(str(e) + '\n')

def showCrrJson(siteName, spaceName):
  """Regenerate the CRR JSON for one space and send it to the client.

  Calls saveCrrJson() to refresh the cached file. Any exception from that
  call is written to standard error and otherwise ignored, so a stale copy
  can still be served. The cached file is then read from
  /var/www/vacjson/<siteName>/<spaceName>/crr.json and printed as an
  application/json response. If it cannot be read, a 404 response with
  no body is printed instead.
  """
  try:
    # Try to create and cache the CRR JSON for this space
    saveCrrJson(siteName, spaceName)
  except Exception as e:
    # End the message with a newline so successive log entries do not run together
    sys.stderr.write(str(e) + '\n')

  try:
    # Even if creating the JSON file fails, try to serve up the most recent copy
    with open('/var/www/vacjson/' + siteName + '/' + spaceName + '/crr.json', 'r') as crrFile:
      s = crrFile.read()
  except (IOError, OSError):
    # Missing or unreadable cache file
    print('Status: 404 Not Found')
    print('Location: http://vacjson.gridpp.ac.uk/')
    print('')
  else:
    print('Status: 200 OK')
    print('Content-Type: application/json')
    print('')
    print(s)
    
#
# PROGRAM MAIN
#

def _redirectToMainPage():
  # Send the client to the top-level page and end the request
  sys.stdout.write('Status: 302 Found 0\n'
                   'Location: http://vacjson.gridpp.ac.uk/\n'
                   '\n')
  sys.exit(0)

# The version is whatever follows the first '=' on the first line of VERSION
vacjsonVersion = open('/var/www/vacmon/VERSION','r').readline().split('=',1)[1].strip()
requestURI     = os.environ['REQUEST_URI']

# Only these characters may appear in the URI, and never a '..' sequence
acceptableChars = frozenset('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-./')

if '..' in requestURI or not acceptableChars.issuperset(requestURI):
  # Redirect to main page if unacceptable characters in URI
  _redirectToMainPage()

# The URI is read as /<site>/<space>/<file>. The padding slashes guarantee
# that all three components exist, as empty strings when absent.
uriParts  = (requestURI + '////').split('/')
siteName  = uriParts[1]
spaceName = uriParts[2]
fileName  = uriParts[3]

if not siteName:
  # List of sites
  listSites()
elif not spaceName:
  # List of spaces in a site
  listSpaces(siteName)
elif fileName == 'crr.json':
  # CRR for one space at a site
  showCrrJson(siteName, spaceName)
else:
  # Something not right
  _redirectToMainPage()

sys.exit(0)