'''//Copyright (C) 2011 Martin Biermann
  // Modified 2011 Simon Kiertscher
  //
  //This program is free software; you can redistribute it and/or modify it 
  //under the terms of the GNU General Public License as published by the 
  //Free Software Foundation; either version 3 of the License, or (at your option) 
  //any later version.
  //
  //This program is distributed in the hope that it will be useful, but WITHOUT 
  //ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
  //FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  //
  //You should have received a copy of the GNU General Public License along with 
  //this program; if not, see <http://www.gnu.org/licenses/>.''' 

import subprocess
import string
import os
import re
from datetime import datetime
import syslog
import sys

''' Cherub methods '''
# See helper methods below

def cherub_global_load():
	''' Report the global load; this implementation always reports 0. '''
	globalLoad = 0
	return globalLoad

# Module-level cache that speeds up cherub_node_load(): the list of hosts
# that have to serve load is kept between calls together with its time stamp.
firsttime = 1                  # stays 1 until the first recalculation of the list
time_stamp = datetime.now()    # time stamp the age of the cached list is measured against
hostsToServeLoad = list()      # cached names of the hosts that have to serve load

# Log through syslog, ident "cherubim_script_lsf", facility LOCAL4.
#sys.path.append("/etc/")
#import cherub_config
syslog.openlog("cherubim_script_lsf", 0, syslog.LOG_LOCAL4)
	
def cherub_node_load(node):
	''' Tell whether the given node is needed to work off pending jobs.

	The list of hosts that have to serve load is computed for the whole
	cluster and cached in the module-level hostsToServeLoad. A cache that is
	younger than 15 seconds is reused, otherwise the list is recalculated.

	Returns 1 if the node has to serve load and 0 if it does not, -1 if one
	of the cluster queries failed and 0 if an unexpected exception occurred
	(the exception is logged to syslog).
	'''
	global time_stamp
	global hostsToServeLoad
	global firsttime
	try:
		cacheAge = datetime.now() - time_stamp
		# timedelta.seconds only holds the part below one day, so the days have
		# to be checked too or a cache older than a day could pass as current.
		if firsttime == 0 and cacheAge.days == 0 and cacheAge.seconds < 15 :
			return int(hostsToServeLoad.count(node) > 0)

		# Build the new list aside and publish it only once it is complete, so
		# a failed recalculation never leaves a half-filled list as the cache.
		recalculatedHosts = _hostsNeededForPendingLoad()
		if recalculatedHosts == -1 :
			return -1
		hostsToServeLoad = recalculatedHosts
		time_stamp = datetime.now()
		firsttime = 0

		# The node has load if it is listed among the hosts needed to work off
		# the queues' pending load.
		return int(hostsToServeLoad.count(node) > 0)
	except:
		syslog.syslog(syslog.LOG_ERR, "While processing node:"+str(node)+", an unexpected error occured in function node_load:"+str(sys.exc_info()[0]))
		return 0


def _hostsNeededForPendingLoad():
	''' Compute the hosts needed to work off the pending jobs of all queues.

	Returns the list of host names, or -1 if one of the cluster queries failed.
	'''
	# Hosts that can still be planned; a host is taken out of this list as
	# soon as it is assigned to serve load.
	hosts = availableHosts()
	if hosts == -1 :
		return -1

	queues = allQueues()
	if queues == -1 :
		return -1

	neededHosts = []

	# For each queue sum up the cores its pending jobs request and cover that
	# demand with available hosts serving the queue.
	for queue in queues :
		pendingJobs = pendingJobsInQueue(queue)
		if pendingJobs == -1 :
			return -1
		if len(pendingJobs) == 0 :
			continue

		hostsServingTheQueue = hostsServingQueue(queue)
		# Test the returned value, not the function object, for the error marker.
		if hostsServingTheQueue == -1 :
			return -1

		# Keep only the serving hosts that are available as well.
		hostsServingTheQueue = list(set(hostsServingTheQueue) & set(hosts))
		if len(hostsServingTheQueue) == 0 :
			continue

		# Determined once per queue instead of once per job.
		highestNumberOfCoresInTheQueue = highestNumberOfCoresInQueue(queue)
		numberOfCoresNeeded = 0

		for pendJobID in pendingJobs :
			is_direct = 0
			# Expect all hosts directly specified in the pending job to serve load.
			specifiedHostsInPendJob = specifiedHostsInPendingJob(pendJobID)
			if specifiedHostsInPendJob == -1 :
				return -1
			for host in specifiedHostsInPendJob :
				is_direct = 1
				if hosts.count(host) > 0 :
					hosts.remove(host)
					neededHosts.append(host)

			jobsCoreConsumption = jobCoreConsumption(pendJobID,highestNumberOfCoresInTheQueue,is_direct)
			if jobsCoreConsumption == -1 :
				return -1
			numberOfCoresNeeded += jobsCoreConsumption

			# Cover the demand collected so far. The loop runs over a copy because
			# planned hosts are removed from hostsServingTheQueue on the way;
			# removing from the list being iterated would skip hosts.
			for host in list(hostsServingTheQueue) :
				if numberOfCoresNeeded <= 0 :
					break
				if hosts.count(host) > 0 :
					kNumberOfCoresInHost = numberOfCoresInHost(host)
					if kNumberOfCoresInHost == -1 :
						return -1
					numberOfCoresNeeded -= kNumberOfCoresInHost
					hosts.remove(host)
					hostsServingTheQueue.remove(host)
					neededHosts.append(host)

			# No serving host left: further jobs of this queue cannot add hosts.
			if len(hostsServingTheQueue) == 0 :
				break

	return neededHosts
		
		
def cherub_boot(node_address):
	''' Boot the given node.

	Runs the appliance "poweron" command for the node via ssh on host
	"meggy". A failure reported by the command is logged but, due to
	problems with the appliance system, still reported as success.

	Returns 0 once the command has been run and 1 if an unexpected exception
	occurred (the exception is logged to syslog).
	'''
	try:
		applianceCommand = '"sudo /usr/sbin/appliance nodes::command=%s::poweron"' % node_address
		bootCommand = 'ssh meggy %s' % applianceCommand
		# The with statement closes the /dev/null handle again; it used to be
		# left open on every call.
		with open(os.devnull,"w") as devNull:
			# universal_newlines makes stderr a text string on Python 2 and 3 alike.
			on = subprocess.Popen(bootCommand,shell=True,stdout=devNull,stderr=subprocess.PIPE,universal_newlines=True)
			# communicate() returns the tuple (stdout, stderr); only the stderr
			# text can contain the failure marker.
			onErr = on.communicate()[1]
		if (on.returncode < 0) or (onErr.count("nodes::command=failed") > 0):
			# Due to problems with the appliance system errors are only logged
			# and 0 is returned nevertheless.
			syslog.syslog(syslog.LOG_WARNING, "While processing node:"+str(node_address)+", the appliance command failed in function cherub_boot (ignored), stderr:"+str(onErr))
		return 0
	except:
		syslog.syslog(syslog.LOG_ERR, "While processing node:"+str(node_address)+", an unexpected error occured in function cherub_boot:"+str(sys.exc_info()[0]))
		return 1

def cherub_shutdown(node_address):
	''' Shutdown the given node.

	Runs the appliance "shutdown" command for the node via ssh on host
	"meggy". A failure reported by the command is logged but, due to
	problems with the appliance system, still reported as success.

	Returns 0 once the command has been run and 1 if an unexpected exception
	occurred (the exception is logged to syslog).
	'''
	try:
		applianceCommand = '"sudo /usr/sbin/appliance nodes::command=%s::shutdown"' % node_address
		offCommand = 'ssh meggy %s' % applianceCommand
		# The with statement closes the /dev/null handle again; it used to be
		# left open on every call.
		with open(os.devnull,"w") as devNull:
			# universal_newlines makes stderr a text string on Python 2 and 3 alike.
			off = subprocess.Popen(offCommand,shell=True,stdout=devNull,stderr=subprocess.PIPE,universal_newlines=True)
			# communicate() returns the tuple (stdout, stderr); only the stderr
			# text can contain the failure marker.
			offErr = off.communicate()[1]
		if (off.returncode < 0) or (offErr.count("nodes::command=failed") > 0):
			# Due to problems with the appliance system errors are only logged
			# and 0 is returned nevertheless.
			syslog.syslog(syslog.LOG_WARNING, "While processing node:"+str(node_address)+", the appliance command failed in function cherub_shutdown (ignored), stderr:"+str(offErr))
		return 0
	except:
		syslog.syslog(syslog.LOG_ERR, "While processing node:"+str(node_address)+", an unexpected error occured in function cherub_shutdown:"+str(sys.exc_info()[0]))
		return 1

	
def cherub_sign_off(node_address):
	''' Close the node from the cluster administration.

	Runs "badmin hclose" for the node via ssh on host "glic".

	Returns 0 if the command has been run, -1 if it ended with a negative
	return code (terminated by a signal) and 1 if an unexpected exception
	occurred (the exception is logged to syslog).
	'''
	try:
		command = 'ssh glic "badmin hclose %s"' % node_address
		# Note that this app writes success messages on stderr, so stderr is
		# discarded together with stdout. The with statement closes the
		# /dev/null handle again; it used to be left open on every call.
		with open(os.devnull,"w") as devNull:
			close = subprocess.Popen(command,shell=True,stdout=devNull,stderr=subprocess.STDOUT)
			close.communicate()
		if close.returncode < 0 :
			return -1
		return 0
	except:
		syslog.syslog(syslog.LOG_ERR, "While processing node:"+str(node_address)+", an unexpected error occured in function cherub_sign_off:"+str(sys.exc_info()[0]))
		return 1

def cherub_register(node_address):
	''' Open the node from the cluster administration.

	Runs "badmin hopen" for the node via ssh on host "glic".

	Returns 0 if the command has been run, -1 if it ended with a negative
	return code (terminated by a signal) and 1 if an unexpected exception
	occurred (the exception is logged to syslog).
	'''
	try:
		command = 'ssh glic "badmin hopen %s"' % node_address
		# Note that this app writes success messages on stderr, so stderr is
		# discarded together with stdout. The with statement closes the
		# /dev/null handle again; it used to be left open on every call.
		with open(os.devnull,"w") as devNull:
			register = subprocess.Popen(command,shell=True,stdout=devNull,stderr=subprocess.STDOUT)
			register.communicate()
		if register.returncode < 0 :
			return -1
		return 0
	except:
		syslog.syslog(syslog.LOG_ERR, "While processing node:"+str(node_address)+", an unexpected error occured in function cherub_register:"+str(sys.exc_info()[0]))
		return 1
	
def cherub_status(node_name):
	''' Determine the state of the given node.

	Must return one of the following states
	    CHERUB_UNKNOWN = -1 = if the state of the node is unknown or an error occurs
	    CHERUB_BUSY    =  0 = if the node is booted and BUSY/WORKING and REGISTERED to the RMS
	    CHERUB_ONLINE  =  1 = if the node is booted but IDLE and REGISTERED to the RMS
	    CHERUB_OFFLINE =  2 = if the node is booted but NOT REGISTERED to the RMS
	    CHERUB_DOWN    =  3 = if the node is shutdown and NOT REGISTERED to the RMS
	'''
	try:
		# Shortcut: any output of bjobs for this machine means jobs are listed
		# on it, so the BUSY state can be returned right away.
		jobsQuery = subprocess.Popen('bjobs -u all -m  %s' % node_name,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		jobsOut, jobsErr = jobsQuery.communicate()
		if not (jobsOut == ""):
			return 0

		# Otherwise the host status reported by bhosts decides.
		statusQuery = subprocess.Popen('bhosts -Xw %s | awk \'NR!=1 { print $2 }\'' % node_name,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		hostOut, hostErr = statusQuery.communicate()
		if statusQuery.returncode < 0 :
			syslog.syslog(syslog.LOG_INFO, "getStatus crashed: "+str(node_name)+" --- stdErr: "+str(hostErr)+" --- stdOut: "+str(hostOut))
			return -1

		hostState = hostOut.rstrip('\n')

		if hostState == "ok" :
			busy = hostIsBusy(node_name)
			if busy == -1 :
				syslog.syslog(syslog.LOG_INFO, "unknown reason for node:"+str(node_name)+" --- stdErr: "+str(hostErr)+" --- status ok but hostisbusy crashed --- stdOut: "+str(hostOut))
				return -1
			return 0 if busy else 1

		# States in which the node is not registered: a ping decides whether
		# the machine is shut down (3) or merely offline (2).
		if hostState in ("closed_Adm","unavail","unreach") :
			down = hostIsDown(node_name)
			if down == -1 :
				syslog.syslog(syslog.LOG_INFO, "unknown reason for node:"+str(node_name)+" --- stdErr: "+str(hostErr)+" --- ping command crashed --- stdOut: "+str(hostOut))
				return -1
			if down > 0 :
				return 3
			return 2

		# closed_Full, closed_Excl and all states left are reported as BUSY.
		return 0
	except:
		syslog.syslog(syslog.LOG_ERR, "While processing node:"+str(node_name)+", an unexpected error occured in function cheurb_status:"+str(sys.exc_info()[0]))
		return -1

''' Helper methods '''

def hostIsBusy(host):
	''' Check a host being busy.

	The host counts as idle only if bhosts reports it in state "ok" or
	"closed_Adm" and the fifth column of its line is 0 (presumably the
	number of jobs - TODO confirm against the bhosts output format).

	Returns True if the host is busy, False if it is idle and -1 if the
	query crashed or an unexpected exception occurred.
	'''
	# Preset so the except handler can log them even when the failure happens
	# before communicate() delivered any output; otherwise the handler itself
	# would raise on the unbound names.
	stdout = None
	stderr = None
	try:
		command = 'bhosts -Xw %s | awk \'NR!=1{print (($2=="ok" || $2=="closed_Adm") && $5==0) ? 1 : 0}\'' % host
		getHostIdles = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		stdout, stderr = getHostIdles.communicate()
		if (getHostIdles.returncode < 0):
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(host)+", getHostIdles has crashed, stderr:"+str(stderr)+" stdout:"+str(stdout))
			return -1
		# Error output alone is only logged, the result is still evaluated.
		if (stderr != ''):
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(host)+", getHostIdles has errorout:"+str(stderr)+" stdout is:"+str(stdout))
		# awk prints 1 for an idle host; everything else counts as busy.
		return (stdout.rstrip('\n') != '1')
	except:
		# Deliberately broad: callers evaluate the return code -1.
		syslog.syslog(syslog.LOG_ERR, "While processing:"+str(host)+", an unexpected error occured in function hostIsBusy:"+str(sys.exc_info()[0]))
		syslog.syslog(syslog.LOG_ERR, "STDOUT:"+str(stdout))
		syslog.syslog(syslog.LOG_ERR, "STDERR:"+str(stderr))
		return -1

def hostIsDown(host):
	''' Check a host being offline by pinging its address on the network.

	Returns True if the single ping got no answer, False if it did and -1
	if ping crashed, wrote error output or an unexpected exception occurred.
	'''
	# Preset so the except handler can log them even when the failure happens
	# before communicate() delivered any output; otherwise the handler itself
	# would raise on the unbound names.
	stdout = None
	stderr = None
	try:
		ping = subprocess.Popen('ping -c 1 %s' % host,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		stdout, stderr = ping.communicate()
		if (ping.returncode < 0) or (stderr != ''):
			return -1
		# Exactly one packet is sent, so "0 received" means no answer at all.
		return (stdout.count('0 received') > 0)
	except:
		# Deliberately broad: callers evaluate the return code -1.
		syslog.syslog(syslog.LOG_ERR, "While processing:"+str(host)+", an unexpected error occured in function hostIsDown:"+str(sys.exc_info()[0]))
		syslog.syslog(syslog.LOG_ERR, "STDOUT:"+str(stdout))
		syslog.syslog(syslog.LOG_ERR, "STDERR:"+str(stderr))
		return -1

def availableHosts():
	''' Get an array of all hosts that are available to be planned.

	A host is listed if bhosts reports it in state "ok", "closed_Adm",
	"unavail" or "unreach" and the fifth and sixth column of its line are
	both "0" (presumably job counters - TODO confirm against the bhosts
	output format).

	Returns the list of host names, or -1 if the query crashed or an
	unexpected exception occurred.
	'''
	# Preset so the except handler can log them even when the failure happens
	# before communicate() delivered any output; otherwise the handler itself
	# would raise on the unbound names.
	stdout = None
	stderr = None
	try:
		command = 'bhosts -Xw | awk \'NR!=1 { if ( ($2=="ok" || $2=="closed_Adm" || $2=="unavail" ||$2=="unreach") && ($5=="0") && ($6=="0") ) print $1 }\''
		getAllHosts = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		stdout,stderr = getAllHosts.communicate()
		if (getAllHosts.returncode < 0):
			syslog.syslog(syslog.LOG_ERR, "getAllHosts has crashed, stdout:"+str(stdout)+" stderr:"+str(stderr))
			return -1
		# Error output alone is only logged. The message no longer refers to a
		# host name: there is none in this function, and the undefined name
		# turned every error output into a failure.
		if (stderr != ''):
			syslog.syslog(syslog.LOG_ERR, "getAllHosts has errorout:"+str(stderr)+" stdout is:"+str(stdout))
		# One host name per line.
		return stdout.splitlines()
	except:
		# Deliberately broad: callers evaluate the return code -1.
		syslog.syslog(syslog.LOG_ERR, "An unexpected error occured in function availableHosts:"+str(sys.exc_info()[0]))
		syslog.syslog(syslog.LOG_ERR, "STDOUT:"+str(stdout))
		syslog.syslog(syslog.LOG_ERR, "STDERR:"+str(stderr))
		return -1

def allQueues():
	''' Get an array of all queues in the cluster.

	Returns the list of queue names (first column of the bqueues output
	without its header line), or -1 if the query crashed or an unexpected
	exception occurred.
	'''
	# Preset so the except handler can log them even when the failure happens
	# before communicate() delivered any output; otherwise the handler itself
	# would raise on the unbound names.
	stdout = None
	stderr = None
	try:
		getQueues = subprocess.Popen('bqueues | awk \'NR!=1{print $1}\'',shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		stdout,stderr = getQueues.communicate()
		if (getQueues.returncode < 0):
			syslog.syslog(syslog.LOG_ERR, "getQueues has crashed, stdout:"+str(stdout)+" stderr:"+str(stderr))
			return -1
		# Error output alone is only logged. The message no longer refers to a
		# host name: there is none in this function, and the undefined name
		# turned every error output into a failure.
		if (stderr != ''):
			syslog.syslog(syslog.LOG_ERR, "getQueues has errorout:"+str(stderr)+" stdout is:"+str(stdout))
		# One queue name per line.
		return stdout.splitlines()
	except:
		# Deliberately broad: callers evaluate the return code -1.
		syslog.syslog(syslog.LOG_ERR, "An unexpected error occured in function allQueues:"+str(sys.exc_info()[0]))
		syslog.syslog(syslog.LOG_ERR, "STDOUT:"+str(stdout))
		syslog.syslog(syslog.LOG_ERR, "STDERR:"+str(stderr))
		return -1

def hostGroupsServingQueue(queue):
	''' Get an array of all host groups serving the given queue.

	The groups are taken from the HOSTS line of "bqueues -l"; "+<digit>"
	suffixes and slashes are stripped from it.

	Returns the list of host group names, or -1 if the query crashed, wrote
	any error output or an unexpected exception occurred.
	'''
	# Preset so the except handler can log them even when the failure happens
	# before communicate() delivered any output; otherwise the handler itself
	# would raise on the unbound names.
	stdout = None
	stderr = None
	try:
		# NOTE We don't yet handle relevance extensions on the queue specifications.
		command = 'bqueues -l %s | grep \'HOSTS\' | sed \'s/HOSTS:[\ ]*//\' | sed \'s/\+[0-9]//g\' | sed \'s/\///g\'' % queue
		getQueueHostGroups = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		stdout, stderr = getQueueHostGroups.communicate()
		# Unlike in the other helpers, error output counts as a failure here.
		# A second, log-only check for error output used to follow this one
		# but could never be reached, so it was dropped.
		if (stderr != '') or (getQueueHostGroups.returncode < 0):
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(queue)+", getQueueHostsGroups has crashed, stdout:"+str(stdout)+" stderr:"+str(stderr))
			return -1
		# The last element of the split is dropped (presumably the HOSTS line
		# ends with a blank, leaving an empty element - TODO confirm).
		return stdout.rstrip('\n').split(' ')[:-1]
	except:
		# Deliberately broad: callers evaluate the return code -1.
		syslog.syslog(syslog.LOG_ERR, "While processing:"+str(queue)+", an unexpected error occured in function hostGroupsServingQueue:"+str(sys.exc_info()[0]))
		syslog.syslog(syslog.LOG_ERR, "STDOUT:"+str(stdout))
		syslog.syslog(syslog.LOG_ERR, "STDERR:"+str(stderr))
		return -1

def hostGroupHosts(hostGroup):
	''' Get an array of all hosts in the given host group.

	Returns the list of host names (first column of the "bhosts -X" output
	without its header line), or -1 if the query crashed or an unexpected
	exception occurred.
	'''
	# Preset so the except handler can log them even when the failure happens
	# before communicate() delivered any output; otherwise the handler itself
	# would raise on the unbound names.
	stdout = None
	stderr = None
	try:
		command = 'bhosts -X %s | awk \'NR!=1 {print $1}\'' % hostGroup
		getHostsInHostGroup = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		stdout, stderr = getHostsInHostGroup.communicate()
		if (getHostsInHostGroup.returncode < 0):
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(hostGroup)+", getHostInHostGroup has crashed, stdout:"+str(stdout)+" stderr:"+str(stderr))
			return -1
		# Error output alone is only logged, the result is still evaluated.
		if (stderr != ''):
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(hostGroup)+", getHostInHostGroup has errorout:"+str(stderr)+" stdout is:"+str(stdout))
		# One host name per line.
		return stdout.splitlines()
	except:
		# Deliberately broad: callers evaluate the return code -1.
		syslog.syslog(syslog.LOG_ERR, "While processing:"+str(hostGroup)+", an unexpected error occured in function hostGroupHosts:"+str(sys.exc_info()[0]))
		syslog.syslog(syslog.LOG_ERR, "STDOUT:"+str(stdout))
		syslog.syslog(syslog.LOG_ERR, "STDERR:"+str(stderr))
		return -1
	
def hostsServingQueue(queue):
	''' Get an array of hosts serving the given queue.

	Collects the hosts of every host group serving the queue; a host that
	belongs to several groups is listed once per group.

	Returns the list of host names, or -1 if one of the queries failed or
	an unexpected exception occurred.
	'''
	try:
		hostGroups = hostGroupsServingQueue(queue)
		if hostGroups == -1 :
			return -1
		# Named differently from the function so the function is not shadowed.
		servingHosts = []
		for group in hostGroups :
			groupHosts = hostGroupHosts(group)
			if groupHosts == -1 :
				return -1
			servingHosts.extend(groupHosts)
		return servingHosts
	except:
		# Deliberately broad: callers evaluate the return code -1. No command
		# output exists in this function, so only the exception is logged;
		# logging the never-defined stdout/stderr made the handler itself raise.
		syslog.syslog(syslog.LOG_ERR, "While processing:"+str(queue)+", an unexpected error occured in function hostsServingQueue:"+str(sys.exc_info()[0]))
		return -1

def pendingJobsInQueue(queue):
	''' Get an array of pending jobs' IDs in the given queue.

	Returns the list of job IDs (as strings) whose third bjobs column is
	"PEND", or -1 if the query crashed, wrote error output other than the
	"No unfinished" notice or an unexpected exception occurred.
	'''
	# Preset so the except handler can log them even when the failure happens
	# before communicate() delivered any output; otherwise the handler itself
	# would raise on the unbound names.
	stdout = None
	stderr = None
	try:
		command = 'bjobs -q %s -u all | awk \'NR!=1 { if ($3=="PEND") print $1 }\'' % queue
		getPendJobIDs = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		stdout, stderr = getPendJobIDs.communicate()
		# The "No unfinished" notice on stderr is no error; it only tells that
		# the queue has no jobs. Any other error output is a failure.
		if ((stderr != '') and (stderr.count("No unfinished") == 0)):
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(queue)+", getPendJobIDs has errorout:"+str(stderr)+" stdout is:"+str(stdout))
			return -1
		# A crashed query is reported as a failure like in the other helpers
		# instead of evaluating its possibly incomplete output.
		if (getPendJobIDs.returncode < 0):
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(queue)+", getPendJobIDs has crashed, stdout:"+str(stdout)+" stderr:"+str(stderr))
			return -1
		# One job ID per line.
		return stdout.splitlines()
	except:
		# Deliberately broad: callers evaluate the return code -1.
		syslog.syslog(syslog.LOG_ERR, "While processing:"+str(queue)+", an unexpected error occured in function pendingJobsInQueue:"+str(sys.exc_info()[0]))
		syslog.syslog(syslog.LOG_ERR, "STDOUT:"+str(stdout))
		syslog.syslog(syslog.LOG_ERR, "STDERR:"+str(stderr))
		return -1

def specifiedHostsInPendingJob(pendJobID):
	''' Get an array of the hosts directly specified for the given pending job.

	The hosts are taken from the "SpecifiedHosts" entry of the "bjobs -l"
	output, read up to the next ";" and split at the commas.

	Returns the list of host names (empty if no hosts are specified), or -1
	if the query crashed or an unexpected exception occurred.
	'''
	# Preset so the except handler can log them even when the failure happens
	# before communicate() delivered any output; otherwise the handler itself
	# would raise on the unbound names.
	stdout = None
	stderr = None
	try:
		command = 'job=`bjobs -l %s | tr -d \'\n\ <>\'`; expr "$job" : \'.*\(SpecifiedHosts[^;]*\).*\' | sed \'s/SpecifiedHosts//\'' % pendJobID
		getSpecHosts = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		stdout, stderr = getSpecHosts.communicate()
		if (getSpecHosts.returncode < 0) :
			# Logs the job ID; the message used to refer to a name that does
			# not exist in this function.
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(pendJobID)+", specifiedHostsInPendingJob crashed, stdout:"+str(stdout)+" stderr:"+str(stderr))
			return -1
		# Error output alone is only logged, the result is still evaluated.
		if (stderr != ''):
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(pendJobID)+", specifiedHostsInPendingJob has errorout:"+str(stderr)+" stdout is:"+str(stdout))
		result = stdout.rstrip('\n') # cut off the newline symbol
		if len(result) == 0 :
			return []
		# split() returns a one-element list if there is no comma at all.
		return result.split(',')
	except:
		# Deliberately broad: callers evaluate the return code -1.
		syslog.syslog(syslog.LOG_ERR, "While processing:"+str(pendJobID)+", an unexpected error occured in function specifiedHostsInPendingJob:"+str(sys.exc_info()[0]))
		syslog.syslog(syslog.LOG_ERR, "STDOUT:"+str(stdout))
		syslog.syslog(syslog.LOG_ERR, "STDERR:"+str(stderr))
		return -1

def numberOfCoresInHost(host):
	''' Get the number of cores the given host is equipped with.

	The number is read from the fifth column of the lshosts output
	(presumably the ncpus column - TODO confirm against the lshosts output
	format).

	Returns the number as int, 0 if lshosts reports "-" instead of a number
	and -1 if the query crashed, its output is not a number or an unexpected
	exception occurred.
	'''
	# Preset so the except handler can log them even when the failure happens
	# before communicate() delivered any output; otherwise the handler itself
	# would raise on the unbound names.
	stdout = None
	stderr = None
	try:
		command = 'lshosts %s | awk \'NR!=1 { print $5 }\'' % host
		getHostNumOfCPUs = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		stdout, stderr = getHostNumOfCPUs.communicate()
		if (getHostNumOfCPUs.returncode < 0):
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(host)+", numberOfCoresInHost crashed, stdout:"+str(stdout)+" stderr:"+str(stderr))
			return -1
		# Error output alone is only logged, the result is still evaluated.
		if (stderr != ''):
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(host)+", numberOfCoresInHost has errorout:"+str(stderr)+" stdout is:"+str(stdout))
		numCPUs = stdout.rstrip('\n')
		if numCPUs == '-' :
			return 0
		# Output that is not a number raises here and ends in the handler below.
		return int(numCPUs)
	except:
		# Deliberately broad: callers evaluate the return code -1.
		syslog.syslog(syslog.LOG_ERR, "While processing:"+str(host)+", an unexpected error occured in function numberOfCoresInHost:"+str(sys.exc_info()[0]))
		syslog.syslog(syslog.LOG_ERR, "STDOUT:"+str(stdout))
		syslog.syslog(syslog.LOG_ERR, "STDERR:"+str(stderr))
		return -1

def highestNumberOfCoresInQueue(queue):
	''' Scan all hosts serving the given queue by their core number. Determine
	the maximum number of cores any machine serving that queue is equipped
	with.

	Returns that maximum as int (0 if no host delivers a core number), or -1
	if a host group query failed or an unexpected exception occurred.
	'''
	try:
		hostGroupsServing = hostGroupsServingQueue(queue)
		if hostGroupsServing == -1 :
			return -1
		highestCoreNumber = 0
		for hostGroup in hostGroupsServing :
			hostsInHostGroup = hostGroupHosts(hostGroup)
			if hostsInHostGroup == -1 :
				return -1
			for host in hostsInHostGroup :
				# A host whose core number cannot be determined delivers -1 and
				# therefore never becomes the maximum.
				numberOfCores = numberOfCoresInHost(host)
				if numberOfCores > highestCoreNumber :
					highestCoreNumber = numberOfCores
		return int(highestCoreNumber)
	except:
		# Deliberately broad: callers evaluate the return code -1. No command
		# output exists in this function, so only the exception is logged;
		# logging the never-defined stdout/stderr made the handler itself raise.
		syslog.syslog(syslog.LOG_ERR, "While processing:"+str(queue)+", an unexpected error occured in function highestNumberOfCoresInQueue:"+str(sys.exc_info()[0]))
		return -1

# def queueForJob(jobID):
	# command = 'job=`bjobs -l %s | tr -d \'\n\ \'`;expr "$job" : \'.*Queue<\([^>]*\)>,.*\'' % jobID
	# # NOTE There cannot be more than one queue specified. (see manpage for bjobs)
	# getQueueForJob = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
	# stdout, stderr = getQueueForJob.communicate()
	# if (stderr != '') or (getQueueForJob.returncode < 0) :
		# print 'stderr:',stderr
		# return -1
	# return stdout.rstrip('\n')

def jobCoreConsumption(jobID,highestNumberOfCoresInTheQueue,is_direct):
	''' Get the number of cores the given job asks for.

	The number is read from the "#BSUB -n" entry of the "bjobs -l" output.
	If the job carries no such entry, the result depends on the other two
	parameters: 0 if is_direct is 1 (the job names its hosts directly, so no
	further hosts are planned for it), otherwise
	highestNumberOfCoresInTheQueue as worst case.

	Returns the number of cores as int, or -1 if the query crashed, the
	fallback value highestNumberOfCoresInTheQueue is -1 or an unexpected
	exception occurred.
	'''
	# Preset so the except handler can log them even when the failure happens
	# before communicate() delivered any output; otherwise the handler itself
	# would raise on the unbound names.
	stdout = None
	stderr = None
	try:
		command = 'job=`bjobs -l %s | tr -d \'\n\ \'`; expr "$job" : \'.*\(#BSUB-n[0-9]*\).*\' | sed \'s/#BSUB-n//\'' % jobID
		getReqNumCPUs = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
		stdout, stderr = getReqNumCPUs.communicate()
		if (getReqNumCPUs.returncode < 0):
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(jobID)+", jobCoreConsumption crashed, stdout:"+str(stdout)+" stderr:"+str(stderr))
			return -1
		# Error output alone is only logged, the result is still evaluated.
		if (stderr != ''):
			syslog.syslog(syslog.LOG_ERR, "While processing:"+str(jobID)+", jobCoreConsumption has errorout:"+str(stderr)+" stdout is:"+str(stdout))
		coreNumberString = stdout.rstrip('\n')
		if coreNumberString != "" :
			return int(coreNumberString)
		# The job carries no job slot spec (empty output): calculate its core
		# consumption by the maximum core count a host in any serving host
		# group contains (worst case strategy).
		if highestNumberOfCoresInTheQueue == -1 :
			return -1
		# If a node is already specified in the direct way, don't add more nodes.
		if is_direct == 1:
			return 0
		return highestNumberOfCoresInTheQueue
	except:
		# Deliberately broad: callers evaluate the return code -1.
		syslog.syslog(syslog.LOG_ERR, "While processing:"+str(jobID)+","+str(highestNumberOfCoresInTheQueue)+","+","+str(is_direct)+" an unexpected error occured in function jobCoreConsumption:"+str(sys.exc_info()[0]))
		syslog.syslog(syslog.LOG_ERR, "STDOUT:"+str(stdout))
		syslog.syslog(syslog.LOG_ERR, "STDERR:"+str(stderr))
		return -1
