# -*- coding: utf-8 -*-
#Copyright (C) 2010 Simon Kiertscher
#
#This program is free software; you can redistribute it and/or modify it 
#under the terms of the GNU General Public License as published by the 
#Free Software Foundation; either version 3 of the License, or (at your option) 
#any later version.
#
#This program is distributed in the hope that it will be useful, but WITHOUT 
#ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
#FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License along with 
#this program; if not, see <http://www.gnu.org/licenses/>.

import subprocess
import string
import re
import syslog
import sys
import traceback
import threading
import Queue
import time
import imp
import socket
import select

# Load the site configuration as a module: the module name is sys.argv[0] and
# the file is looked up at sys.argv[1] + sys.argv[0] + ".py".
cherub_config = imp.load_source(sys.argv[0], sys.argv[1]+sys.argv[0]+".py")

# OfflineNodes = [] for old register and sign_off function
# (line prefix, weight) pairs saved by cherub_sign_off and restored by cherub_register
weights_of_nodes = []
boot_duration = cherub_config.boot_duration # XXX magic number, put this later in the config
linear_regression_history = 120 # in seconds (the number of data points for the linear regression is this value divided by the update interval)
use_mean = True # not used in current http status mod version
status_lock = threading.Lock()	# used for virtual nodes

# needed so that the individual states can be recorded in parallel without the threads getting in each other's way
statusmap = {}
# needed to remember the load of the nodes (last load, current load)
loadmap = {}
# index of the last slot of the load ring (the ring has end_timepoint + 1 slots)
end_timepoint = linear_regression_history/cherub_config.update_everything
# the load is organised as a ring via modulo, so the past_timepoint startvalue is the largest value in the ring
current_timepoint = 0
past_timepoint = end_timepoint
# initialise the maps: status -1 for every node, and -2 in every slot of the load ring
for node in cherub_config.cluster:
	statusmap[node[0]] = -1
	loadmap[node[0]] = [-2 for x in range((linear_regression_history/cherub_config.update_everything) + 1)]

# debug log, created (and truncated) in the current working directory at import time
debugfile = open('cherub_local_debugfile.log','w')
debugfile.write("#timepoint\tload_returned\tslope\tactiveNodes\tActivConn\tLR_based_on\n")
# reference time for the timestamps written to the debug output
debugstart = time.time()


def lineareRegression(pairs=((0,0),)):
	'''Fit a least-squares line y = a + b*x through the given points.

	pairs -- sequence of (x, y) tuples; it must support len().

	Returns the tuple (a, b, x_mean, y_mean), where b is the slope and a is
	the crossing with the y axis. Returns (0,0,0,0) if pairs is empty or if
	all x values are identical (the slope is undefined in that case).
	'''
	count = len(pairs)
	if count == 0:
		return (0,0,0,0)

	# first, get the means of x and y; summing from 0.0 keeps the division
	# a float division even when all inputs are integers
	x_mean = sum((pair[0] for pair in pairs), 0.0) / count
	y_mean = sum((pair[1] for pair in pairs), 0.0) / count

	# calculate b, this is the slope
	numerator = 0.0
	denominator = 0.0
	for pair in pairs:
		numerator = numerator + (pair[0] - x_mean) * (pair[1] - y_mean)
		denominator = denominator + (pair[0] - x_mean) * (pair[0] - x_mean)
	if denominator == 0:
		return (0,0,0,0)
	b = numerator / denominator
	# calculate a, this is the crossing with the y axis
	a = y_mean - (b * x_mean)

	return (a, b, x_mean, y_mean)

def cherub_boot(node_address):
	'''Give the command to boot the given node.

	node_address -- for real nodes the IPMI address, for example
	node015.ipmi.cluster or 10.3.9.115. An address containing 'V' is treated
	as a virtual node and the request goes to the simulator socket instead.

	Real nodes are powered on with "ipmitool ... power on" as user root with
	the password from cherub_config.IPMIPassword; the return value is the
	exit code of ipmitool. For virtual nodes "boot:<address>" is sent to
	/tmp/cherub_communication and 0 is returned. Socket errors propagate to
	the caller.
	'''
	if 'V' not in node_address:
		proc = subprocess.Popen(["ipmitool", "-H", node_address,
			"-U", "root", "-P", cherub_config.IPMIPassword, "power", "on"],
			stdout=subprocess.PIPE, stderr=subprocess.PIPE)
		stderr = proc.communicate()[1]
		if (stderr != ''):
			print 'ERROR in cherub_boot:', stderr
		return proc.returncode

	# "with" releases the lock and "finally" closes the socket even if the
	# simulator is not reachable; otherwise the next caller would block forever
	with status_lock:
		s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
		try:
			s.connect("/tmp/cherub_communication")
			s.send("boot:"+node_address)
		finally:
			s.close()
	return 0


def cherub_shutdown(node_address):
	'''Give the command to shut down the given node.

	node_address -- for real nodes the IPMI address, for example
	node015.ipmi.cluster or 10.3.9.115. An address containing 'V' is treated
	as a virtual node and the request goes to the simulator socket instead.

	Real nodes are shut down with "ipmitool ... power soft" as user root with
	the password from cherub_config.IPMIPassword; the return value is the
	exit code of ipmitool. For virtual nodes "shutdown:<address>" is sent to
	/tmp/cherub_communication and 0 is returned. Socket errors propagate to
	the caller.
	'''
	if 'V' not in node_address:
		proc = subprocess.Popen(["ipmitool", "-H", node_address,
			"-U", "root", "-P", cherub_config.IPMIPassword, "power", "soft"],
			stdout=subprocess.PIPE, stderr=subprocess.PIPE)
		stderr = proc.communicate()[1]
		if (stderr != ''):
			print 'ERROR in cherub_shutdown:', stderr
		return proc.returncode

	# "with" releases the lock and "finally" closes the socket even if the
	# simulator is not reachable; otherwise the next caller would block forever
	with status_lock:
		s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
		try:
			s.connect("/tmp/cherub_communication")
			s.send("shutdown:"+node_address)
		finally:
			s.close()
	return 0

def cherub_sign_off(node_name):
	'''Sign the node off from the load balancer by setting its weight to 0.

	Every rule printed by "ipvsadm -S" that contains " <node_name>:" is
	edited with "ipvsadm -e" to weight 0. The rule text up to "-w" and the
	previous weight are appended to weights_of_nodes so that cherub_register
	can restore the weight later. If the name contains 'V' (virtual node of
	the simulator), "sign_off:<node_name>" is additionally sent to
	/tmp/cherub_communication.

	Always returns 0; the output of ipvsadm is not inspected.
	'''
	is_virtual = 'V' in node_name
	p = subprocess.Popen(["ipvsadm","-S"],stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	rules = p.communicate()[0]
	for line in rules.split('\n'):
		# look for all entries of the node and save them
		if (" "+str(node_name)+":") not in line:
			continue
		fields = line.split()
		# save the line (until -w) and the weight, which is the 8th field of the rule
		weights_of_nodes.append((line.split('-w')[0], fields[7]))
		# build the command to sign off the node and the service it provides:
		# drop the leading action flag and the old weight, then append weight 0
		command = ["ipvsadm","-e"] + fields[1:-1]
		command.append("0")
		p = subprocess.Popen(command,stdout=subprocess.PIPE, stderr=subprocess.PIPE)
		p.communicate()
	# if the node is virtual (e.g. part of the environmental simulator) do this in addition
	if is_virtual:
		# "with" releases the lock and "finally" closes the socket even if the
		# simulator is not reachable; otherwise the next caller would block forever
		with status_lock:
			s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
			try:
				s.connect("/tmp/cherub_communication")
				s.send("sign_off:"+node_name)
			finally:
				s.close()
	return 0

def cherub_register(node_name):
	'''Register the node at the load balancer again by restoring its weight.

	Every rule printed by "ipvsadm -S" that contains " <node_name>:" is
	edited with "ipvsadm -e". The weight is the first non-zero weight that
	cherub_sign_off saved for that rule in weights_of_nodes; all saved
	entries of the rule are removed. If no usable weight was saved, '1' is
	used. If the name contains 'V' (virtual node of the simulator),
	"register:<node_name>" is additionally sent to /tmp/cherub_communication.

	Always returns 0; the output of ipvsadm is not inspected.
	'''
	is_virtual = 'V' in node_name
	p = subprocess.Popen(["ipvsadm","-S"],stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	rules = p.communicate()[0]
	for line in rules.split('\n'):
		# search for the nodes and services to register
		if (" "+str(node_name)+":") not in line:
			continue
		# try to find the old weight; iterate over a copy because matching
		# entries are removed from the list while searching
		weight = None
		for saved in list(weights_of_nodes):
			if saved[0] in line:
				weights_of_nodes.remove(saved)
				# a saved weight of 0 would keep the node signed off, skip it
				if weight is None and saved[1] != '0':
					weight = saved[1]
		if weight is None:
			weight = '1'
		# build the register command: drop the leading action flag and the
		# current weight, then append the restored weight as ONE argument
		# (extending the list with the string would split '100' into '1','0','0')
		command = ["ipvsadm","-e"] + line.split()[1:-1]
		command.append(weight)
		p = subprocess.Popen(command,stdout=subprocess.PIPE, stderr=subprocess.PIPE)
		p.communicate()
	# if the node is virtual (e.g. part of the environmental simulator) do this in addition
	if is_virtual:
		# "with" releases the lock and "finally" closes the socket even if the
		# simulator is not reachable; otherwise the next caller would block forever
		with status_lock:
			s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
			try:
				s.connect("/tmp/cherub_communication")
				s.send("register:"+node_name)
			finally:
				s.close()
	return 0

def inc_loadmappointer(pointer):
	'''Return the ring position that follows pointer.

	The ring has end_timepoint + 1 slots, so the position after the last
	slot wraps around to 0.
	'''
	ring_size = end_timepoint + 1
	successor = pointer + 1
	return successor % ring_size


balancemap_file = open(str(time.time())+"-balancemap.log",'w')
def print_balancemap():
	for node in cherub_config.cluster:
		# standardcase
		if loadmap[node[0]][current_timepoint] > -1:
			# standardcase
			if loadmap[node[0]][past_timepoint] > -1:
				value = (loadmap[node[0]][current_timepoint] - loadmap[node[0]][past_timepoint]) / float(cherub_config.update_everything)
			# edgecase, first case after a register
			elif loadmap[node[0]][past_timepoint] == -1:
				value = loadmap[node[0]][current_timepoint] / float(cherub_config.update_everything)
			# errorcase
			else: # --> loadmap[node[0]][past_timepoint] < -1:
				print "This should not happen, loadmapentry is smaller then -1, loadmap is (1): "+str(loadmap)
				print "Loadmap of node "+str(node)+" is: "+str(loadmap[node[0]])
				print "Timepoints are: current: "+str(current_timepoint)+" past: "+str(past_timepoint)
		# case if down or signed off after down
		elif loadmap[node[0]][current_timepoint] == -1:
			value = 0
		# errorcase
		else: # --> loadmap[node[0]][current_timepoint] < -1:
			print "This should not happen, loadmapentry is smaller then -1, loadmap is (2): "+str(loadmap)
			print "Loadmap of node "+str(node)+" is: "+str(loadmap[node[0]])
			print "Timepoints are: current: "+str(current_timepoint)+" past: "+str(past_timepoint)

		if value < 0:
			print "This should not happen, value is smaller then 0, loadmap is (3): "+str(loadmap)
			print "Loadmap of node "+str(node)+" is: "+str(loadmap[node[0]])
			print "Timepoints are: current: "+str(current_timepoint)+" past: "+str(past_timepoint)
			value = 0
		balancemap_file.write(node[0]+str("\t")+str(value)+str("\t"))
	balancemap_file.write("\n")
	balancemap_file.flush()
	
	
# third parallel version, using nonblocking connects tcp requests and http request to the status_mod of
# the backend apache servers
def cherub_status_parallel():
	'''must return the following states
	CHERUB_UNKNOWN = -1 = if the state of the node is unknown or an error occures
	CHERUB_BUSY    =  0 = if the node is booted and BUSY/WORKING and REGISTERT to the RMS
	CHERUB_ONLINE  =  1 = if the node is booted but IDLE and REGISTERT to the RMS
	CHERUB_OFFLINE =  2 = if the node is booted but NOT REGISTERT to the RMS
	CHERUB_DOWN    =  3 = if the node is shutdown and NOT REGISTERT to the RMS
	'''
	
	#debug_start_timer = time.time()
	
	# fill the list with the name, so if a name is returned, you know which node has no status or failed
	status_list = [cherub_config.cluster[abc][0] for abc in range(len(cherub_config.cluster))]
	#print status_list
	socket_list = []
	fd_connectattempt_to_socket_list_index = {}
	fd_statusrequest_to_socket_list_index = {}
	
	global loadmap
	global statusmap
	#print loadmap
	#print statusmap
	
	# increase the timepoint variables by 1 and rotates on linear_regression_history
	global current_timepoint
	global past_timepoint
	current_timepoint = inc_loadmappointer(current_timepoint)
	past_timepoint = inc_loadmappointer(past_timepoint)
	
	# first, build the dynamic busy threshold
	running = 0
	for node in statusmap:
		if statusmap[node] == 0 or statusmap[node] == 1:
			running = running +1
	if running == 0:
		running = 1
	# this was the old implementation which was used for all measurements in publications so far
	#less_then_ActivConn_is_idle = cherub_config.overload - (cherub_config.overload / running)
	
	# this is the new implementation which considers also sequential shutdown.
	# all in all still a bit strange, better would be to check how many could be turned off up to
	# the configured threshold
	less_then_ActivConn_is_idle = (cherub_config.overload * (running - cherub_config.sequential_shutdown) / running
	
	# the less_then_ActivConn_ is_idle is minimal 80% of the overload value
	less_then_ActivConn_is_idle = min(less_then_ActivConn_is_idle, cherub_config.overload * 0.8)
	
	p = subprocess.Popen(["ipvsadm"],stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	
	# get all lines from ipvsadm
	lines = p.communicate()[0].split('\n')
	
	#debug_end_timer = time.time()
	#print "ipvsadm query: ", debug_end_timer - debug_start_timer
	
	# parse the lines of ipvsadm for the real nodes 
	# virtual nodes can only be added to ipvsadm if they are found in the /etc/hosts file!
	for line in lines:
		# the name is the first arguement after spliting the line, spliting on dot again
		# is necessary cause ipvsadm adds whole domain name
		process_index = ''
		if len(line.split())>=1:
			node_name = line.split()[1].split(':')[0]
			# if ip address
			if node_name.count('.') == 4 and node_name.split('.')[0].isdigit() and node_name.split('.')[1].isdigit() and node_name.split('.')[2].isdigit() and node_name.split('.')[3].isdigit():
				#print "found ip"
				process_index = status_list.index(line.split()[1].split(':')[0])
			elif status_list.count(node_name.split('.')[0])>0:
				#print "found name"
				node_name = node_name.split('.')[0]
				process_index = status_list.index(node_name)
			else:
				continue
		else:
			continue
		
		# weight > 0 --> registert
		# at the moment, it is ignort if it has activconn
		
		# creat socket for later use, either connection attempt or apache status request 
		# list has tuple of (node_index_in_status_list, socket)
		if 'V' in node_name:
			socket_helper = socket.socket(socket.AF_UNIX,socket.SOCK_STREAM)
		else:
			socket_helper = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
		socket_list.append([process_index, socket_helper])

		#print line
		
		# the sockets are distinguished by there list in which there are mapped
		# first, check its weight in ipvsadm
		if  int(line.split()[3]) > 0:
			# with this method, only mean load is possible
			# mapping fd for status requests to index in socket_list
			fd_statusrequest_to_socket_list_index[socket_helper.fileno()]=len(socket_list)-1
		else:
			# mapping fd for connection attempts to index in socket_list
			fd_connectattempt_to_socket_list_index[socket_helper.fileno()]=len(socket_list)-1

	'''der netzwerk part funktioniert wie folgt. Für alle knoten, wurde ein socket erzeugt und es wird
	nun ein non-blocking connect_ex versucht (verbindungsaufbau mit der gegenüberstelle), non-blocking
	und timeout schließen sich aus. Zuerst werden alle status abfragen abgewickelt, in der zwischenzeit
	haben die knoten die offline oder down sind zeit sich zu verbinden.
	Wenn dies länger als eine Sekunde dauert ist alles schon ok, wenn nicht, wird so lange gewartet,
	bis alle übrigen sockets mindestens 1 sekunde zeit für ihr connect hatten.
	Wenn nach der sekunde kein connect oder refuse kam, wird angenommen das der knoten nicht verfügbar
	ist! Danach werden die entsprechenden sockets wieder geschlossen.
	
	Für die Buchführung gibt es mehrere listen, die socket_liste und die status_liste, in der
	socketliste sind tupel mit socket und dazughörigem index in der status_liste geführt, ausserdem gibt
	es eine hashmap (fd_connectattempt_to_socket_list_index) die einen file_descriptor (fd) wieder einem
	index in der socket_liste zuweisen kann.
	
	status_list	socket_list			fd_XXX_to_socket_list_index	epoll returns fd
	(ib5)	<-------(#indexA, socketA)	<-------(fd --> #indexAA)
	(ib6)	<-------(#indexB, socketB)	<-------(fd --> #indexBB)	<-------fd
	(ibX)	<-------(#indexX, socketX)	<-------(fd --> #indexCC)
	
	'''
	
	#we use http server so port is 80
	service_port = 80
	socket_poll = select.epoll()
	
	#spawn and connect all sockets
	#print "try to spawn sockets: "+str(socket_list)
	for n,socket_item in enumerate(socket_list):
		#set nonblocking
		socket_list[n][1].setblocking(0)
		#register the socket at epoll, so we can check when its available or an error occurred
		socket_poll.register(socket_list[n][1].fileno(), select.EPOLLOUT)
		#remember the node,fd pair for later
#		node_index_in_status_list = socket_list[n][0]
#		status_list[node_index_in_status_list]=socket_list[n][1].fileno()
		#try to connect
		node_address = cherub_config.cluster[socket_item[0]][0]
		if socket_list[n][1].family == socket.AF_UNIX:
			#print "found AF_UNIX"
			socket_list[n][1].connect_ex(("/tmp/cherub_communication"))
		elif socket_list[n][1].family == socket.AF_INET:
			#print "found AF_INET"
			socket_list[n][1].connect_ex((node_address,service_port))
		else:
			print "WTF, this should not happen! Socket type not supportet: "+str(socket_list[n][1].family)
		
	#debug_end_timer = time.time()
	#print "after_spawn_and_connect: ", debug_end_timer - debug_start_timer
		
	# remember current time for later comparison and 1 second connection time check
	start_epoll_time=time.time()
	end_epoll_time=start_epoll_time
	# dont exit this loop as long as there are sockets for requests OR dont exit as long as not a full second has passed
	while len(fd_statusrequest_to_socket_list_index)>0 or (end_epoll_time-start_epoll_time<1.0):
	#while end_epoll_time-start_epoll_time<1.0:
		# check for events
		event_list = socket_poll.poll(0.001)
		while len(event_list)>0:
			#print "status_list: "+str(status_list)
			#print "socket_list: "+str(socket_list)	
			#print "loadmap    : "+str(loadmap)
			#print "eventlist  : "+str(event_list)
			#print "time       : "+str(end_epoll_time-start_epoll_time)
			#print "fd_statusrequest_...:"+str(fd_statusrequest_to_socket_list_index)
			#print "fd_connectattempt_...:"+str(fd_connectattempt_to_socket_list_index)
			
			current_fd, current_event = event_list.pop()
			# flags to know which type of socket we have here
			connectattempt = False
			statusrequest  = False

			# find the status_list index of the node from the corresponding fd
			if current_fd in fd_connectattempt_to_socket_list_index:
				node_index_in_socket_list = fd_connectattempt_to_socket_list_index[current_fd]
				node_index_in_status_list = socket_list[node_index_in_socket_list][0]
				connectattempt = True
		
			if current_fd in fd_statusrequest_to_socket_list_index:
				node_index_in_socket_list = fd_statusrequest_to_socket_list_index[current_fd]
				node_index_in_status_list = socket_list[node_index_in_socket_list][0]
				statusrequest  = True

			node_name = status_list[node_index_in_status_list]
			#print "fd: "+str(current_fd)+" node: "+str(node_name)+" # event: "+str(current_event)
			
			is_virtual = False
			if 'V' in node_name:
				is_virtual = True

			#print "node_index_in_status_list: "+str(node_index_in_status_list)
			#print "node_index_in_socket_list: "+str(node_index_in_socket_list)
			close_socket = True
			# if we have a node which is offline or down and we made only a connection attempt
			if connectattempt:
				# loadmap wird schon weiter oben gesetzt
				# ready for write means, connect has workt and service is reachable
				if (current_event & 4) == 4 and not is_virtual:
					#print "connected"
					statusmap[node_name] = 2
					status_list[node_index_in_status_list]= 2
					loadmap[node_name][current_timepoint] = loadmap[node_name][past_timepoint]
				# a virtual node should always be able to connect, then he will ask the simulator if its down or offline
				elif (current_event & 4) == 4 and is_virtual:
					if (current_event & 16) == 16:
						print "It seems, that your using virtual nodes. Please make sure the simulator is running while using them."
						statusmap[node_name] = -1
						status_list[node_index_in_status_list]= -1
						loadmap[node_name][current_timepoint] = -1
					else:
						socket_list[node_index_in_socket_list][1].send("get:"+str(node_name),socket.MSG_DONTWAIT)
						socket_poll.unregister(current_fd)
						socket_poll.register(current_fd, select.EPOLLIN)
						close_socket = False
					
				elif (current_event & 1) == 1 and is_virtual:
					status_response = socket_list[node_index_in_socket_list][1].recv(1024)
					#print status_response
					if status_response == '':
						print "Did not received anything. This should not happen."
						statusmap[node_name] = -1
						status_list[node_index_in_status_list]= -1
						loadmap[node_name][current_timepoint] = -1
					elif int(status_response) < 2 or int(status_response) > 3:
						print "received status is: "+str(status_response)
						print "this seems not correct. (maybe ipvsadm entry for node "+str(node_name)+" is wrong?)"
					elif int(status_response) == 2:
						loadmap[node_name][current_timepoint] = loadmap[node_name][past_timepoint]
						statusmap[node_name] = 2
						status_list[node_index_in_status_list]= 2
					elif int(status_response) == 3:
						loadmap[node_name][current_timepoint] = -1
						statusmap[node_name] = 3
						status_list[node_index_in_status_list]= 3
					else:
						print "received status is: "+str(status_response)
						print "this should not be the case. "+str(node_name)
				else:
					print "other status: "+str(current_event)
					loadmap[node_name][current_timepoint] = -1
					statusmap[node_name] = 3
					status_list[node_index_in_status_list]= 3

			# if it is a node which is registered, we must find out if its online or busy, therefore
			# we poll the nodes apache status_mod to get the current requests
			elif statusrequest:
				if (current_event & 8) == 8:
					print "received POLLERR, this should not happen."
					if current_event == 24:
						print "WARNING: received event 24: It seems, node "+str(node_name)+" is REGISTERED at the lvs but currently DOWN"
					statusmap[node_name] = -1
					status_list[node_index_in_status_list]= -1
				
				elif (current_event & 4) == 4:
					if (current_event & 16) == 16:
						print "received POLLHUP, this happens manly if your using virtual nodes. Please make sure the simulator is running while using them. If your not using virtual nodes, there is something to do in the implementation."
						print "Another error source might be the /proc/sys/net/core/somaxconn kernel parameter. It has to be high enough to create a socket for every virtual node!"
						statusmap[node_name] = -1
						status_list[node_index_in_status_list]= -1
					else:
						#print "ready to write "+str(node_name)
						# send get request
						if is_virtual:
							socket_list[node_index_in_socket_list][1].send("GET /server-status?auto HTTP/1.0\r\n\r\n:"+str(node_name),socket.MSG_DONTWAIT)
						else:
							socket_list[node_index_in_socket_list][1].send("GET /server-status?auto HTTP/1.0\r\n\r\n",socket.MSG_DONTWAIT)
						socket_poll.unregister(current_fd)
						socket_poll.register(current_fd, select.EPOLLIN)
						close_socket = False

				elif (current_event & 1) == 1:
					#print "ready to receive "+str(node_name)
					# receive status data
					status_response = socket_list[node_index_in_socket_list][1].recv(1024)
					#print status_response
					# this is the number of total accesses, which is assigned to
					# the loadmap at the corresponding position
					for line in status_response.split('\n'):
						if line.split(':')[0] == 'Total Accesses':
							loadmap[node_name][current_timepoint] = int(line.split(":")[1])
							#print "Total Accesses: "+str(loadmap[node_name][current_timepoint])
							break
						elif line.split(':')[0] == 'Scoreboard':
							print "could not find Total Accesses in the Answere. Something went wrong."
							print status_response
						else:
							continue
					
					#print loadmap[node_name][current_timepoint]
					#print loadmap
					#print node_name
					#print past_timepoint
					# alte -1'en werden durch den aktuellen wert ersetzt um besser
					# weiter arbeiten zu können
					#if loadmap[node_name][past_timepoint] == -1:
					#	for x in range(end_timepoint + 1):
					#		loadmap[node_name][past_timepoint - x % end_timepoint + 1] = loadmap[node_name][current_timepoint]
	
					#print loadmap
					old = loadmap[node_name][past_timepoint]
					new = loadmap[node_name][current_timepoint]
					
					# if the node was just turned on, the next calculation is totaly wrong without adjusting
					if old == -1 and new != -1:
						old = new
					
					#print loadmap
					print "Node: "+str(node_name)+" mean load: "+str((new - old)/float(cherub_config.update_everything))+"req/sec"
						
					# active connections >= less_then_ActivConn_is_idle --> the node is BUSY
					if ((new - old)/float(cherub_config.update_everything) >= int(less_then_ActivConn_is_idle)):
						statusmap[node_name] = 0
						status_list[node_index_in_status_list]= 0
					# active connections < less_then_ActivConn_is_idle --> the node is ONLINE
					else:
						statusmap[node_name] = 1
						status_list[node_index_in_status_list]= 1
				else:
					# error case, should not occurre
					print "unexpected event:"
					print "current_fd: "+str(current_fd)
					print "current_event:"+str(current_event)
					print "fd_statusrequest_to_socket_list_index: "+str(fd_statusrequest_to_socket_list_index)
					print "fd_connectattempt_to_socket_list_index: "+str(fd_connectattempt_to_socket_list_index)
					print "socket_list: "+str(socket_list)
					print "status_list: "+str(status_list)
					return -1

			else:
				# error case, should not occurre
				print "current fd was not found in the lists:"
				print "current_fd: "+str(current_fd)
				print "current_event:"+str(current_event)
				print "fd_statusrequest_to_socket_list_index: "+str(fd_statusrequest_to_socket_list_index)
				print "fd_connectattempt_to_socket_list_index: "+str(fd_connectattempt_to_socket_list_index)
				print "socket_list: "+str(socket_list)
				print "status_list: "+str(status_list)
				return -1
			
			if close_socket:
				#remove the fd from the polling list
				socket_poll.unregister(current_fd)
				#close coresponding socket and degrade the counter
				socket_list[node_index_in_socket_list][1].close()
				socket_list[node_index_in_socket_list][0] = -1
				if connectattempt:
					del fd_connectattempt_to_socket_list_index[current_fd]
				if statusrequest:
					del fd_statusrequest_to_socket_list_index[current_fd]
				
			#print "loadmap    : "+str(loadmap)
		end_epoll_time=time.time()
	
	#debug_end_timer = time.time()
	if (end_epoll_time-start_epoll_time>1.1):
		print "requestroutin took too long: "+str(end_epoll_time-start_epoll_time)+" sec"
	#print "after_epoll_timeout: ", debug_end_timer - debug_start_timer

	# close remaining sockets if no reaction was visible. set status of this nodes to down
	n = len(socket_list)
	while n > 0:
		#print socket_list
		# if the socket was already handelet
		if socket_list[n-1][0] == -1:
			socket_list[n-1][1].close()
		# if the socket did not came back
		else:
			socket_list[n-1][1].close()
			node_name = status_list[socket_list[n-1][0]]	# correct that way?
			print "NOANSWERE_BY: "+str(node_name)
			statusmap[node_name] = 3			# correct that way?
			loadmap[node_name][current_timepoint] = -1	# correct that way?
			status_list[socket_list[n-1][0]] = 3 #down
		# in either case, delete it form the list
		del socket_list[n-1]
		n = n - 1
	
	#debug_end_timer = time.time()
	#print "after_closing_remaining_sockets: ", debug_end_timer - debug_start_timer
	
	# correct the init values
	for node_name, node_value in loadmap.iteritems():
		# only in first round after init
		if node_value[past_timepoint] == -2:
			#edgecase, if node starts in register
			if node_value[current_timepoint] == -2:
				node_value[current_timepoint] = 0
			for x in range(len(node_value)):
				node_value[(past_timepoint + x) % len(node_value)] = node_value[current_timepoint]
		# break out of the loop as fast as possible
		else:
			break
	
	print "loadmap: "+str(loadmap)
	#print "loadmap: "+str(loadmap["ib5"])
	#print "loadmap: "+str(loadmap["ib6"])
	#print "current_timepoint: "+str(current_timepoint)+ "  past_timepoint: "+str(past_timepoint)
	
	# if something went wrong, the status list entry is still the node name, if so, its state is set to UNKNOWN (-1)
	#print status_list
	for index in range(len(status_list)):
		value = status_list[index]
		if type(value) is int:
			if value != -1 and value != 0 and value != 1 and value != 2 and value != 3:
				status_list[index] = -1
		else:
			print "something went wrong. values should be digits."
			print "but value is: "+str(value)
			print "status_list is:"+str(status_list)
			status_list[index] = -1
	# additional debugstuff
	print_balancemap()
	
	#debug_end_timer = time.time()
	#print "status_parallel_duration: ", debug_end_timer - debug_start_timer
	
	return status_list

def getTCPDump(dumplength, logfile="/home/samson/src/cherub/cherub_lvs.log"):
	'''Return the last dumplength lines of the request log.

	dumplength -- number of lines to read from the end of the file
	logfile    -- path of the log file; defaults to the cherub_lvs.log used so far

	Returns the tuple (stdout, stderr) of "tail -n <dumplength> <logfile>".
	Despite the function name no tcpdump is started, the log file is read as it is.
	'''
	# argument list instead of a shell string: no shell is involved, so the
	# path and the length cannot be misinterpreted by the shell
	command = ['tail', '-n', str(dumplength), str(logfile)]
	tcpdump = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	return tcpdump.communicate()


def getSloap(node_name):
	'''Return the linear regression of a specific node (function should better be renamed to something like linearRegressionSingleNode).

	Reads the last linear_regression_history lines via getTCPDump. Every
	whitespace-separated field of the form "<value>:<node_name>" yields the
	data point (first field of the line, value), both converted to int.

	Returns the result of lineareRegression for these points, i.e. the tuple
	(a, b, x_mean, y_mean).
	'''
	tcpdumpResult, err = getTCPDump(linear_regression_history)

	regressionPairs = []
	# empty lines split into no fields and are skipped by the inner loop,
	# so they do not have to be removed from the list beforehand
	for line in tcpdumpResult.split('\n'):
		fields = line.split()
		for field in fields:
			parts = field.split(':')
			if len(parts) == 2 and parts[1] == node_name:
				regressionPairs.append((int(fields[0]), int(parts[0])))

	return lineareRegression(regressionPairs)



def cherub_adjust_update_cycle(current_cycle):
	'''Unfinished: sorts the nodes by level into sets of available and busy nodes.

	Returns 0 if every node appears numberofservices times in the ipvsadm
	text or if no service exceeds cherub_config.overload, and -1 if
	numberofservices is smaller than 1. On the remaining path the function
	ends without a return statement, so None is returned.

	current_cycle is never read in this function.

	NOTE(review): subtext is assigned further down in this function, which
	makes it a local name; reading it in the first loop therefore raises
	UnboundLocalError before anything else happens.
	NOTE(review): numberofservices and cherub_status are not defined in the
	visible part of this file -- TODO confirm where they are supposed to come from.
	'''
	is_equal = True
	# do we have more then one service but every node provide all services?
	for node in cherub_config.cluster:
		# NOTE(review): subtext is not assigned yet at this point (see docstring)
		if subtext.count(node[0]) != numberofservices:
			is_equal = False
	if is_equal:
		# use global load also in this case
		# print 'exit on only same service'
		return 0
	if numberofservices < 1:
		return -1
	
	ipvsadm2 = subprocess.Popen(["ipvsadm"],stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	stdout, stderr = ipvsadm2.communicate()
	subtext = stdout.split('\n')
	# setofservices, here is every service stored, who needs support
	setofservices = set()
	# the first line of the ipvsadm output is skipped
	for lines in subtext[1::]:
		if (len(lines.split()) > 5):
			if (lines.split()[4].isdigit()):
				# if there is a 5th element in the line, and it is a digit, it is the ActivConn value
				if int(lines.split()[4]) > int(cherub_config.overload):
					# remember the part after the colon of the second field (the service)
					setofservices.add(lines.split()[1].split(':')[1])			
	
	# if there is no service who needs support, return 0 immediately
	print 'services:'+str(setofservices)
	if len(setofservices) == 0:
		# print 'exit on no service needs support'
		return 0
	
	# one set per value (1, 2, 3) of the third entry of a cluster tuple
	# presumably this entry is the level of the node -- TODO confirm against the config
	availableL1 = set()
	availableL2 = set()
	availableL3 = set()
	busyL1 = set()
	busyL2 = set()
	busyL3 = set()
	
	for node in cherub_config.cluster:
		status = cherub_status(node[0])
		print 'status of node:'+str(node[0])+' is '+str(status)
		# if node status is offline or down, it is available for support
		if status > 1:
			if node[2] == 1:
				availableL1.add(node[0])
			if node[2] == 2:
				availableL2.add(node[0])
			if node[2] == 3:
				availableL3.add(node[0])
		# in every other state, they are not ready for support
		else:
			if node[2] == 1:
				busyL1.add(node[0])
			if node[2] == 2:
				busyL2.add(node[0])
			if node[2] == 3:
				busyL3.add(node[0])
	# NOTE(review): the sets are filled but not used or returned afterwards

def cherub_node_load(node_name):
	'''Return the load of a single node; this implementation always reports 0.'''
	node_load = 0
	return node_load

def getGlobalLinearRegression():
	'''returns the linear regression of the load of all nodes accumulated'''
	regressionPairs=[]
	timewindow = 0
	index = (linear_regression_history / cherub_config.update_everything) - 1
	last_value = 0
	#print loadmap
	while timewindow < (linear_regression_history / cherub_config.update_everything):
		sumup = 0
		for node_name, node_value in loadmap.iteritems():
			if node_value[(current_timepoint - timewindow) % (end_timepoint + 1)] == -1 or node_value[(current_timepoint - timewindow) % (end_timepoint + 1)] == 0.0:
				continue
			elif node_value[(past_timepoint - timewindow) % (end_timepoint + 1)] == -1:
				# careful here, if a system other then apache is not resetting its request counters on a shutdown/restart this will leads to serious trouble in the calculation of the slope!!!
				print "first time after node "+str(node_name)+" was down!"
				new = int(node_value[(current_timepoint - timewindow) % (end_timepoint + 1)])
				old = 0.0
				node_value[(past_timepoint    - timewindow) % (end_timepoint + 1)] = 0.0
			else:
				#print sumup
				#print current_timepoint
				#print timewindow
				#print node_value[current_timepoint - timewindow]
				new = int(node_value[(current_timepoint - timewindow) % (end_timepoint + 1)])
				old = int(node_value[(past_timepoint    - timewindow) % (end_timepoint + 1)])
			sumup = sumup + ((new - old) / float(cherub_config.update_everything))
		
		# edgecase, wenn gerade angeschaltet wurde, wird anstelle 0 der erste richtige Wert
		# verwendet, dies soll verhindern, dass die 0 für in der LR einen Anstieg verursacht
		# ohne, dass wirklich ein Anstieg vorliegt, nur weil plötzlich eine Grundlast vorliegt
		# ansonsten kann der sumup wert NIE 0.0 sein, da mindestens ein status request gesendet
		# wurde.
		if sumup == 0.0:
			sumup = last_value
		# wenn die summe sogar negativ ist, gab es vermutlich einen reset in den httpd stats
		# der letzte Wert, ist dann vemrutlich der genauste
		if sumup < 0:
			print "sumup was negative, very odd, loadmap was: "+str(loadmap)
			sumup = last_value
		regressionPairs.append((int(index)*cherub_config.update_everything, sumup))
		last_value = sumup
		timewindow = timewindow + 1
		index = index - 1
	print "regression Pair values:\n"+str([regressionPairs[x][1] for x in range(len(regressionPairs))])
	return lineareRegression(regressionPairs), regressionPairs

def cherub_global_load():
	'''this script has to return the following values 
	 -1 = if an error occured
	0-n = for the number of nodes needet to handle the load 
	'''
	try:	
		start_time = time.time()
		load = 0
		ActivConn = 0
		activeNodes = 0
		
		for node in statusmap:
			if statusmap[node] != 1 and statusmap[node] != 0:
				continue
			activeNodes = activeNodes + 1
				
			#sys.stdout.write(str(node)+" : "+str(loadmap[node][current_timepoint])+"req/sec (meanfunction)")
			ActivConn = ActivConn + (loadmap[node][current_timepoint] - loadmap[node][past_timepoint]) / float(cherub_config.update_everything)

		globalLinearRegression = getGlobalLinearRegression()
		sloap, pairs = globalLinearRegression
		sloap = sloap[1]
		print "ActivConn: "+str(ActivConn)+ "  globalsloap: "+str(sloap)
		#print "(("+str(cherub_config.overload)+" * "+str(activeNodes)+") - "+str(ActivConn)+") == 0"
		
		nodes_for_backup = (len(cherub_config.cluster) * cherub_config.backup) / 100
		# current_backup - nodes which are not necessary (and thus are already backup!) #
		current_backup = int(((cherub_config.overload * activeNodes) - ActivConn ) / cherub_config.overload)
		current_backup_cap = current_backup * cherub_config.overload
		newbackup = max(nodes_for_backup - current_backup, 0)

		# compute the load difference by suptracting the current activ connections, the load difference which is forcastet and the current capacity which is provided by the backup.
		load_difference = (cherub_config.overload * activeNodes) - ActivConn - (sloap * boot_duration) - current_backup_cap
		load = load_difference * -1 / float(cherub_config.overload)
		print "load_difference: "+str(load_difference)+" composed of ActivConn:\t"+str(ActivConn)+" sloap*boot_duration:\t"+str(sloap * boot_duration)+" current_backup_cap:\t"+str(current_backup_cap)+" load: "+str(load)
		
		#print loadmap
		timestamp=time.time() - debugstart
		print "TIMESTAMP: "+str(timestamp)
		if load > 0:
			#print "function duration: " + str(time.time() - start_time) + " sec by "+str(len(cherub_config.cluster))+" nodes"
			# the plus 1 is due to rounding down when casting and if its the whol 1 node, it cant be wrong to turn another one on straight away
			debugfile.write("%f\t%d\t%f\t%d\t%d\t%s\n" % (timestamp, load + 1, sloap, activeNodes, ActivConn, str(pairs)))
			debugfile.flush()
			return int(load + 1 + newbackup)
		#elif load <= -1:
		else:
			# return how many nodes should turned off
			# dont use prediction for turning off nodes, otherwise nodes will be turned off to early
			
			
			print "Can shut down: " ,int((-1*((cherub_config.overload * activeNodes) - ActivConn ) / cherub_config.overload) + nodes_for_backup)
			debugfile.write("%f\t%d\t%f\t%d\t%d\t%s\n" % (timestamp, int(-1*((cherub_config.overload * activeNodes) - ActivConn ) / cherub_config.overload), sloap, activeNodes, ActivConn, str(pairs)))
			debugfile.flush()
			return int((-1*((cherub_config.overload * activeNodes) - ActivConn ) / cherub_config.overload) + nodes_for_backup)
		#else:
			#print "function duration: " + str(time.time() - start_time) + " sec by "+str(len(cherub_config.cluster))+" nodes"
			# if the load is very weak sinking (between 0 and -1 then we dont turn on or off)
		#	debugfile.write("%f\t%d\t%f\t%d\t%d\t%s\n" % (timestamp, 0, sloap, activeNodes, ActivConn, str(pairs)))
		#	debugfile.flush()
		#	return 0
	except:
                syslog.syslog(syslog.LOG_ERR, "While processing, an unexpected error occured in function cherub_global_load:"+str(sys.exc_info()[0]))
		print "While processing, an unexpected error occured in function cherub_global_load:"+str(sys.exc_info()[0])
		syslog.syslog(syslog.LOG_ERR, "While processing, an unexpected error occured in function cherub_global_load:"+str(sys.exc_info()[1]))
		print sys.exc_info()[1]
		syslog.syslog(syslog.LOG_ERR, "While processing, an unexpected error occured in function cherub_global_load:"+str(traceback.print_tb(sys.exc_info()[2])))
		print traceback.print_tb(sys.exc_info()[2])
                return 0


                
'''
############################################################################################################
############################################################################################################
############################################################################################################
# Old Functions, only in the source for later reference and benchmarking ###################################
############################################################################################################
############################################################################################################
############################################################################################################
'''

def getGlobalLinearRegression_TCPDUMP_version():
	'''returns the linear regression of the load of all nodes accumulated

	Every non-empty line of the getTCPDump() output is parsed as
	"<timestamp> <count>:<node> <count>:<node> ..."; the counts of one line
	are summed up and form one (timestamp, sum) data point which is handed
	to lineareRegression(). Blank lines are skipped.
	'''
	tcpdumpResult, err = getTCPDump(linear_regression_history)
	regressionPairs = []
	for line in tcpdumpResult.split('\n'):
		fields = line.split()
		# skip blank lines (e.g. the one after the trailing newline)
		if not fields:
			continue
		linesum = sum(int(field.split(":")[0]) for field in fields[1:])
		regressionPairs.append((int(fields[0]), linesum))
	return lineareRegression(regressionPairs)

def findLastSecondConnections(node_name):
	'''returns the number of requests listed for node_name in the first line
	of getTCPDump(1) as an int, or 0 if there is no output or no entry for
	this node'''
	# command for the wgets
	# tcpdump -Ar cherub_lvs.log | fgrep -B 3 "GET " | fgrep ib7 | 
	# grep ..:..:..\. | tail -n 15 | awk 'BEGIN{FS="[:.]"} {print $3}'
	# to use it, we have to let a daemon gather connection information
	# in a file cherub_lvs.log
	tcpdumpResult, err = getTCPDump(1)
	lines = [line for line in tcpdumpResult.split('\n') if line.strip()]
	# if there is no output at all, the load is 0
	if not lines:
		return 0
	for field in lines[0].split():
		parts = field.split(':')
		if len(parts) == 2 and parts[1] == node_name:
			# always hand back a number, callers add it up or compare it
			return int(parts[0])
	# if there is no output for this node, the load is 0
	return 0

def findMeanConnectionRate(node_name,n):
	'''returns the mean of the number of requests from the last n seconds

	The counts listed for node_name in the getTCPDump(n) output are summed
	up and divided by the number of non-empty output lines. Returns 0.0 if
	there is no output at all.
	'''
	tcpdumpResult, err = getTCPDump(n)
	lines = [line for line in tcpdumpResult.split('\n') if line.strip()]
	# if there is no output, the load is 0 (and we must not divide by zero)
	if not lines:
		return 0.0
	total = 0.0
	for line in lines:
		for field in line.split():
			parts = field.split(':')
			if len(parts) == 2 and parts[1] == node_name:
				total += int(parts[0])
	return total/len(lines)

def cherub_global_load2012():
	'''
	THIS IS A OPSOLET FUNCTION AND ONLY IN USE FOR THE UNIT TESTER FOR COMPARISON REASON
	'''
	try:	
		# only if all nodes provides the same services, you can use global_load
		ipvsadm = subprocess.Popen(["ipvsadm","-S"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
		stdout, stderr = ipvsadm.communicate()
		numberofservices = stdout.count('-A ')
		if numberofservices < 1:
			print "WARNING: no service defined"
			return -1
		
		for node in cherub_config.cluster:
			# if the number of services the nodes provide is X,
			# every node has to occure X times in the output
			summe = stdout.count(node[0])
			if summe != numberofservices:
				print "problems with the number of services!"
				return 0
		load = False
		for node in cherub_config.cluster:
			# XXX here are to positions
			if use_mean:
				print "using mean"
				ActivConnPerNode = findMeanConnectionRate(node[0],cherub_config.update_everything)
			else:
				print "using last second"
				ActivConnPerNode = findLastSecondConnections(node[0])
			sloap = getSloap(node[0])
			print str(node[0])+" : "+str(sloap[3])+"req/sec (mean)"
			print str(node[0])+" : "+str(ActivConnPerNode)+"req/sec (meanfunction)"
			print str(sloap[1])+" sloap"
			#print sloap
			if (sloap[1] == 0):
				continue
			if ((float(cherub_config.more_then_ActivConn_is_overload) - sloap[3]) / sloap[1]) < boot_duration and sloap[1] >= 0.0:
				print "(cherub_config.more_then_ActivConn_is_overload - sloap[3]) / sloap[1]) < boot_duration"
				load = True
			# fix threshold
			if int(ActivConnPerNode) > int(cherub_config.more_then_ActivConn_is_overload):
				load = True
		if load:
			return 1
		else:
			return 0
	except:
                syslog.syslog(syslog.LOG_ERR, "While processing, an unexpected error occured in function cherub_global_load:"+str(sys.exc_info()[0]))
		print "While processing, an unexpected error occured in function cherub_global_load:"+str(sys.exc_info()[0])
		print sys.exc_info()[1]
		print traceback.print_tb(sys.exc_info()[2])
                return 0

                
                
# second parallel version, using nonblocking connects tcp requests - Status function not fully implemented
# for example, no status given busy or online possible due to no checks
def cherub_status_parallel_nonblocking_connects():
  	'''must return the following states
	CHERUB_UNKNOWN = -1 = if the state of the node is unknown or an error occures
	CHERUB_BUSY    =  0 = if the node is booted and BUSY/WORKING and REGISTERT to the RMS
	CHERUB_ONLINE  =  1 = if the node is booted but IDLE and REGISTERT to the RMS
	CHERUB_OFFLINE =  2 = if the node is booted but NOT REGISTERT to the RMS
	CHERUB_DOWN    =  3 = if the node is shutdown and NOT REGISTERT to the RMS
	'''
	
	#status_list=range(len(cherub_config.cluster))
	# fill the list with the name, so if a name is returend, you know which node has no status
	status_list = [cherub_config.cluster[x][0] for x in range(len(cherub_config.cluster))]
	socket_list = []
	fd_to_socket_list_index = {}
	
	# first, build the dynamic busy threshold
	running = 0
	for node in statusmap:
		if statusmap[node] == 0 or statusmap[node] == 1:
			running = running +1
	if running == 0:
		running = 1
	less_then_ActivConn_is_idle = cherub_config.overload - (cherub_config.overload / running)
	
	p = subprocess.Popen(["ipvsadm"],stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	# get all lines from ipvsadm
	lines = p.communicate()[0].split('\n')
	# parse the lines of ipvsadm
	for line in lines:
		ActivConnPerNode = 0
		# the name is the first arguement after spliting the line, spliting on dot again
		# is necessary cause ipvsadm adds whole domain name
		if len(line.split())>=1 and status_list.count(line.split()[1].split('.')[0])>0:
			process_index = status_list.index(line.split()[1].split('.')[0])
		# if an ip is given, there is no domain name so we split at : and not at a dot
		elif len(line.split())>=1 and status_list.count(line.split()[1].split(':')[0])>0:
			process_index = status_list.index(line.split()[1].split(':')[0])
		else:
			continue
		
		# weight > 0 --> registert
		# at the moment, it is ignort if it has activconn
		#print line
		if  int(line.split()[3]) > 0:
			# check what load method is used
			if use_mean:
				# 1: last update period avg
				ActivConnPerNode = findMeanConnectionRate(line.split()[1].split('.')[0],cherub_config.update_everything)
			else:
				# 2: last second
				ActivConnPerNode = findLastSecondConnections(line.split()[1].split('.')[0])
			
			# active connections >= less_then_ActivConn_is_idle --> the node is BUSY
			if(int(ActivConnPerNode) >= int(less_then_ActivConn_is_idle)):
				statusmap[line.split()[1].split('.')[0]] = 0
				status_list[process_index] = 0
			# active connections < less_then_ActivConn_is_idle --> the node is ONLINE
			else:
				statusmap[line.split()[1].split('.')[0]] = 1
				status_list[process_index] = 1
		# if its not registerd, we have to check for connection, add it to the socket list
		else:
			#create socket for later connection attempt
			# list has tuple of (node_index_in_status_list, socket)
			socket_helper = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
			socket_list.append((process_index, socket_helper))
			# mapping fd to index in socket_list
			fd_to_socket_list_index[socket_helper.fileno()]=len(socket_list)-1
	
	'''der netzwerk part funktioniert wie folgt. Für alle knoten dessen gewicht im ipvsadm 0 waren,
	wird ein socket erzeugt und ein non-blocking connect_ex versucht, non-blocking mit einem timeout
	gibt es nicht, daher warte ich genau eine sekunde um allen sockets zeit für ihr connect zu geben.
	Wenn nach der sekunde kein connect oder refuse kam, wird angenommen das der knoten nicht verfügbar
	ist! Danach werden die entsprechenden sockets wieder geschlossen.
	Für die Buchführung gibt es mehrere listen, die socket_liste und die status_liste, in der
	socketliste sind tupel mit socket und dazughörigem index in der status_liste geführt, ausserdem gibt
	es eine hashmap (fd_to_socket_list_index) die einen file_descriptor (fd) wieder einem index in der
	socket_liste zuweisen kann. In der status_liste wird indess für alle nicht fertigen knoten deren fd
	gespeichert, dies ist ein wenig unsauberm, da die niedrigen fd's gleich einem status wären.'''
	
	#we use http server so port is 80
	service_port = 80
	socket_poll = select.epoll()
	
	#spawn and connect all sockets
	for n,socket_item in enumerate(socket_list):
		node_address = cherub_config.cluster[socket_item[0]][0]
		#set nonblocking
		socket_list[n][1].setblocking(0)
		#register the socket at epoll, so we can check when its available or an error occurred
		socket_poll.register(socket_list[n][1].fileno(), select.EPOLLOUT)
		#remember the node,fd pair for later
		node_index_in_status_list = socket_list[n][0]
		status_list[node_index_in_status_list]=socket_list[n][1].fileno()
		#try to connect
		socket_list[n][1].connect_ex((node_address,service_port))
		
	#print "status_list: "+str(status_list)
	#print "socket_list: "+str(socket_list)
		
		
	#check all connects via epoll after we have waited one second
	time.sleep(1)
	n = len(socket_list)
	
	#check for events
	event_list = socket_poll.poll(0)
	while len(event_list)>0:
		current_fd, current_event = event_list.pop()
		#print "fd: "+str(current_fd)+" # event: "+str(current_event)
		
		#find the status_list index of the node from the corresponding fd
		node_index_in_status_list = status_list.index(current_fd)
		node_index_in_socket_list = fd_to_socket_list_index[current_fd]
		#print "node_index_in_status_list: "+str(node_index_in_status_list)
		#print "node_index_in_socket_list: "+str(node_index_in_socket_list)
		
		#ready for write means, connect has workt and service is reachable
		if current_event == 4:
			#print "connected"
			status_list[node_index_in_status_list]= 2
		else:
			#print "other status: "+str(current_event)
			status_list[node_index_in_status_list]= 3
		#remove the fd from the polling list
		socket_poll.unregister(current_fd)
		#close coresponding socket and degrade the counter
		socket_list[node_index_in_socket_list][1].close()
		socket_list[node_index_in_socket_list] = 0

	n = len(socket_list)
	while n > 0:
		if socket_list[n-1] != 0:
			socket_list[n-1][1].close()
			status_list[socket_list[n-1][0]] = 3 #down
			socket_list[n-1] = 0
		n = n - 1

	print "returning: "+str(status_list)
	return status_list

def cherub_status_wrapper(self, node_name, status_queue, position):
	'''thread target: puts the tuple (position, cherub_status(node_name))
	into status_queue and returns 0; the first argument is not used'''
	result = (position, cherub_status(node_name))
	status_queue.put(result)
	return 0



# first parallel version, using queues and threads
def cherub_status_parallel_thread_version():
	'''starts one thread per node of cherub_config.cluster which runs
	cherub_status_wrapper, waits for all of them and returns the list of
	the collected states in cluster order'''
	node_count = len(cherub_config.cluster)
	status_queue = Queue.Queue()

	# spawn all threads
	workers = []
	for position, node in enumerate(cherub_config.cluster):
		worker = threading.Thread(target=cherub_status_wrapper, args=(threading, node[0], status_queue, position))
		worker.start()
		workers.append(worker)

	# join them all again
	for worker in workers:
		worker.join()

	# insert all results, every queue item carries its own position
	status_list = list(range(node_count))
	for _ in range(node_count):
		position, state = status_queue.get()
		status_list[position] = state
	return status_list

def cherub_status(node_name):
	'''must return the following states
	CHERUB_UNKNOWN = -1 = if the state of the node is unknown or an error occures
	CHERUB_BUSY    =  0 = if the node is booted and BUSY/WORKING and REGISTERT to the RMS
	CHERUB_ONLINE  =  1 = if the node is booted but IDLE and REGISTERT to the RMS
	CHERUB_OFFLINE =  2 = if the node is booted but NOT REGISTERT to the RMS
	CHERUB_DOWN    =  3 = if the node is shutdown and NOT REGISTERT to the RMS
	'''
	try:
		is_virtual = False
		if 'V' in node_name:
			is_virtual = True
		# first, build the dynamic busy threshold
		running = 0
		for node in statusmap:
			if statusmap[node] == 0 or statusmap[node] == 1:
				running = running +1
		if running == 0:
			running = 1
		less_then_ActivConn_is_idle = cherub_config.overload - (cherub_config.overload / running)
		### mit der ssh ConnectTimeout option sollte das ssh timeout als down beweis ausreichen
		# first ping it to look, if it is available
		#pingproc = subprocess.Popen(["ping","-c","1","-W","1",node_name],stdout=subprocess.PIPE, stderr=subprocess.PIPE)
		#tuple = pingproc.communicate()
		#pingtext = tuple[0]
		#if is_virtual:
		#	pingtext = 'this is a virtual nodes'
		# if the ping fails, it is in status DOWN
		#if pingtext.count('0 received') == 1:
		#	status_lock.acquire()			
		#	statusmap[node_name] = 3
		#	status_lock.release()
		#	return 3

		# get the status of the lvs
		ActivConnPerNode = 0
		if not is_virtual:
			p = subprocess.Popen(["ipvsadm"],stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			tuple = p.communicate()
			text = tuple[0]
			subtext = text.split('\n')
			is_in = 0
			registert = False
			# parse the lins of the lvs status
			for lines in subtext:
				# if the node provide a service, count how many activ connections it has
				is_in = lines.count(" "+node_name+":")
				if is_in == 1:
					# if a nodes weight is greater 0, it is registert in our sense
					# at the moment, it is ignort if it has activconn, ask [4]>0 to take it
					# into consideration
					if  int(lines.split()[3]) > 0:
						registert = True
						# XXX How to find out the load
						if use_mean:
							# 1: last update period avg
							ActivConnPerNode = findMeanConnectionRate(node_name,cherub_config.update_everything)
						else:
							# 2: last second
							ActivConnPerNode = findLastSecondConnections(node_name)
				is_in = 0
			if not registert:
		
				# then look if httpd is up
				httpd = subprocess.Popen(["ssh","-o","ConnectTimeout=1",node_name,"/etc/init.d/httpd","status"],stdout=subprocess.PIPE, stderr=subprocess.PIPE)
				tuple = httpd.communicate()
				httpdtext = tuple[0]
				if httpdtext.count('is running...') == 1:
					# if httpd is up, its OFFLINE
					status_lock.acquire()			
					statusmap[node_name] = 2
					status_lock.release()
					return 2
				else:
					# if httpd is still not up, consider as DOWN
					status_lock.acquire()			
					statusmap[node_name] = 3
					status_lock.release()
					return 3
		
		elif is_virtual:
			status_lock.acquire()			
			s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
			try:
				s.connect("/tmp/cherub_communication")
			except Exception, e:
				status_lock.release()
				print "ERROR: "+str(e)
				print "Could not connect to communicator. Environmental Simulation seems not active."
				print "Please Start the Simulator and try again or delete the Virtual servers from the"
				print "cherub configuration. (All Nodes containing a upper case V are considered Virtual)"
				print "\nExiting"
				return -2
			s.send("get:"+node_name)
			recv_state = int(s.recv(1024))
			s.close()
			status_lock.release()
			#print "recv: "+str(recv_state)
			if recv_state == -1:
				statusmap[node_name] = -1
				return -1
			elif recv_state == 0 or recv_state == 1:
				if use_mean:
					# 1: last update period avg
					ActivConnPerNode = findMeanConnectionRate(node_name,cherub_config.update_everything)
					#print "ActivConnPerNode (mean): "+str(ActivConnPerNode)
				else:
					# 2: last second
					ActivConnPerNode = findLastSecondConnections(node_name)
					#print "ActivConnPerNode (lastsecond): "+str(ActivConnPerNode)
				status_lock.acquire()			
				s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
				s.connect("/tmp/cherub_communication")
				#print "int(ActivConnPerNode): "+str(int(ActivConnPerNode))
				#print "int(less_then_ActivConn_is_idle)): "+str(int(less_then_ActivConn_is_idle))
				if(int(ActivConnPerNode) >= int(less_then_ActivConn_is_idle)):	# BUSY
					statusmap[node_name] = 0
					if recv_state != 0:
						print "send set BUSY command"
						s.send("set:"+str(node_name)+":0")
						s.close()
					status_lock.release()
					return 0				
				if(int(ActivConnPerNode) < int(less_then_ActivConn_is_idle)):	# ONLINE
					statusmap[node_name] = 1
					if recv_state != 1:
						print "send set ONLINE command"
						s.send("set:"+str(node_name)+":1")
						s.close()
					status_lock.release()
					return 1
				status_lock.release()
				return -1
				statusmap[node_name] = -1
			elif recv_state == 2:
				statusmap[node_name] = 2
				return 2
			elif recv_state == 3:
				statusmap[node_name] = 3
				return 3

		# if the active connections of a node are MORE then a threshold set by the admin in the cherub_config.py, the node is BUSY
		# NOT overload --> its for the load!
		if(int(ActivConnPerNode) >= int(less_then_ActivConn_is_idle)):
			status_lock.acquire()			
			statusmap[node_name] = 0
			status_lock.release()
			return 0
		# if the active connections of a node are LESS then a threshold set by the admin in the cherub_config.py, the node is ONLINE
		if(int(ActivConnPerNode) < int(less_then_ActivConn_is_idle)):
			status_lock.acquire()			
			statusmap[node_name] = 1
			status_lock.release()
			return 1
		# otherwise the status is UNKNOWN
		status_lock.acquire()			
		statusmap[node_name] = -1
		status_lock.release()
		return -1
	except:
                syslog.syslog(syslog.LOG_ERR, "While processing node:"+str(node_name)+", an unexpected error occured in function cherub_status:"+str(sys.exc_info()[0]))
                syslog.syslog(syslog.LOG_ERR, "value:"+str(sys.exc_info()[1]))
                syslog.syslog(syslog.LOG_ERR, "line:"+str(traceback.tb_lineno(sys.exc_info()[2])))
                syslog.syslog(syslog.LOG_ERR, "traceback:"+str(traceback.print_tb(sys.exc_info()[2])))
                return -1

                
def cherub_global_load_using_TCPDUMP():
	'''this script has to return the following values 
	 -1 = if an error occured
	0-n = for the number of nodes needet to handle the load 
	'''
	try:	
		start_time = time.time()
		load = 0
		ActivConn = 0
		activeNodes = 0
		if use_mean:
			print "using mean"
		else:
			print "using last second"
		
		for node in statusmap:
			if statusmap[node] != 1 and statusmap[node] != 0:
				continue
			activeNodes = activeNodes + 1
				
			if use_mean:
				ActivConnPerNode = findMeanConnectionRate(node,cherub_config.update_everything)
			#	sys.stdout.write(str(node)+" : "+str(ActivConnPerNode)+"req/sec (meanfunction)")
			else:
				ActivConnPerNode = findLastSecondConnections(node)
			#	sys.stdout.write(str(node)+" : "+str(ActivConnPerNode)+"req/sec (lastsecond)")
			ActivConn = ActivConn + ActivConnPerNode

		globalLinearRegression = getGlobalLinearRegression()
		sloap = globalLinearRegression[1]
		#print "ActivConn: "+str(ActivConn)+ "  globalsloap: "+str(sloap)
		#print "(("+str(cherub_config.overload)+" * "+str(activeNodes)+") - "+str(ActivConn)+") == 0"
		
		load_difference = (cherub_config.overload * activeNodes) - ActivConn - (sloap * boot_duration)
		load = load_difference * -1 / cherub_config.overload
		#print "load_difference: "+str(load_difference)+"   load: "+str(load)
		if load > 0:
			#print "function duration: " + str(time.time() - start_time) + " sec by "+str(len(cherub_config.cluster))+" nodes"
			return int(load + 1)
		else:
			#print "function duration: " + str(time.time() - start_time) + " sec by "+str(len(cherub_config.cluster))+" nodes"
			return 0
	except:
                syslog.syslog(syslog.LOG_ERR, "While processing, an unexpected error occured in function cherub_global_load:"+str(sys.exc_info()[0]))
		print "While processing, an unexpected error occured in function cherub_global_load:"+str(sys.exc_info()[0])
		print sys.exc_info()[1]
		print traceback.print_tb(sys.exc_info()[2])
                return 0
