//Copyright (C) 2010 Simon Kiertscher
//
//This program is free software; you can redistribute it and/or modify it 
//under the terms of the GNU General Public License as published by the 
//Free Software Foundation; either version 3 of the License, or (at your option) 
//any later version.
//
//This program is distributed in the hope that it will be useful, but WITHOUT 
//ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
//FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License along with 
//this program; if not, see <http://www.gnu.org/licenses/>.

#include <Python.h>
#include <unistd.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <assert.h>


#include <unistd.h>
#include <syslog.h> 

#include <math.h>
#include <complex.h>
#include <sys/time.h>

#include <errno.h>

#include "cherub_api.h"
#include "cherub_test.h"

/* version is composed of Mainlevel.Featurelevel.Patchlevel + revision number of cherub.c! */
#define VERSION "1.3.12 $Rev: 15311 $"
#define YEAR "$Date: 2016-02-05 17:50:05 +0100 (Fri, 05 Feb 2016) $"

/* Set to 1 by signal_handler() when SIGINT or SIGTERM is received. */
static volatile sig_atomic_t cherub_terminate = 0;
/* Set to 1 by signal_handler() when SIGHUP is received. */
static volatile sig_atomic_t cherub_reload = 0;

/* Printable names of the node states. The table is indexed with (state value + 2), */
/* which is how log_state_change() maps a state back to its name.                   */
char *cherub_states[] = {"ERROR","UNKNOWN","BUSY","ONLINE","OFFLINE","DOWN"};
/* Printable names of the power phases, indexed by the phase value returned by */
/* phase_to_set_on() (set_load() logs cherub_phasen[phase]).                    */
char *cherub_phasen[] = {"L1","L2","L3"};

/* Return the smaller of the two arguments (b when both are equal, which is the same value). */
int min(int a, int b)
{
	return (a < b) ? a : b;
}

/*
 * Common handler for the signals registered in main().
 *
 *   SIGINT / SIGTERM : set cherub_terminate and log the signal
 *   SIGHUP           : set cherub_reload and log the signal
 *   SIGFPE           : only log an error
 *
 * Any other signal number is ignored.
 *
 * NOTE(review): syslog() is not on the POSIX list of async-signal-safe
 * functions, and returning from a handler for a hardware generated SIGFPE
 * is undefined. Consider only setting the flags here and logging from the
 * main loop - needs a matching change at the place where the flags are read.
 */
static void signal_handler (int signal_number)
{
	if (signal_number == SIGINT) {
		cherub_terminate = 1;
		syslog(LOG_INFO, "[MASTER] [got a SIGINT SIGNAL]");
	} else if (signal_number == SIGTERM) {
		cherub_terminate = 1;
		syslog(LOG_INFO, "[MASTER] [got a SIGTERM SIGNAL]");
	} else if (signal_number == SIGHUP) {
		cherub_reload = 1;
		syslog(LOG_INFO, "[MASTER] [got a SIGHUP SIGNAL]");
	} else if (signal_number == SIGFPE) {
		syslog(LOG_ERR, "[MASTER] [try division by zero]");
	}
}

/*
 * Choose the power phase on which the next node should receive load.
 *
 * busy     : per phase counters (indexed by L1, L2, L3) of nodes already in use
 * not_busy : per phase counters (indexed by L1, L2, L3) of nodes still free
 *
 * Among all phases that still have free nodes (not_busy > 0) the one with the
 * smallest busy counter is returned; on a tie the earlier phase in the order
 * L1, L2, L3 wins.
 *
 * Returns L1, L2 or L3, or -1 when no phase has a free node or when any
 * not_busy counter is negative.
 */
int phase_to_set_on(int *busy, int *not_busy)
{
	const int candidates[] = {L1, L2, L3};
	const int candidate_count = (int)(sizeof candidates / sizeof candidates[0]);
	int have_choice = 0;
	int choice = -1;
	int i;

	syslog(LOG_DEBUG, "[MASTER] [phase_to_set_on] not_busy[L1]=%d not_busy[L2]=%d not_busy[L3]=%d\n",not_busy[L1],not_busy[L2],not_busy[L3]);

	/* a negative counter is an inconsistent input and is reported as an error */
	for (i = 0; i < candidate_count; i++) {
		if (not_busy[candidates[i]] < 0)
			return -1;
	}

	/* scan in the order L1, L2, L3; strict '<' keeps the earliest phase on ties */
	for (i = 0; i < candidate_count; i++) {
		const int phase = candidates[i];

		if (not_busy[phase] == 0)
			continue;	/* no capacity on this phase */

		if (!have_choice || busy[phase] < busy[choice]) {
			choice = phase;
			have_choice = 1;
		}
	}

	return have_choice ? choice : -1;
}

//set_load set the load parameter of every node if in the queue is a job in status Q (queued) and this job needs this node
void set_load(struct cherub_cluster *cluster, struct cherub_content *content)
{
	assert(cluster != NULL);
	syslog(LOG_DEBUG, "[MASTER] [set_load] start of the set_load routin");
	int busy[]={0,0,0};		//busy is how many nodes on this phase are BUSY or UNKNOWN
	int not_busy[]={0,0,0};	//not_busy is how many nodes on this phase NOT BUSY or UNKNOWN
	int n,error = 0,global_load = 0;
	/* int nodes_for_backup = 0, nodes_online = 0, nodes_to_turn_on = 0; needed for backup which is now implemented in the modules*/
	int nodes_load_size;
	int* nodes_load_list = NULL;
	syslog(LOG_DEBUG, "[MASTER] [set_load] variables are set");
	for(n = 0; n < cluster->number_of_nodes; n++)
	{	
		if(cluster->nodes[n].state != CHERUB_BUSY && cluster->nodes[n].state != CHERUB_UNKNOWN)
		{
			cluster->nodes[n].load = 0;
			if(cluster->nodes[n].power_phase == 1 )
				not_busy[L1]++;
			if(cluster->nodes[n].power_phase == 2 )
				not_busy[L2]++;
			if(cluster->nodes[n].power_phase == 3 )
				not_busy[L3]++;
		}
		else
		{
			if(cluster->nodes[n].power_phase == 1 )
				busy[L1]++;
			if(cluster->nodes[n].power_phase == 2 )
				busy[L2]++;
			if(cluster->nodes[n].power_phase == 3 )
				busy[L3]++;
		}
	}
	syslog(LOG_DEBUG, "[MASTER] [set_load] nodes who are busy or unknown detected");
	syslog(LOG_DEBUG, "[MASTER] [set_load] not_busy[L1]: %d not_busy[L2]: %d not_busy[L3]: %d",not_busy[L1],not_busy[L2],not_busy[L3]);
	//only if nodes are available, we have to set load
	/* Global Load tells us now, how many nodes to shutdown, so we can no longer ignore this stuff if all nodes are busy; TESTING */
	if((not_busy[L1]+not_busy[L2]+not_busy[L3]) >= 0)
	{
		syslog(LOG_DEBUG, "[MASTER] [set_load] not_busy[L1]+not_busy[L2]+not_busy[L3]) > 0 is TRUE");
		//find out if a node is direct selected
		//call nodes_load function if implemented
		if(content->parallel_load==1)	
		{
			syslog(LOG_DEBUG, "[MASTER] [set_load] TRY  - cherub_load_list");
			nodes_load_list = cherub_load_list(&nodes_load_size, content);
			syslog(LOG_DEBUG, "[MASTER] [set_load] DONE - cherub_load_list");
			if (nodes_load_list == NULL) 
			{
				syslog(LOG_DEBUG, "[MASTER] [set_load] nodes_load returned NULL -> this case should never occur, bacause it was testet earlier on");
				content->parallel_load=0;
			}
			else if (nodes_load_size != cluster->number_of_nodes) 
			{
				syslog(LOG_ERR, "[MASTER] [set_load] wrong size of nodes_load returned list -> fallback to node_load");
				free(nodes_load_list);
				nodes_load_list = NULL;
				content->parallel_load=0;
			}
		}
		//find out if a node is direct selected
		for(n = 0; n < cluster->number_of_nodes; n++)
		{
			if (content->parallel_load==0) {
				syslog(LOG_DEBUG, "[MASTER] [set_load] TRY  - cherub_node_load");
				error = cherub_node_load(cluster->nodes[n].name, content);
				syslog(LOG_DEBUG, "[MASTER] [set_load] DONE - cherub_node_load");
			}
			else {
				error = nodes_load_list[n];
			}
			//set only load if the node is not busy/unknown and ther is load
			if((error == 1) &&\
			(cluster->nodes[n].state != CHERUB_BUSY) &&\
			(cluster->nodes[n].state != CHERUB_UNKNOWN))
			{
				cluster->nodes[n].load = 1;
				if(cluster->nodes[n].power_phase == 1 )
				{	
					not_busy[L1]--;
					busy[L1]++;
				}
				if(cluster->nodes[n].power_phase == 2 )
				{	
					not_busy[L2]--;
					busy[L2]++;
				}
				if(cluster->nodes[n].power_phase == 3 )
				{	
					not_busy[L3]--;
					busy[L3]++;
				}
				syslog(LOG_DEBUG, "[MASTER] [set_load] found load for node %s\n",cluster->nodes[n].name);
			}
			else
			{
				syslog(LOG_DEBUG, "[MASTER] [set_load]    no load for node %s\n",cluster->nodes[n].name);
			}
		}

		if (nodes_load_list != NULL) {
			free(nodes_load_list);
			nodes_load_list = NULL;
		}

		
		//find out the global_load
		syslog(LOG_DEBUG, "[MASTER] [set_load] TRY  - global_load");
		global_load = cherub_global_load(content);
		syslog(LOG_DEBUG, "[MASTER] [set_load] DONE - global_load");
		syslog(LOG_DEBUG, "[MASTER] [set_load] global_load=%d",global_load);
		syslog(LOG_DEBUG, "[MASTER] [set_load] g(p1,p2,p3)=(%d,%d,%d)",not_busy[L1],not_busy[L2],not_busy[L3]);
		syslog(LOG_DEBUG, "[MASTER] [set_load] g(b1,b2,b3)=(%d,%d,%d)",busy[L1],busy[L2],busy[L3]);
		
		/* stuff for backup function                                                                                             */
		/* Note: Also the backup function should be part of the core, it is not possible to implement it in the core for the     */
		/*       SLB scenario. The reason is that the core has no knowleage about the nodes which has to be on, only how many    */
		/*       it should start or shutdown. We need that knowleage (e.g. requests per seconds) to compute that demand and      */
		/*       thus the number of backup nodes.                                                                                */
		
		/* nodes for backup are the once which are ONLINE, so first we cound them, then we compare it with the backup percentage */
		/* count nodes which are ONLINE                                                                                          */
		/*syslog(LOG_DEBUG, "[MASTER] [set_load] TRY  - backup");
		for(n = 0; n < cluster->number_of_nodes; n++)
		{
			if (cluster->nodes[n].state == CHERUB_ONLINE){
				nodes_online += 1;
			}
		}
		
		nodes_for_backup = (cluster->number_of_nodes*content->backup)/100;
		nodes_to_turn_on = nodes_for_backup - nodes_online;
		syslog(LOG_DEBUG, "[MASTER] [set_load] nodes_for_backup=%d",nodes_for_backup);
		syslog(LOG_DEBUG, "[MASTER] [set_load] nodes_to_turn_on=%d",nodes_to_turn_on);
		global_load += nodes_to_turn_on;
		printf("nodes_to_turn_on:\t%d\n", nodes_to_turn_on);
		printf("new_global_load:\t%d\n", global_load);
		syslog(LOG_DEBUG, "[MASTER] [set_load] DONE - backup");
		syslog(LOG_DEBUG, "[MASTER] [set_load] TRY  - while"); */

		//as long as ther is load and ther are nodes who are not busy/unknown or has already load, we can set load
		while((global_load) > 0 && (not_busy[L1]+not_busy[L2]+not_busy[L3]) > 0)
		{
			syslog(LOG_DEBUG, "[MASTER] [set_load] global_load=%d",global_load);
			syslog(LOG_DEBUG, "[MASTER] [set_load] g(p1,p2,p3)=(%d,%d,%d)",not_busy[L1],not_busy[L2],not_busy[L3]);
			syslog(LOG_DEBUG, "[MASTER] [set_load] g(b1,b2,b3)=(%d,%d,%d)",busy[L1],busy[L2],busy[L3]);
			
			int phase;
			phase = phase_to_set_on(busy, not_busy);
			syslog(LOG_DEBUG, "[MASTER] [set_load] phase_to_set_on has returnt: %s",cherub_phasen[phase]);
			assert(phase == L1 || phase == L2 || phase == L3);
			
			switch(phase)
			{
				case L1:
				{
					for(n = 0; n < cluster->number_of_nodes; n++)
					{
						//if the node is on the related phase and has no load and is not busy or unknown, set load...
						if(cluster->nodes[n].power_phase == 1 &&\
						   cluster->nodes[n].load        == 0 &&\
						   cluster->nodes[n].state       != CHERUB_BUSY &&\
						   cluster->nodes[n].state       != CHERUB_UNKNOWN)
						{
							cluster->nodes[n].load = 1;
							not_busy[L1]--;
							busy[L1]++;
							global_load--;
							syslog(LOG_DEBUG, "[MASTER] [set_load] set load for node %s\n",cluster->nodes[n].name);
							syslog(LOG_DEBUG, "[MASTER] [set_load] p1--");
							break;
						}
					}
					break;
				}
				case L2:
				{
					for(n = 0; n < cluster->number_of_nodes; n++)
					{
						//if the node is on the related phase and has no load and is not busy or unknown, set load...
						if(cluster->nodes[n].power_phase == 2 &&\
						   cluster->nodes[n].load        == 0 &&\
						   cluster->nodes[n].state       != CHERUB_BUSY &&\
						   cluster->nodes[n].state       != CHERUB_UNKNOWN)
						{
							cluster->nodes[n].load = 1;
							not_busy[L2]--;
							busy[L2]++;
							global_load--;
							syslog(LOG_DEBUG, "[MASTER] [set_load] set load for node %s\n",cluster->nodes[n].name);
							syslog(LOG_DEBUG, "[MASTER] [set_load] p2--");
							break;
						}
					}
					break;
				}
				case L3:
				{
					for(n = 0; n < cluster->number_of_nodes; n++)
					{
						//if the node is on the related phase and has no load and is not busy or unknown, set load...
						if(cluster->nodes[n].power_phase == 3 &&\
						   cluster->nodes[n].load        == 0 &&\
						   cluster->nodes[n].state       != CHERUB_BUSY &&\
						   cluster->nodes[n].state       != CHERUB_UNKNOWN)
						{
							cluster->nodes[n].load = 1;
							not_busy[L3]--;
							busy[L3]++;
							global_load--;
							syslog(LOG_DEBUG, "[MASTER] [set_load] set load for node %s\n",cluster->nodes[n].name);
							syslog(LOG_DEBUG, "[MASTER] [set_load] p3--");
							break;
						}
					}
					break;
				}
				default	:
				{
					syslog(LOG_ERR, "[MASTER] [set_load] something wrong in the powerphase algorithem");
					cherub_mail(content, "CHERUB-ERROR", "ERROR WAS LOGGED: [MASTER] [set_load] something wrong in the powerphase algorithem (please have a look into the logfile)");
				}
			}//switch
		syslog(LOG_DEBUG, "[MASTER] [set_load] DONE - while");
		}
		
		/* if sequential shutdown is auto and the load is negativ set counter accordingly */
		/* REMEMBER: If negativ load, the load function returns the number of nodes which can*/
		/* can be shutdown, NOT the current load!*/
		if (global_load <= -1 && content->sequential_shutdown <= 0) {
			content->sequential_from_online_counter = global_load * -1;
			/* make sure, one node will remain */
			if(cluster->running_nodes <= content->sequential_from_online_counter)
				content->sequential_from_online_counter = cluster->running_nodes - 1;
		}
		/* if the load is not small enought, make sure, nothing is turned off! */
		if (global_load > -1) {
			content->sequential_from_online_counter = 0;
		}
	}
}


/*
 * Track and log the state transition of node n.
 *
 * If the state is unchanged, only cycles_running is incremented. Otherwise
 * cycles_running and cycles_error are reset and the transition is written to
 * syslog as "[<node>] [state change: <old> --> <new>]". Unexpected
 * transitions (anything -> UNKNOWN, DOWN -> BUSY, BUSY -> OFFLINE,
 * BUSY -> DOWN) are logged as warning, all others as info.
 *
 * cluster : cluster that owns the node, must not be NULL
 * n       : index of the node, 0 <= n < cluster->number_of_nodes
 */
void log_state_change(struct cherub_cluster *cluster,int n)
{
	assert(cluster != NULL);
	assert(n >= 0 && n < cluster->number_of_nodes);

	if(cluster->nodes[n].state == cluster->nodes[n].last_state)
	{
		cluster->nodes[n].cycles_running++;
		return;
	}

	cluster->nodes[n].cycles_running = 0;
	cluster->nodes[n].cycles_error = 0;

	/* cherub_states[] is indexed with state + 2; guard the index so a state */
	/* value outside the table can never read out of bounds                  */
	const int known_states = (int)(sizeof cherub_states / sizeof cherub_states[0]);
	const int old_index = (int)cluster->nodes[n].last_state + 2;
	const int new_index = (int)cluster->nodes[n].state + 2;
	const char *old_name = (old_index >= 0 && old_index < known_states) ? cherub_states[old_index] : "INVALID";
	const char *new_name = (new_index >= 0 && new_index < known_states) ? cherub_states[new_index] : "INVALID";

	//unexpected transitions are logged as warning
	const int unexpected =
		(cluster->nodes[n].last_state != CHERUB_UNKNOWN && cluster->nodes[n].state == CHERUB_UNKNOWN) ||
		(cluster->nodes[n].last_state == CHERUB_DOWN && cluster->nodes[n].state == CHERUB_BUSY) ||
		(cluster->nodes[n].last_state == CHERUB_BUSY && cluster->nodes[n].state == CHERUB_OFFLINE) ||
		(cluster->nodes[n].last_state == CHERUB_BUSY && cluster->nodes[n].state == CHERUB_DOWN);

	/* let syslog do the formatting: no temporary heap buffer, no unchecked allocation */
	syslog(unexpected ? LOG_WARNING : LOG_INFO, "[%s] [state change: %s --> %s]",
	       cluster->nodes[n].name, old_name, new_name);
}

/*
 * State machine step for a node whose state is UNKNOWN.
 *
 * Clears the load flag of the node and logs a warning each time
 * cycles_running is a positive multiple of content->ERROR_THRESHOLD.
 *
 * cluster : cluster that owns the node, must not be NULL
 * content : cherub context providing ERROR_THRESHOLD, must not be NULL
 * n       : index of the node, 0 <= n < cluster->number_of_nodes
 */
void if_CHERUB_UNKNOWN(struct cherub_cluster *cluster, struct cherub_content *content, int n)
{
	assert(cluster != NULL);
	assert(content != NULL);
	assert(n >= 0 && n < cluster->number_of_nodes);
	syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_UNKNOWN] TRY  - state unknown procedure");
	cluster->nodes[n].load = 0;
	//after a while, log a warning!
	/* the threshold is tested for zero first: a modulo by zero is undefined */
	if((cluster->nodes[n].cycles_running > 0) &&
	   (content->ERROR_THRESHOLD != 0) &&
	   ((int)cluster->nodes[n].cycles_running % content->ERROR_THRESHOLD == 0))
	{
		syslog(LOG_WARNING, "[%s] Node state is UNKNOWN for a long time",cluster->nodes[n].name);
	}

	syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_UNKNOWN] DONE - state unknown procedure");
}

/*
 * State machine step for a node whose state is BUSY: clears its load flag.
 *
 * cluster : cluster that owns the node, must not be NULL
 * n       : index of the node, 0 <= n < cluster->number_of_nodes
 */
void if_CHERUB_BUSY(struct cherub_cluster *cluster, int n)
{
	assert(cluster != NULL);
	/* n indexes cluster->nodes[], so number_of_nodes itself is already out of range */
	assert(n >= 0 && n < cluster->number_of_nodes);
	syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_BUSY] TRY  - state busy procedure");
	cluster->nodes[n].load = 0;
	syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_BUSY] DONE - state busy procedure");
}

/*
 * State machine step for a node whose state is ONLINE.
 *
 * A node with load gets its cycles_running counter reset. A node that is not
 * always_on and whose cycles_running reached content->SIGN_OFF_THRESHOLD is
 * signed off via cherub_sign_off(), as long as
 * content->sequential_from_online_counter is positive. Each handled node
 * decrements that counter; when it reaches 0 the cycles_running and
 * cycles_error counters of all ONLINE nodes are reset.
 *
 * With COMMANDLINE_NOACTION set, cherub_sign_off() is not called.
 *
 * cluster : cluster that owns the node, must not be NULL
 * content : cherub context (thresholds, options, counters), must not be NULL
 * n       : index of the node, 0 <= n < cluster->number_of_nodes
 */
void if_CHERUB_ONLINE(struct cherub_cluster *cluster, struct cherub_content *content, int n)
{
	assert(cluster != NULL);
	assert(content != NULL);
	assert(n >= 0 && n < cluster->number_of_nodes);
	int error = 0;

	syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_ONLINE] TRY  - state online procedure");
	//if the node has load, set the cycles running counter to zero
	if(cluster->nodes[n].load == 1)	{
		cluster->nodes[n].cycles_running = 0;
	}

	/*if the signoff threshold is exceeded, try to sign off the node on every cycle*/
	if((cluster->nodes[n].cycles_running >= content->SIGN_OFF_THRESHOLD) && (cluster->nodes[n].always_on != 1)) {
		/*sequential is either on or auto, count how many nodes will go down*/
		if (content->sequential_from_online_counter > 0) {
			//if nothing happens, retry after 5 minutes or on the next pollingintervall if > 5min
			int retry_cycles = (int)(content->MINUTES_NORMALYSER*5);
			if(retry_cycles < 1){retry_cycles = 1;}
			if((int)cluster->nodes[n].cycles_error % retry_cycles == 0) {
				syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_ONLINE] TRY  - sign off");
				/*for no action*/
				if(!(content->options & COMMANDLINE_NOACTION)) {
					error = cherub_sign_off(cluster->nodes[n].name, content);
				}
				else {
					syslog(LOG_INFO, "[MASTER] SIGN OFF --> NO ACTION");
				}
				//a return value of 0 means the sign off script reported no error
				if(error == 0)
				{
					syslog(LOG_INFO, "[%s] try to SIGN OFF",cluster->nodes[n].name);
					syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_ONLINE] DONE - sign off");
				}
				else
				{
					syslog(LOG_ERR, "[%s] SIGN OFF script is broken",cluster->nodes[n].name);
					cherub_mail(content, "CHERUB-ERROR", "ERROR WAS LOGGED: SIGN OFF script is broken (please have a look into the logfile)");
					syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_ONLINE] ERROR- sign off");
				}
			}
			/* if nothing happens for a while, flag a sign off problem               */
			/* (threshold tested for zero first: a modulo by zero is undefined)      */
			if((cluster->nodes[n].cycles_error > 0) &&
			   (content->ERROR_THRESHOLD != 0) &&
			   ((int)cluster->nodes[n].cycles_error % content->ERROR_THRESHOLD == 0))
			{
				syslog(LOG_ERR, "[%s] CANT SIGN OFF",cluster->nodes[n].name);
				cluster->nodes[n].signoff_problem = 1;
				content->mails_to_send = 1;
			}
			cluster->nodes[n].cycles_error++;

			//then decrease the counter
			content->sequential_from_online_counter--;
			//if the counter reaches 0
			if(content->sequential_from_online_counter == 0)
			{
				//reset every nodes counter if it is online
				int i;
				for(i = 0; i < cluster->number_of_nodes; i++)
				{
					if(cluster->nodes[i].state == CHERUB_ONLINE)
					{
						cluster->nodes[i].cycles_running = 0;
						cluster->nodes[i].cycles_error   = 0;
					}
				}
			}
		}
		else
		{
			syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_ONLINE] sequential_shutdown threshold is reached, no further un-registers allowed");
		}
	}
	syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_ONLINE] DONE - state online procedure");
}

/*
 * State machine step for a node whose state is OFFLINE.
 *
 * - A node without load that is not always_on is shut down via
 *   cherub_shutdown() once cycles_running reached content->SHUTDOWN_THRESHOLD.
 * - A node with load, or an always_on node, is registered via
 *   cherub_register() once cycles_running reached content->REGISTER_THRESHOLD.
 *
 * always_on nodes are excluded from the shutdown branch (the same way
 * if_CHERUB_ONLINE() excludes them from sign off); otherwise such a node
 * without load would get a shutdown and a register attempt in one cycle.
 *
 * With COMMANDLINE_NOACTION set, neither script is called.
 *
 * cluster : cluster that owns the node, must not be NULL
 * content : cherub context (thresholds, options), must not be NULL
 * n       : index of the node, 0 <= n < cluster->number_of_nodes
 */
void if_CHERUB_OFFLINE(struct cherub_cluster *cluster, struct cherub_content *content, int n)
{
	assert(cluster != NULL);
	assert(content != NULL);
	assert(n >= 0 && n < cluster->number_of_nodes);
	int error = 0;
	syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_OFFLINE] TRY  - state offline procedure");
	//if the node has still no load (and must not stay on), shutdown
	if(cluster->nodes[n].load == 0 && (cluster->nodes[n].always_on != 1))
	{
		if(cluster->nodes[n].cycles_running >= content->SHUTDOWN_THRESHOLD)
		{
			//if nothing happens, retry after 5 minutes or on the next pollingintervall if > 5min
			int retry_cycles = (int)(content->MINUTES_NORMALYSER*5);
			if(retry_cycles < 1){retry_cycles = 1;}
			if((int)cluster->nodes[n].cycles_error % retry_cycles == 0)
			{
				if(!(content->options & COMMANDLINE_NOACTION))
				{	/*for no action*/
					error = cherub_shutdown(cluster->nodes[n].ip_address, content);
				}
				else
				{
					syslog(LOG_INFO, "[MASTER] SHUTDOWN --> NO ACTION");
				}
				//a return value of 0 means the shutdown script reported no error
				if(error == 0)
				{
					syslog(LOG_INFO, "[%s] try to SHUTDOWN",cluster->nodes[n].name);
				}
				else
				{
					syslog(LOG_ERR, "[%s] SHUTDOWN script is broken",cluster->nodes[n].name);
					cherub_mail(content, "CHERUB-ERROR", "ERROR WAS LOGGED: SHUTDOWN script is broken (please have a look into the logfile)");
				}
			}
			//if it does not go down after many cycles it seems as it is not possible to shutdown
			/* threshold tested for zero first: a modulo by zero is undefined */
			if((cluster->nodes[n].cycles_error > 0) &&
			   (content->ERROR_THRESHOLD != 0) &&
			   ((int)cluster->nodes[n].cycles_error % content->ERROR_THRESHOLD == 0))
			{
				syslog(LOG_ERR, "[%s] CANT SHUTDOWN",cluster->nodes[n].name);
				cluster->nodes[n].shutdown_problem = 1;
				content->mails_to_send = 1;
			}
			cluster->nodes[n].cycles_error++;
		}
	}

	//if the node got load, register it
	if(cluster->nodes[n].load == 1 || (cluster->nodes[n].always_on == 1))
	{
		if(cluster->nodes[n].cycles_running >= content->REGISTER_THRESHOLD)
		{
			//if nothing happens, retry after 5 minutes or on the next pollingintervall if > 5min
			int retry_cycles = (int)(content->MINUTES_NORMALYSER*5);
			if(retry_cycles < 1){retry_cycles = 1;}
			if((int)cluster->nodes[n].cycles_error % retry_cycles == 0)
			{
				if(!(content->options & COMMANDLINE_NOACTION))
				{	/*for no action*/
					error = cherub_register(cluster->nodes[n].name, content);
				}
				else
				{
					syslog(LOG_INFO, "[MASTER] REGISTER --> NO ACTION");
				}
				//a return value of 0 means the register script reported no error
				if(error == 0)
				{
					syslog(LOG_INFO, "[%s] try to REGISTER",cluster->nodes[n].name);
				}
				else
				{
					syslog(LOG_ERR, "[%s] REGISTER script is broken",cluster->nodes[n].name);
					cherub_mail(content, "CHERUB-ERROR", "ERROR WAS LOGGED: REGISTER script is broken (please have a look into the logfile)");
				}
			}
			//if it does not come up after many cycles it seems as it is not possible to register
			if((cluster->nodes[n].cycles_error > 0) &&
			   (content->ERROR_THRESHOLD != 0) &&
			   ((int)cluster->nodes[n].cycles_error % content->ERROR_THRESHOLD == 0))
			{
				syslog(LOG_ERR, "[%s] CANT REGISTER",cluster->nodes[n].name);
				cluster->nodes[n].register_problem = 1;
				content->mails_to_send = 1;
			}
			cluster->nodes[n].cycles_error++;
		}
	}
	syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_OFFLINE] DONE - state offline procedure");
}

/*
 * State machine step for a node whose state is DOWN.
 *
 * A node with load, or an always_on node, counts cycles_under_load and is
 * booted via cherub_boot() once that counter reached content->BOOT_THRESHOLD.
 * When cycles_error exceeds content->ERROR_THRESHOLD and the emergency
 * routine has not run yet, the node is shut down once via cherub_shutdown()
 * to allow a fresh boot attempt. A node without load that is not always_on
 * gets cycles_under_load reset to 0.
 *
 * With COMMANDLINE_NOACTION set, neither cherub_boot() nor the emergency
 * cherub_shutdown() is called.
 *
 * cluster : cluster that owns the node, must not be NULL
 * content : cherub context (thresholds, options), must not be NULL
 * n       : index of the node, 0 <= n < cluster->number_of_nodes
 */
void if_CHERUB_DOWN(struct cherub_cluster *cluster, struct cherub_content *content, int n)
{
	assert(cluster != NULL);
	assert(content != NULL);
	assert(n >= 0 && n < cluster->number_of_nodes);
	int error = 0;
	syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_DOWN] TRY  - state down procedure");
	//if the node is down and load appears, boot the node
	if(cluster->nodes[n].load == 1 || (cluster->nodes[n].always_on == 1))
	{
		cluster->nodes[n].cycles_under_load++;
		if(cluster->nodes[n].cycles_under_load >= content->BOOT_THRESHOLD)
		{
			//if nothing happens, retry after 5 minutes or on the next pollingintervall if > 5min
			int retry_cycles = (int)(content->MINUTES_NORMALYSER*5);
			if(retry_cycles < 1){retry_cycles = 1;}
			if((int)cluster->nodes[n].cycles_error % retry_cycles == 0)
			{
				if(!(content->options & COMMANDLINE_NOACTION))
				{	/*for no action*/
					error = cherub_boot(cluster->nodes[n].ip_address, content);
				}
				else
				{
					syslog(LOG_INFO, "[MASTER] BOOT --> NO ACTION");
				}

				if(error == 0)
				{
					syslog(LOG_INFO, "[%s] try to BOOT",cluster->nodes[n].name);
				}
				else if(error == 2)
				{
					syslog(LOG_ERR, "[MASTER] [%s] no boot command executed, status is already on",cluster->nodes[n].name);
				}
				else if(error == 3)
				{
					syslog(LOG_ERR, "[MASTER] [%s] no boot command executed, node not reachable at the moment",cluster->nodes[n].name);
				}
				else
				{
					syslog(LOG_ERR, "[%s] BOOT script is broken",cluster->nodes[n].name);
					cherub_mail(content, "CHERUB-ERROR", "ERROR WAS LOGGED: BOOT script is broken (please have a look into the logfile)");
				}
			}
			/* if a node can not be booted after a period of 5 minutes */
			/* start the emergency sequence and shut it down again, for another boot try */
			if(cluster->nodes[n].cycles_error > content->ERROR_THRESHOLD && cluster->nodes[n].emergency_routin == 0 )
			{
				cluster->nodes[n].emergency_routin = 1;
				cluster->nodes[n].cycles_error = 0;
				/* this may only work with ipmi, due to no ssh while off;-)  */
				syslog(LOG_ERR, "[%s] START EMERGENCY ROUTIN! SHUTDOWN FOR NEW BOOT TRY!",cluster->nodes[n].name);
				/* the emergency shutdown is an action too, so it has to honour the noaction mode */
				if(!(content->options & COMMANDLINE_NOACTION))
				{
					error = cherub_shutdown(cluster->nodes[n].ip_address, content);
					if(error != 0)
					{
						syslog(LOG_ERR, "[%s] SHUTDOWN script is broken",cluster->nodes[n].name);
						cherub_mail(content, "CHERUB-ERROR", "ERROR WAS LOGGED: SHUTDOWN script is broken (please have a look into the logfile)");
					}
				}
				else
				{
					syslog(LOG_INFO, "[MASTER] SHUTDOWN --> NO ACTION");
				}
			}
			//if it does not boot after many cycles it seems as it is not possible to boot
			/* threshold tested for zero first: a modulo by zero is undefined */
			if((cluster->nodes[n].cycles_error > 0) &&
			   (content->ERROR_THRESHOLD != 0) &&
			   ((int)cluster->nodes[n].cycles_error % content->ERROR_THRESHOLD == 0) &&
			   (cluster->nodes[n].emergency_routin == 1))
			{
				syslog(LOG_ERR, "[%s] CANT BOOT",cluster->nodes[n].name);
				cluster->nodes[n].boot_problem = 1;
				content->mails_to_send = 1;
			}
			cluster->nodes[n].cycles_error++;
		}
	}
	else
	{
		cluster->nodes[n].cycles_under_load = 0;
	}
	syslog(LOG_DEBUG, "[MASTER] [if_CHERUB_DOWN] DONE - state down procedure");
}

int update_cherub_cluster(struct cherub_cluster *cluster, struct cherub_content *content){
	assert(cluster != NULL);
	int n;
	int nodes_status_size;
	int* nodes_status_list = NULL;
	//if sequential_shutdown is not set to auto (<=0) save the number in a counter
	if(content->sequential_shutdown > 0)
	{
		content->sequential_from_online_counter = content->sequential_shutdown;
		/* if there are less nodes to signoff/shutdown than allowed */
		if(cluster->running_nodes <= content->sequential_from_online_counter)
			content->sequential_from_online_counter = cluster->running_nodes - 1;
	}
	/* save old state and get new one for every node, log the change */
	/* try to collect all node states in parallel                    */
	if(content->parallel_status==1)
	{
		syslog(LOG_DEBUG, "[MASTER] [update_cherub_cluster] TRY  - cherub_status_parallel");
		nodes_status_list = cherub_status_parallel(&nodes_status_size, content);
		syslog(LOG_DEBUG, "[MASTER] [update_cherub_cluster] DONE - cherub_status_parallel");
		if (nodes_status_list == NULL) 
		{
			syslog(LOG_ERR, "[MASTER] [update_cherub_cluster] cherub_status_parallel returned NULL -> this should not occure");
			content->parallel_status=0;
		}
		else if (nodes_status_size != cluster->number_of_nodes) {
			syslog(LOG_ERR, "[MASTER] [update_cherub_cluster] wrong size of cherub_status_parallel returned list -> fallback to cherub_status");
			free(nodes_status_list);
			nodes_status_list = NULL;
			content->parallel_status=0;
		}
	}
	cluster->busy_nodes = 0;
	cluster->online_nodes = 0;
	cluster->offline_nodes = 0;
	cluster->down_nodes = 0;
	cluster->unknown_nodes = 0;
	for(n = 0; n < cluster->number_of_nodes; n++)
	{	
		syslog(LOG_DEBUG, "[MASTER] [update_cherub_cluster] TRY  - update of node#%i with the name %s",n,cluster->nodes[n].name);		
		/* get the current state and save the last one */
		syslog(LOG_DEBUG, "[MASTER] [update_cherub_cluster] TRY  - save old state");
		cluster->nodes[n].last_state = cluster->nodes[n].state;		
		syslog(LOG_DEBUG, "[MASTER] [update_cherub_cluster] DONE - save old state");
		if (content->parallel_status==0)
		{
			syslog(LOG_DEBUG, "[MASTER] [update_cherub_cluster] TRY  - get the new state");
			cluster->nodes[n].state = cherub_status(cluster->nodes[n].name, content);		
			syslog(LOG_DEBUG, "[MASTER] [update_cherub_cluster] DONE - get the new state"); 
		}
		else 
		{
			cluster->nodes[n].state = nodes_status_list[n];
		}
		/* if the state has not changed, inc. the counter, else reset the counter and log it */
		log_state_change(cluster, n);
		syslog(LOG_DEBUG, "[MASTER] [update_cherub_cluster] DONE - update of node#%i with the name %s",n,cluster->nodes[n].name);
		switch(cluster->nodes[n].state) {
			case CHERUB_BUSY:	cluster->busy_nodes++;		break;
			case CHERUB_ONLINE:	cluster->online_nodes++;	break;
			case CHERUB_OFFLINE:	cluster->offline_nodes++;	break;
			case CHERUB_DOWN:	cluster->down_nodes++;		break;
			case CHERUB_UNKNOWN:	cluster->unknown_nodes++;	break;
			case CHERUB_ERROR:	printf("\nCould not connect to communicator. Environmental Simulation seems not active.\n");
						printf("Please Start the Simulator and try again or delete the Virtual servers from the\n");
						printf("cherub configuration. (All Nodes containing a upper case V are considered Virtual\n");
						printf("\nExiting");
						return(-2);
		}
		cluster->running_nodes = cluster->busy_nodes + cluster->online_nodes;
		
	}/* for each node */

	syslog(LOG_DEBUG, "[MASTER] [update_cherub_cluster] TRY  - set_load");
	/* determine the load of each node */
	set_load(cluster, content);
	syslog(LOG_DEBUG, "[MASTER] [update_cherub_cluster] DONE - set_load");
	
	/* the Statemachine */
	for(n = 0; n < cluster->number_of_nodes; n++)
	{
		switch(cluster->nodes[n].state)
		{
			case CHERUB_UNKNOWN:	if_CHERUB_UNKNOWN(cluster, content, n);	break;
			case CHERUB_BUSY:	if_CHERUB_BUSY(cluster, n); 		break;
			case CHERUB_ONLINE:	if_CHERUB_ONLINE(cluster, content, n);	break;
			case CHERUB_OFFLINE:	if_CHERUB_OFFLINE(cluster, content, n);	break;
			case CHERUB_DOWN:	if_CHERUB_DOWN(cluster, content, n);	break;
			case CHERUB_ERROR:	printf("This should have not happend\nExiting");
						return(-2);
		}
	}
	/* if problems has occured, send a mail */
	if(content->mails_to_send != 0){
		send_problem_mails(content, cluster);
		content->mails_to_send = 0;}
	return 0;
}

/* Print the usage text with all command line options to stdout. */
static void show_help(void)
{
	/* the text holds no conversion specifiers, so it is written as plain string */
	fputs("\nusage: cherub [options]\n\n"
	      "options:\n"
	      " -v,  			shows version\n"
	      " -vv,		 	print all info-outputs\n"
	      " -n, --noaction 	runs cherub in the noaction mode, that means that no action will be performed\n"
	      " -t, --test 		starts a test environment for testing single cherub actions with single compute nodes\n"
	      " -p, --use_pidfile 	will write a pid-file to prevent mutiple starts of cherub\n"
	      " -c <configfile>, --config <configfile>	take a location to an alternativ configuration file\n"
	      " -h, --help    		show this usage message and version and exit\n"
	      " -d, --start_debug_log_level	default start loglevel is INFO, this option set it to debug\n",
	      stdout);
}

/* Print the version banner (VERSION) and the copyright line (YEAR) to stdout. */
static void show_version(void)
{
	printf("\nCHERUB version %s \nCopyright (c) %s, Simon Kiertscher (kiertscher@uni-potsdam.de)\n",
	       VERSION,
	       YEAR);
}

//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MAIN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
int main(int argc, char *argv[])
{
	/* register signals */
	signal(SIGINT,signal_handler);
	signal(SIGTERM,signal_handler);
	signal(SIGHUP,signal_handler);
	signal(SIGFPE,signal_handler);
	
	/* set some log options */
	openlog("cherub", LOG_PID | LOG_NDELAY | LOG_PERROR | LOG_CONS, LOG_DAEMON); 
	
	struct cherub_content content;
	content.config = NULL;
	content.options = 0;
	
	/* for commandline options */
	int option_character, option_index, v_options = 0, start_debug_level=LOG_INFO;
	struct option   long_options[] = {
		{"help", no_argument, NULL, 'h'},
		{"noaction", no_argument, NULL, 'n'},
		{"test", no_argument, NULL, 't'},
		{"use_pidfile", no_argument, NULL, 'p'},
		{"configfile", required_argument, NULL, 'c'},
		{"start_debug_log_level", no_argument, NULL, 'd'},
		{NULL, no_argument, NULL, 0}
	};
	
	while ((option_character = getopt_long(argc, argv, "hvvntpc:d", long_options, &option_index)) != -1) {
		switch (option_character) 
		{
			case 'h': content.options = content.options | COMMANDLINE_HELP;		break;
			case 'v': v_options++;							break;
			case 'n': content.options = content.options | COMMANDLINE_NOACTION;	break;
			case 't': content.options = content.options | COMMANDLINE_TEST;		break;
			case 'p': content.options = content.options | COMMANDLINE_PIDFILE;	break;
			case 'c': 
				content.options = content.options | COMMANDLINE_CONFIG; 
				content.config = strdup(optarg);
				break;
			case 'd': start_debug_level=LOG_DEBUG;					break;
			default : content.options = content.options | COMMANDLINE_HELP;		break;
		}
	}
	
	/* default START log level is INFO, with option -d it can be set the start log level to DEBUG */
	setlogmask(LOG_UPTO(start_debug_level));

	switch (v_options)
	{
		case 1:		show_version();	remove("/var/run/cherub.pid");	exit(EXIT_SUCCESS);	break;
		case 2:		content.options = content.options | COMMANDLINE_INFO;	break;
		default :	break;
	}
	
	/* show help */
	if (content.options & COMMANDLINE_HELP) 
	{
		show_help();
		show_version();
		remove("/var/run/cherub.pid");
		exit(EXIT_SUCCESS);
	}	
	
	/* CHECK PID FILE if option is set*/
	/* for now, building pidfile stuff on my own. no correct lib found for using it */
	FILE *pidfile = NULL;
	if (content.options & COMMANDLINE_PIDFILE)
	{
		pidfile = fopen("/var/run/cherub.pid","wrx");
		if(errno==EEXIST)
		{
			syslog(LOG_ERR, "[MASTER] [main] A CHERUB INSTANS IS ALREADY RUNNING! EXITING");
			exit(EXIT_FAILURE);
		}
		if(pidfile==NULL)
		{
			syslog(LOG_ERR, "[MASTER] [main] could not write pidfile, make sure cherub runs as root,  EXITING");
			exit(EXIT_FAILURE);
		}
	}

	/* get the inits from the python config file */
	cherub_load_config(&content);


	//#####################################################################################################################################################
	//##################################################### START REAL MAIN ###############################################################################
	//#####################################################################################################################################################

		
	//create the main structure 
	struct cherub_cluster cluster;
	int error = 0;
	struct timespec loop_begin_time, loop_end_time;
	long loopduration;
	memset(&cluster, 0, sizeof(struct cherub_cluster));

	/* IF WE USE TEST */
	if(content.options & COMMANDLINE_TEST)
	{
		if (content.options & COMMANDLINE_PIDFILE)
		{
			fprintf(pidfile,"%d\n",getpid());
			syslog(LOG_DEBUG, "[MASTER] [main] pidfile created");	
			fclose(pidfile);
		}
		syslog(LOG_INFO, "[MASER] TEST IS RUNNING");
		error = cherub_init(&cluster, &content);
		if(error != 0){
			syslog(LOG_ERR, "[MASTER] [main] Error on cherub init");
			goto cleanup_on_error;
		}
		test(&cluster, &content);
		goto thisistheend;
	}

	cherub_mail(&content, "CHERUB-STATUS", "CHERUB was startet");

	double round = 0;
	int n=0;
	int cluster_counter[5]={0,0,0,0,0}; //count number of nodes in the different states
	error = cherub_init(&cluster, &content);
	if(error == 1)
	{
		syslog(LOG_ERR, "[MASTER] error on cherub_init");
		cherub_mail(&content, "CHERUB-ERROR", "ERROR WAS LOGGED: error on cherub_init (please have a look into the logfile)");
		goto cleanup_on_error;
	}
	syslog(LOG_INFO, "[MASTER] ########### START ###########");
	
	//daemonize
	//if verbose give the outputs
	if(content.options & COMMANDLINE_INFO)
	{
		//FROM MANPAGE OF DAEMON:
		//Unless the argument nochdir is non-zero, daemon() changes the current
		//working directory to the root (/). 
		//wenn das Argument nochdir nicht null ist, wird das directory nicht verndert
		//Unless the argument noclose is non-zero, daemon() will redirect standard
		//input, standard output, and standard error to /dev/null.
		//wenn das Argument noclose nicht null ist, wird redirected

		if (daemon(1, 1) == -1) {
			syslog(LOG_ERR, "[MASTER] failed to daemonize: %m (%s at line %d)", __FILE__, __LINE__);
			cherub_mail(&content, "CHERUB-ERROR", "ERROR WAS LOGGED: failed to daemonize (please have a look into the logfile)");
		}	

	}
	else
	{
		syslog(LOG_INFO, "[MASTER] daemonizing now, this is the last syslog at stdout. For more information, look into /var/log/message or what ever you have configured.");
		if (daemon(1, 0) == -1) {
			syslog(LOG_ERR, "[MASTER] failed to daemonize: %m (%s at line %d)", __FILE__, __LINE__);
			cherub_mail(&content, "CHERUB-ERROR", "ERROR WAS LOGGED: failed to daemonize (please have a look into the logfile)");
		}	
	}
	
	
	/* write pid in pidfile (daemonizing changes the pid by +3)*/
	if (content.options & COMMANDLINE_PIDFILE)
	{
		fprintf(pidfile,"%d\n",getpid());
		syslog(LOG_DEBUG, "[MASTER] [main] pidfile created");	
		fclose(pidfile);
	}

	//this is the main loop, until a terminate signal is send
	while(cherub_terminate == 0) // MAIN ROUTINE
	{
		
		clock_gettime(CLOCK_MONOTONIC ,&loop_begin_time);
		
		//main function to set states, set the load and restart the scheduler
		if(update_cherub_cluster(&cluster, &content)==-2)
			goto cleanup_on_error;
		//interesting output if verbose is set
		if(content.options & COMMANDLINE_INFO)
		{
			printf("###################### ROUND%6.0f ###################################################################\n",round);
			cluster_counter[0]=0;cluster_counter[1]=0;cluster_counter[2]=0;cluster_counter[3]=0;cluster_counter[4]=0;
			for(n=0;n < cluster.number_of_nodes; n++){
				printf("### %10s # %5.0f cycles ",cluster.nodes[n].name, cluster.nodes[n].cycles_running);
				switch(cluster.nodes[n].state)//33
				{
					case -1: printf(" UNKNOWN"); cluster_counter[0]++; break;
					case  0: printf("    BUSY"); cluster_counter[1]++; break;
					case  1: printf("  ONLINE"); cluster_counter[2]++; break;
					case  2: printf(" OFFLINE"); cluster_counter[3]++; break;
					case  3: printf("  - DOWN"); cluster_counter[4]++; break;
					case CHERUB_ERROR:	printf("This should have not happend\nExiting");
								return(-2);
				}//8
				printf(" # load %i ",cluster.nodes[n].load);//10
				if (n%2==1){
					printf("###\n");
				}
			}
			printf("##### Unknown:%3.1d # Busy:%3.1d # Online:%3.1d # Offline:%3.1d # Down:%3.1d ################################\n",cluster_counter[0],cluster_counter[1],cluster_counter[2],cluster_counter[3],cluster_counter[4]);
		}
		
		/*Dynamic Cycle is not implemented yet and thus omitted */
		/*if(content.dynamic_update == 1)
			adjust_update_cycle(content.UPDATE_CYCLE, &content ); */
		
		clock_gettime(CLOCK_MONOTONIC, &loop_end_time);
		/* seconds multiplied by 1.000.000.000 for conversion to nsec */
		loopduration = ((loop_end_time.tv_sec * 1000000000) + loop_end_time.tv_nsec) - ((loop_begin_time.tv_sec * 1000000000) + loop_begin_time.tv_nsec);
		/* sleep for the rest of the update cycle (the time is reduced by the time )  */
		/* and division by 1000 to convert to nsec again                              */
		printf("Loopduration was:\t%lu\n",loopduration);
		if (((long)((content.UPDATE_CYCLE * 1000000) - (loopduration / 1000))) > 0){
			printf("Sleeping for:\t%lu\n", (long)((content.UPDATE_CYCLE * 1000000) - (loopduration / 1000)));
			usleep((useconds_t)(content.UPDATE_CYCLE * 1000000 - loopduration / 1000));
		}
		round++;
		//if the reload signal was send
		if(cherub_reload == 1)
		{	
			cherub_reload = 0;
			syslog(LOG_INFO, "[MASTER] [RELOAD]");
			if(cherub_load_config(&content) == -1)
			{
				syslog(LOG_ERR, "[MASTER] COULD NOT LOAD THE CONFIG");
				cherub_mail(&content, "CHERUB-ERROR", "ERROR WAS LOGGED: COULD NOT LOAD THE CONFIG (please have a look into the logfile)");
				goto cleanup_on_error;
			}
			syslog(LOG_DEBUG, "[MASTER] [RELOAD - DONE]");
		}
	}

	for(n=0;n < cluster.number_of_nodes; n++)
	{
		error = cherub_boot(cluster.nodes[n].ip_address, &content); 		
		syslog(LOG_INFO, "[MASTER] try to BOOT %s\n",cluster.nodes[n].name);
	}
		
	for(n=0;n < cluster.number_of_nodes; n++)
	{
		error = cherub_register(cluster.nodes[n].name, &content); 		
		syslog(LOG_INFO, "[MASTER] try to REGISTER %s\n",cluster.nodes[n].name);
	}

	cherub_mail(&content, "CHERUB-STATUS", "CHERUB was terminated");
	
	thisistheend: ;

	/* if the terminate signal was send */
	syslog(LOG_WARNING, "[MASTER] TERMINATE");
	syslog(LOG_DEBUG, "[MASTER] [main] TRY  - cherub_finalize");
	if(content.options & COMMANDLINE_NOACTION)
	{	/*for no action*/
		cherub_finalize(&cluster, &content, 1);
	}
	else
	{	/*for action*/
		cherub_finalize(&cluster, &content, 0);
	}
	syslog(LOG_DEBUG, "[MASTER] [main] DONE - cherub_finalize");
	if (content.options & COMMANDLINE_PIDFILE)
	{
		syslog(LOG_DEBUG, "[MASTER] [main] TRY  - cleanup pid file");
		remove("/var/run/cherub.pid");
		syslog(LOG_DEBUG, "[MASTER] [main] DONE - cleanup pid file");
	}

	syslog(LOG_INFO, "[MASTER] --- EXIT ---");
	exit(EXIT_SUCCESS);

	cleanup_on_error: ;
	if (content.options & COMMANDLINE_PIDFILE)
	{
		remove("/var/run/cherub.pid");
	}
	cherub_mail(&content, "CHERUB-STATUS", "CHERUB EXIT ON ERROR (please have a look into the logfile)");
	exit(EXIT_FAILURE);
}
