/*  jobcenter.h
 *
 *  Copyright (C) 2010-2012 Andreas von Manteuffel
 *  Copyright (C) 2010-2012 Cedric Studerus
 *
 *  This file is part of the package Reduze 2.
 *  It is distributed under the GNU General Public License version 3
 *  (see the file GPL-3.0.txt or http://www.gnu.org/licenses/gpl-3.0.txt).
 */

#ifdef HAVE_MPI

#ifndef JOBCENTER_H_
#define JOBCENTER_H_

#include <mpi.h>
#include <map>
#include <queue>
#include <string>
#include "jobqueue.h"
#include "functions.h" // Timer
namespace Reduze {

/// Load balancing scheduler, assigns work to customers
/** A customer can take different roles:
 ** 1. It acts as a manager and processes a Job from the JobQueue.
 **    In this case it can optionally register for some time at the JobCenter
 **    as an employer and request workers to help performing the Job.
 ** 2. It acts as a worker and helps an employer to perform a Job.
 **
 ** Workers are distributed amongst employers approximately according to
 **   #workers = const * [Pi/2 - atan(exp(-(eff-eff0)*c))]
 ** with eff=efficiency of other workers of the job; eff0 and c are
 ** (hard-coded) constant parameters. Additional constraints modify this
 ** distribution.
 **/
class JobCenter {
public:
	/// constructs a job center for the given communicator and job queue
	/** \param comm      pointer to the MPI intra-communicator to use
	 ** \param jobqueue  pointer to the queue of jobs to be processed
	 ** NOTE(review): both are raw pointers; ownership is not visible in this
	 ** header - presumably non-owning, confirm in the implementation. **/
	JobCenter(MPI::Intracomm* comm, JobQueue* jobqueue);
	virtual ~JobCenter();
	/// runs the job center
	/** NOTE(review): only the declaration is visible here; presumably this
	 ** is the scheduling loop - confirm in the implementation. **/
	void run();

	// TODO: make these user configurable
	// (the two parameters below are public, writable data members)

	/// parameter eff0 in worker distribution (Pi/2 - atan(exp(-(eff-eff0)*c)))
	double worker_distribution_halfwidth_efficiency_;
	/// parameter c in worker distribution (Pi/2 - atan(exp(-(eff-eff0)*c)))
	double worker_distribution_steepness_;

	/// commands sent by JobCenter and status sent by JobCenterCustomer
	/** Enumerators prefixed with Cmd are the commands, those prefixed with
	 ** Status are the status reports. Numbering starts at 444 and increases
	 ** by one per enumerator. **/
	enum Cmd {
		CmdPerformJob = 444,
		CmdExit,
		CmdPerformanceData,
		CmdAssignWorker,
		CmdWorkerQuota,
		CmdContinue,
		StatusFinished,
		StatusRegisterAsEmployer,
		StatusUnregisterAsEmployer
	};

	/// NOTE(review): presumably the MPI message tag for general customer messages - confirm
	static const int TagCustomer = 123;
	/// NOTE(review): presumably the MPI message tag for employer messages - confirm
	static const int TagEmployer = 234;
	/// prints information about the job center
	/** NOTE(review): content and destination of the output are defined in
	 ** the implementation and not visible here. **/
	void print_info();
	/// returns an information string for the manager with rank \a manager_rank
	/** NOTE(review): exact content is defined in the implementation. **/
	std::string get_info(int manager_rank);

	/// a string description of a job
	std::string job_string(const Job*) const;

private:
	/// NOTE(review): undocumented in the original; presumably processes the
	/// jobs inserted into the queue using the given timers and MPI status -
	/// confirm in the implementation
	void run_inserted_jobs(CountdownTimer& terminate_timer,
			CountdownTimer& analysis_timer, MPI::Status& status);
	/// returns the rank of the optimal employer for an idle customer
	/** zero means the customer should become an employer with a new job **/
	int find_best_employer();
	// (disabled) returns rank of an employer who should release worker in favor of someone else
	//int find_bad_employer();
	/// processes a status change of a general customer
	/** \param status  status value; presumably one of the Status* values of Cmd - confirm
	 ** \param from    presumably the rank of the reporting customer - confirm **/
	void process_customer_status(int status, int from);
	/// sends a command to an employer
	/** \param cmd  command value; presumably one of the Cmd* values of Cmd - confirm
	 ** \param to   presumably the rank of the employer - confirm
	 ** NOTE(review): the meaning of the bool return value is not documented
	 ** here - confirm in the implementation. **/
	bool send_employer_command(int cmd, int to);
	/// assigns customer a job to work on
	/** to be called directly if customer becomes manager
	 ** and indirectly through assign_worker if customer becomes worker **/
	void assign_job(Job* job, int manager, int customer);
	/// assigns a worker to a job
	/** NOTE(review): the meaning of the bool return value is not documented
	 ** here - confirm in the implementation. **/
	bool assign_worker(int employer, int worker);

	/// number of available workers if all managers have minimal number of procs
	int find_num_free_workers() const;
	/// number of available workers if all employers have minimal number of procs
	int find_num_distributable_workers() const;

	/// updates num_processes_optimal for all performance entries
	void update_worker_distribution();
	/// returns true if a new job and enough workers to run it are available
	bool can_start_new_job() const;

private:
	unsigned n_customers; ///< total number of customers in the communicator
	/// NOTE(review): presumably the ranks of customers currently without work - confirm
	std::queue<int> idle_customers;
	/// MPI communicator; presumably the pointer passed to the constructor
	MPI::Intracomm* comm;
	/// job queue; presumably the pointer passed to the constructor
	JobQueue* jobqueue_;
	/// bookkeeping record for one manager of a running job
	struct ManagerPerformance {
		/// sets the defaults: not an employer, no progress, zero efficiencies,
		/// wants more workers, one process (current and optimal), all timings
		/// zero; the timer member is default-constructed
		ManagerPerformance() :
					is_employer(false),//
					progress(0.), load_efficiency(0.), worker_efficiency(0.),
					wants_more_workers(true), num_processes(1),
					num_processes_optimal(1), //
					walltime(0.), tot_cputime(0.), tot_walltime(0.) {
		}
		/// NOTE(review): undocumented in the original; presumably refreshes
		/// the walltime members from the timer - confirm in the implementation
		void update_walltimes();
		/// whether the manager is registered as an employer
		bool is_employer;
		/// fraction of finished work
		double progress;
		/// relative efficiency of workers w.r.t. number of workers
		/** note: default value for load_efficiency is 0. in order to collect
		 ** measurements before requesting large amounts of free workers */
		double load_efficiency;
		/// absolute efficiency of workers
		/** note: default value for worker_efficiency is 0. in order to collect
		 ** measurements before requesting large amounts of free workers */
		double worker_efficiency;
		/// whether the manager wants new workers
		bool wants_more_workers;
		/// number of processes incl. manager working on the manager's job
		int num_processes;
		/// target number of processes
		int num_processes_optimal;
		/// timings
		double walltime, tot_cputime, tot_walltime;
		/// timer of this entry; NOTE(review): usage is not visible in this header
		Timer timer;
	};
	/// performance data for managers of running jobs indexed by rank of manager
	std::map<int, ManagerPerformance> performance;
	/// running jobs indexed by rank of manager
	std::map<int, Job*> job;
	/// employers for customers
	/** NOTE(review): presumably maps customer rank to the rank of its
	 ** employer - confirm in the implementation. **/
	std::map<int, int> employers;
};

/// customer at the JobCenter, can take role as a worker or a manager/employer
/** NOTE(review): only the declaration is visible in this header; the
 ** behaviour of the constructor and of run() is defined in the
 ** implementation file. **/
class JobCenterCustomer {
public:
	/// constructs a customer for the given communicator
	/** \param comm            pointer to the MPI intra-communicator to use
	 ** \param jobcenter_rank  rank of the JobCenter; presumably a rank
	 **                        within \a comm - confirm **/
	JobCenterCustomer(MPI::Intracomm* comm, int jobcenter_rank);
	/// runs the customer
	/** NOTE(review): presumably the loop in which the customer receives and
	 ** executes commands from the JobCenter - confirm in the implementation. **/
	void run();
private:
	/// MPI communicator; presumably the pointer passed to the constructor
	MPI::Intracomm* comm;
	/// rank of the JobCenter; presumably the value passed to the constructor
	int jobcenter_rank;
};

}

#endif /* JOBCENTER_H_ */

#endif // HAVE_MPI
