/*
 *   (C) Copyright IBM Corp. 2001, 2003
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: mdregmgr
 * File: raid5_funcs.c
 *
 * Description: This file contains all MD RAID5's plugin functions.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <plugin.h>
#include <sys/ioctl.h>

#include "md.h"
#include "raid5_mgr.h"
#include "raid5_discover.h"

/*
 * Alias the generic name "my_plugin_record" to this plugin's record.
 * NOTE(review): presumably consumed by the shared logging macros
 * (LOG_ENTRY, LOG_ERROR, ...) to tag messages with the RAID5 plugin --
 * confirm against the macro definitions in the project headers.
 */
#define my_plugin_record raid5_plugin

/*
 * Function: raid5_add_spare_disk_to_region
 *
 * Attach the object "spare" to the in-memory description of the RAID5
 * volume, using slot "i" of the superblock disk table and of the RAID5
 * private configuration.
 *
 * md_clone_superblock() is called for the slot first.  If it fails,
 * nothing is attached and none of the disk counters are modified, so the
 * counts always match the number of disks that are really present.
 *
 * Parameters:
 *   volume - MD volume to add the spare to
 *   spare  - storage object that becomes the spare
 *   i      - slot index to use for the new disk
 *
 * Returns 0 on success, otherwise the error from md_clone_superblock().
 */
static int raid5_add_spare_disk_to_region(md_volume_t * volume, storage_object_t *spare, int i)
{
	int rc;
	mdp_disk_t *disk;
	raid5_conf_t *conf = mdvol_to_conf(volume);

	LOG_ENTRY();

	rc = md_clone_superblock(volume, i);
	if (rc) {
		/* Nothing was attached: leave every counter untouched. */
		LOG_EXIT_INT(rc);
		return rc;
	}

	volume->child_object[i] = spare;
	md_append_region_to_object(volume->region, spare);

	/* Describe the new disk in the master superblock. */
	disk = &volume->super_block->disks[i];
	disk->major = spare->dev_major;
	disk->minor = spare->dev_minor;
	disk->number = i;
	disk->raid_disk = i;

	if (volume->flags & MD_DEGRADED) {
		/*
		 * On a degraded array the new disk is flagged new and
		 * pending-active, and the region is marked as having a
		 * configuration change in flight.
		 */
		disk->state = ((1<<MD_DISK_NEW) | (1<<MD_DISK_PENDING_ACTIVE));
		volume->region_mgr_flags |= (MD_RAID5_CONFIG_CHANGE_PENDING | MD_RAID5_IGNORE_VERIFY);
	} else {
		disk->state = 0;
	}

	/* Update the RAID5 private configuration. */
	conf->disks[i].dev = spare;
	conf->disks[i].operational = 0;
	conf->disks[i].number = i;
	conf->disks[i].raid_disk = i;
	conf->disks[i].write_only = 0;
	conf->disks[i].spare = 1;
	conf->disks[i].used_slot = 1;

	conf->spare_disks++;
	if (conf->spare.used_slot == 0) {
		/* No spare was recorded yet; this one becomes the spare. */
		conf->spare = conf->disks[i];
	}

	/* Update the master superblock fields. */
	volume->super_block->spare_disks++;
	volume->super_block->working_disks++;
	volume->super_block->nr_disks++;

	/* Update the volume counts. */
	volume->nr_disks++;

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Function: raid5_remove_spare_from_region
 *
 * Detach the spare disk in slot "i" from the in-memory description of
 * the volume: unlink the object from the region, schedule its MD
 * superblock sectors to be wiped, free its superblock copy, and close
 * the gap by shifting every later slot down by one.
 *
 * Parameters:
 *   volume - MD volume that owns the spare
 *   i      - slot index of the spare to remove
 */
static void raid5_remove_spare_from_region(md_volume_t * volume, int i)
{
	int k;
	raid5_conf_t * conf = mdvol_to_conf(volume);

	LOG_ENTRY();

	md_remove_region_from_object(volume->region, volume->child_object[i]);
	KILL_SECTORS(volume->child_object[i],
		     MD_NEW_SIZE_SECTORS(volume->child_object[i]->size),
		     MD_RESERVED_SECTORS);

	volume->child_object[i] = NULL;
	EngFncs->engine_free(volume->super_array[i]);
	volume->super_array[i] = NULL;

	memset(&volume->super_block->disks[i], 0, sizeof(mdp_disk_t));

	/*
	 * Collapse super array, object arrays, and configuration
	 * array, and update the moved disks with their new
	 * numbers.
	 */
	for (k = i; k < MAX_MD_DEVICES - 1; k++) {
		volume->super_array[k]= volume->super_array[k+1];
		volume->child_object[k] = volume->child_object[k+1];
		/* Stale objects shift within their own array. */
		volume->stale_object[k] = volume->stale_object[k+1];
		volume->super_block->disks[k]= volume->super_block->disks[k+1];
		volume->super_block->disks[k].number = k;
		volume->super_block->disks[k].raid_disk = k;
		conf->disks[k] = conf->disks[k+1];
		conf->disks[k].number = k;
		conf->disks[k].raid_disk = k;
	}

	/* k is now the last slot, which has been vacated by the shift. */
	volume->super_array[k]= NULL;
	volume->stale_object[k]= NULL;
	volume->child_object[k] = NULL;

	/* Zero out the now empty disk entry so it does not duplicate slot k-1. */
	memset(&volume->super_block->disks[k], 0, sizeof(mdp_disk_t));

	conf->disks[k].dev = NULL;
	conf->disks[k].operational = 0;
	conf->disks[k].used_slot = 0;

	/*
	 * The disk that now occupies the old spare's slot is
	 * either a different spare or nothing.  Either way,
	 * copy it to the spare disk in the configuration.
	 */
	conf->spare = conf->disks[i];

	volume->super_block->working_disks--;
	volume->super_block->spare_disks--;
	volume->super_block->nr_disks--;

	volume->nr_disks--;

	LOG_EXIT_VOID();
}

/*
 * Post-ioctl callback: release the disk_info buffer carried by the
 * package (if there is one) and clear the pointer.  Always returns 0.
 */
static int free_disk_info_post_ioctl(md_volume_t *volume, md_ioctl_pkg_t *pkg)
{
	const int status = 0;

	LOG_ENTRY();

	if (pkg->parm.disk_info != NULL) {
		EngFncs->engine_free(pkg->parm.disk_info);
		pkg->parm.disk_info = NULL;
	}

	LOG_EXIT_INT(status);
	return status;
}

/*
 * Post-ioctl callback for a scheduled EVMS_MD_ADD package.
 *
 * If the package was turned into EVMS_MD_CANCEL_OPERATION, the spare
 * that was attached to the region when the add was scheduled is detached
 * again.  In every case the disk_info buffer is released and the
 * "configuration change pending / ignore verify" flags are cleared.
 *
 * Returns the result of free_disk_info_post_ioctl() (currently always 0).
 */
static int add_spare_post_ioctl(md_volume_t *volume, md_ioctl_pkg_t *pkg)
{
	int rc;

	LOG_ENTRY();

	/*
	 * Check to see if we need to cancel EVMS_MD_ADD
	 */
	if (pkg->cmd == EVMS_MD_CANCEL_OPERATION) {
		if (pkg->parm.disk_info) {
			raid5_remove_spare_from_region(volume, pkg->parm.disk_info->number);
		} else {
			/* Without the disk info the slot to undo is unknown. */
			LOG_ERROR("Cancelled add-spare request carries no disk info.\n");
		}
	}

	rc = free_disk_info_post_ioctl(volume, pkg);
	volume->region_mgr_flags &= ~(MD_RAID5_CONFIG_CHANGE_PENDING | MD_RAID5_IGNORE_VERIFY);

	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * Function: raid5_add_spare_disk
 *
 * Add "spare" to the RAID5 volume as a spare disk.
 *
 * An empty superblock slot is looked up first; ENODEV is returned when
 * find_empty_slot() reports MAX_MD_DEVICES.
 *
 * Inactive region: the spare is attached to the in-memory region and, on
 * success, the volume is flagged MD_DIRTY.
 *
 * Active region: an EVMS_MD_ADD package (with add_spare_post_ioctl as its
 * callback) is scheduled and then the spare is attached to the in-memory
 * region.  If scheduling fails the disk_info buffer is freed here.
 * NOTE(review): if attaching fails after scheduling succeeded, the
 * scheduled package is left queued -- confirm whether that is intended.
 *
 * Returns 0 on success, ENODEV, ENOMEM, or the error from the failing
 * helper.
 */
int raid5_add_spare_disk(md_volume_t * volume, storage_object_t * spare)
{
	evms_md_ioctl_parm_t ioctl_parm;
	evms_md_disk_info_t *new_info;
	int slot;
	int result;

	LOG_ENTRY();

	slot = find_empty_slot(volume->super_block);
	if (slot == MAX_MD_DEVICES) {
		LOG_EXIT_INT(ENODEV);
		return ENODEV;
	}

	/* Inactive region: only the in-memory state has to change. */
	if (!md_is_region_active(volume->region)) {
		result = raid5_add_spare_disk_to_region(volume, spare, slot);
		if (result == 0) {
			volume->flags |= MD_DIRTY;
		}
		LOG_EXIT_INT(result);
		return result;
	}

	/* Active region: queue the ADD request, then attach the spare. */
	new_info = EngFncs->engine_alloc(sizeof(evms_md_disk_info_t));
	if (new_info == NULL) {
		LOG_CRITICAL("No Memory\n");
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	new_info->number = slot;
	new_info->object = spare;
	ioctl_parm.disk_info = new_info;

	result = schedule_md_ioctl_pkg(volume, EVMS_MD_ADD, &ioctl_parm, add_spare_post_ioctl);
	if (result != 0) {
		/* The package was not queued, so the buffer is still ours. */
		EngFncs->engine_free(new_info);
	} else {
		result = raid5_add_spare_disk_to_region(volume, spare, slot);
	}

	LOG_EXIT_INT(result);
	return result;
}

/*
 * Function: raid5_remove_spare_disk
 *
 * Remove the spare disk "spare" from the RAID5 volume.
 *
 * The object must be a child of the volume and its superblock entry must
 * be a spare, otherwise EINVAL is returned.
 *
 * Inactive region: the spare is removed from the in-memory region (the
 * slots are collapsed) and the volume is flagged MD_DIRTY.
 *
 * Active region:
 *   - if an EVMS_MD_ADD for the same disk is still scheduled, that
 *     package is removed instead and nothing else is done;
 *   - otherwise the kernel is asked about the disk (ENODEV when that
 *     fails), an EVMS_MD_REMOVE package is scheduled, and only once
 *     scheduling has succeeded is the disk detached from the in-memory
 *     state.  The slots are not collapsed in this path.
 *
 * Returns 0 on success, EINVAL, ENOMEM, ENODEV, or the error returned by
 * schedule_md_ioctl_pkg().
 */
int raid5_remove_spare_disk(md_volume_t * volume, storage_object_t * spare)
{
	int            rc = 0;
	int            i;
	boolean        found = FALSE;
	raid5_conf_t * conf = mdvol_to_conf(volume);
	evms_md_disk_info_t *disk_info = NULL;
	evms_md_ioctl_parm_t parm;
	mdu_disk_info_t info;

	LOG_ENTRY();

	/* Locate the slot that holds the object. */
	for (i = 0; i < MAX_MD_DEVICES; i++) {
		if (volume->child_object[i] == spare) {
			found = TRUE;
			break;
		}
	}

	if (!found) {
		LOG_ERROR("Object %s is not part of RAID array %s.\n", spare->name, volume->region->name);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	/* Found the disk to remove.  Make sure it is a spare. */
	if (!disk_spare(&volume->super_block->disks[i])) {
		LOG_ERROR("Object %s is not a spare disk in RAID array %s.\n", spare->name, volume->region->name);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	if (!md_is_region_active(volume->region)) {
		raid5_remove_spare_from_region(volume, i);
		volume->flags |= MD_DIRTY;
		LOG_EXIT_INT(0);
		return 0;
	}

	disk_info = EngFncs->engine_alloc(sizeof(evms_md_disk_info_t));
	if ( !disk_info ) {
		LOG_CRITICAL("No Memory\n");
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	disk_info->number = i;
	disk_info->major = spare->dev_major;
	disk_info->minor = spare->dev_minor;
	disk_info->object = spare;
	parm.disk_info = disk_info;

	/*
	 * If we are removing a spare which has just been added,
	 * don't schedule the ioctl.
	 */
	if (remove_scheduled_md_ioctl_pkg(volume, EVMS_MD_ADD, &parm) == TRUE) {
		rc = 0;
		goto out_free_mem;
	}

	info.number = i;
	rc = md_ioctl_get_disk_info(volume->region, &info);
	if (rc) {
		rc = ENODEV;
		goto out_free_mem;
	}

	if ((info.major != spare->dev_major) || (info.minor != spare->dev_minor)) {
		MESSAGE("WARNING: Region:%s, Device:%s: Index:%d\n"
			" There is a mismatch major/minor, Kernel MD driver has (%d:%d), EVMS has (%d:%d)."
			"  However, if %s was created by another MD tool such as mdadm or raidtools,"
			" the operation will succeed.\n",
			volume->name, spare->name, i,
			info.major, info.minor,
			spare->dev_major, spare->dev_minor,
			volume->name);
		/* Use the device numbers the kernel reported for this slot. */
		disk_info->major = info.major;
		disk_info->minor = info.minor;
	}

	/*
	 * Do not touch the in-memory state unless the removal was really
	 * queued; on failure the buffer is still ours to free.
	 */
	rc = schedule_md_ioctl_pkg(volume, EVMS_MD_REMOVE, &parm, free_disk_info_post_ioctl);
	if (rc) {
		goto out_free_mem;
	}

	md_remove_region_from_object(volume->region, volume->child_object[i]);
	KILL_SECTORS(volume->child_object[i],
		     MD_NEW_SIZE_SECTORS(volume->child_object[i]->size),
		     MD_RESERVED_SECTORS);

	volume->child_object[i] = NULL;
	EngFncs->engine_free(volume->super_array[i]);
	volume->super_array[i] = NULL;

	memset(&volume->super_block->disks[i], 0, sizeof(mdp_disk_t));

	conf->disks[i].dev = NULL;
	conf->disks[i].operational = 0;
	conf->disks[i].used_slot = 0;

	/*
	 * Next spare or nothing.  When the removed disk sat in the last
	 * slot there is no next entry to read, so record the (now unused)
	 * slot itself.
	 */
	if (i + 1 < MAX_MD_DEVICES) {
		conf->spare = conf->disks[i+1];
	} else {
		conf->spare = conf->disks[i];
	}

	volume->super_block->working_disks--;
	volume->super_block->spare_disks--;
	volume->super_block->nr_disks--;

	volume->nr_disks--;

	/* disk_info now belongs to the scheduled package. */
	LOG_EXIT_INT(0);
	return 0;

out_free_mem:
	if (disk_info) {
		EngFncs->engine_free(disk_info);
	}

	LOG_EXIT_INT(rc);
	return rc;
}


/*
 * Function: raid5_remove_faulty_disk
 *
 * Remove the faulty disk "faulty" from the RAID5 volume.
 *
 * The object must be a child of the volume and its superblock entry must
 * be marked faulty or removed, otherwise EINVAL is returned.
 *
 * An EVMS_MD_REMOVE package is scheduled for the disk.  The major/minor
 * numbers in the package are taken from the kernel when the kernel can
 * be queried and disagrees with EVMS.  The in-memory state is modified
 * only after the package has been scheduled successfully.
 *
 * If the disk occupies a slot inside the array (slot < raid_disks) its
 * entry is kept and marked faulty + removed.  If it lies past the end of
 * the array the entry is deleted and the following slots are collapsed.
 *
 * Returns 0 on success, EINVAL, ENOMEM, or the error returned by
 * schedule_md_ioctl_pkg().
 */
int raid5_remove_faulty_disk(md_volume_t * volume, storage_object_t * faulty)
{
	int            rc = 0;
	int            i, k;
	boolean        found = FALSE;
	raid5_conf_t * conf = mdvol_to_conf(volume);
	evms_md_disk_info_t *disk_info = NULL;
	evms_md_ioctl_parm_t parm;
	mdu_disk_info_t info;

	LOG_ENTRY();

	/* Locate the slot that holds the object. */
	for (i = 0; i < MAX_MD_DEVICES; i++) {
		if (volume->child_object[i] == faulty) {
			found = TRUE;
			break;
		}
	}

	if (!found) {
		LOG_ERROR("Object %s is not part of RAID array %s.\n", faulty->name, volume->region->name);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	/* Found the disk to remove.  Make sure it is faulty.*/
	if (!(volume->super_block->disks[i].state & ((1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED)))) {
		LOG_ERROR("Object %s is not marked faulty in RAID array %s.\n", faulty->name, volume->region->name);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	disk_info = EngFncs->engine_alloc(sizeof(evms_md_disk_info_t));
	if ( !disk_info ) {
		LOG_CRITICAL("No Memory\n");
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	disk_info->number = i;
	disk_info->major = faulty->dev_major;
	disk_info->minor = faulty->dev_minor;
	disk_info->object = faulty;
	parm.disk_info = disk_info;

	/*
	 * Reconcile the device numbers with the kernel before the package
	 * is queued.  A failure to query the kernel is not an error here;
	 * the EVMS values are used in that case.
	 */
	info.number = i;
	if (md_ioctl_get_disk_info(volume->region, &info) == 0) {
		if ((info.major != faulty->dev_major) || (info.minor != faulty->dev_minor)) {
			MESSAGE("WARNING: Region:%s, Device:%s: Index:%d\n"
				" There is a mismatch major/minor, Kernel MD driver has (%d:%d), EVMS has (%d:%d)."
				"  However, if %s was created by another MD tool such as mdadm or raidtools,"
				" the operation will succeed.\n",
				volume->name, faulty->name, i,
				info.major, info.minor,
				faulty->dev_major, faulty->dev_minor,
				volume->name);
			disk_info->major = info.major;
			disk_info->minor = info.minor;
		}
	}

	/*
	 * Do not touch the in-memory state unless the removal was really
	 * queued; on failure the buffer is still ours to free.
	 */
	rc = schedule_md_ioctl_pkg(volume, EVMS_MD_REMOVE, &parm, free_disk_info_post_ioctl);
	if (rc) {
		EngFncs->engine_free(disk_info);
		LOG_EXIT_INT(rc);
		return rc;
	}

	md_remove_region_from_object(volume->region, volume->child_object[i]);
	KILL_SECTORS(volume->child_object[i],
		     MD_NEW_SIZE_SECTORS(volume->child_object[i]->size),
		     MD_RESERVED_SECTORS);
	volume->child_object[i] = NULL;
	EngFncs->engine_free(volume->super_array[i]);
	volume->super_array[i] = NULL;

	if (i < volume->super_block->raid_disks) {
		/*
		 * The faulty disk is part of the RAID array.
		 * Make sure its entry is marked faulty and
		 * removed.  The code above wiped out the
		 * superblock on the faulty disk so that the
		 * disk will no longer be discovered as part of
		 * this array.  The superblock's nr_disks and
		 * failed_disks counts stay the same since RAID5
		 * counts faulty disks in the array as being
		 * disks anyway.
		 */
		volume->super_block->disks[i].state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED);

	} else {
		/* The faulty disk is past the end of the RAID
		 * array (e.g.  it was a failed disk that was
		 * replaced by a spare disk and the failed disk
		 * was moved to the former spare disk slot).
		 * Remove the disk and collapse any remaining
		 * entries.
		 */
		for (k = i; k < volume->super_block->nr_disks - 1; k++) {
			volume->super_array[k]= volume->super_array[k+1];
			volume->child_object[k] = volume->child_object[k+1];
			volume->super_block->disks[k]= volume->super_block->disks[k+1];
			volume->super_block->disks[k].number = k;
			volume->super_block->disks[k].raid_disk = k;
			conf->disks[k] = conf->disks[k+1];
			conf->disks[k].number = k;
			conf->disks[k].raid_disk = k;

			/* Reset the spare entry if it got moved. */
			if (conf->spare.raid_disk == k+1) {
				conf->spare = conf->disks[k];
			}
		}

		volume->super_array[k]= NULL;
		volume->child_object[k] = NULL;

		/* Zero out now empty disk entry. */
		memset(&volume->super_block->disks[k], 0, sizeof(mdp_disk_t));

		conf->disks[k].dev = NULL;
		conf->disks[k].operational = 0;
		conf->disks[k].used_slot = 0;
		conf->failed_disks--;

		/* Update disk counts. */
		volume->super_block->nr_disks--;
		volume->super_block->failed_disks--;
	}

	/* One child object fewer in either case. */
	volume->nr_disks--;

	LOG_EXIT_INT(rc);
	return rc;
}

/*
 * Function: raid5_remove_stale_disk
 *
 * Drop the stale object "stale_disk" from the volume: unlink it from the
 * region, schedule its MD superblock sectors to be wiped, free the
 * superblock copy kept for its slot, and clear the slot.
 *
 * Returns 0 on success, EFAULT when either argument is NULL, and ENOSYS
 * (after logging an internal bug) when the object is not in the stale
 * list or the volume's stale disk count is zero.
 */
int raid5_remove_stale_disk(md_volume_t * volume, storage_object_t * stale_disk)
{
	storage_object_t *candidate;
	int slot;
	int result = 0;

	LOG_ENTRY();

	if (volume == NULL || stale_disk == NULL) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	/* Walk the stale list until the requested object turns up. */
	slot = 0;
	while (slot < MAX_MD_DEVICES) {
		candidate = volume->stale_object[slot];
		if (candidate == stale_disk) {
			break;
		}
		slot++;
	}

	if (slot == MAX_MD_DEVICES || candidate == NULL || !volume->stale_disks) {
		md_log_internal_bug(__FILE__, __FUNCTION__, __LINE__);
		LOG_EXIT_INT(ENOSYS);
		return ENOSYS;
	}

	/* Forget the disk in the master superblock. */
	memset(&volume->super_block->disks[slot], 0, sizeof(mdp_disk_t));

	/* Unlink the object and wipe its on-disk MD superblock area. */
	md_remove_region_from_object(volume->region, candidate);
	KILL_SECTORS(candidate, MD_NEW_SIZE_SECTORS(candidate->size), MD_RESERVED_SECTORS);

	/* Release the per-slot bookkeeping. */
	EngFncs->engine_free(volume->super_array[slot]);
	volume->super_array[slot] = NULL;
	volume->stale_object[slot] = NULL;
	volume->stale_disks--;

	LOG_EXIT_INT(result);
	return result;
}


/*
 * Post-ioctl callback for a scheduled EVMS_MD_DEACTIVATE package:
 * release the disk_info buffer (if any) and clear the "configuration
 * change pending / ignore verify" flags on the volume.  Always returns 0.
 */
static int mark_disk_faulty_post_ioctl(md_volume_t *volume, md_ioctl_pkg_t *pkg)
{
	if (pkg->parm.disk_info != NULL) {
		EngFncs->engine_free(pkg->parm.disk_info);
		pkg->parm.disk_info = NULL;
	}

	volume->region_mgr_flags = volume->region_mgr_flags &
		~(MD_RAID5_CONFIG_CHANGE_PENDING | MD_RAID5_IGNORE_VERIFY);

	return 0;
}


/*
 * Function: raid5_mark_faulty_disk
 *
 * Mark the active disk "active" of the RAID5 volume as faulty so that
 * MD recovery swaps it out with a spare.
 *
 * Preconditions checked here:
 *   - volume is not NULL (EFAULT)
 *   - the array has at least one spare disk (EINVAL)
 *   - the object is a child of the volume (ENODEV) and its superblock
 *     entry is active (EINVAL)
 *   - the kernel knows the disk in that slot (ENODEV)
 *
 * An EVMS_MD_DEACTIVATE package carrying the kernel's major/minor for
 * the slot is scheduled.  The superblock and RAID5 configuration are
 * updated only after scheduling has succeeded.
 *
 * Returns 0 on success, one of the errors above, ENOMEM, or the error
 * returned by schedule_md_ioctl_pkg().
 */
int raid5_mark_faulty_disk(md_volume_t * volume, storage_object_t * active)
{
	int            rc = 0;
	int            i;
	boolean        found = FALSE;
	raid5_conf_t * conf;
	mdp_super_t *sb;
	evms_md_disk_info_t *disk_info;
	evms_md_ioctl_parm_t parm;
	mdu_disk_info_t info;

	LOG_ENTRY();

	if (!volume) {
		LOG_EXIT_INT(EFAULT);
		return EFAULT;
	}

	/* Only look at the volume once it is known to be non-NULL. */
	conf = mdvol_to_conf(volume);
	sb = volume->super_block;

	/*
	 * Make sure there is a spare disk available before we remove an active
	 * disk.
	 */
	if (sb->spare_disks == 0) {
		LOG_ERROR("RAID array %s had no spare disks available.\n", volume->region->name);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	/* Locate the slot that holds the object. */
	for (i = 0; i < MAX_MD_DEVICES; i++) {
		if (volume->child_object[i] == active) {
			found = TRUE;
			break;
		}
	}

	if (!found) {
		LOG_ERROR("Object %s is not part of RAID array %s.\n", active->name, volume->region->name);
		LOG_EXIT_INT(ENODEV);
		return ENODEV;
	}

	/* Found the disk to remove.  Make sure it is active.*/
	if (!disk_active(&sb->disks[i])) {
		LOG_ERROR("Object %s is not marked active in RAID array %s.\n", active->name, volume->region->name);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}

	info.number = i;
	rc = md_ioctl_get_disk_info(volume->region, &info);
	if (rc) {
		LOG_ERROR("(%s) does not exist\n", active->name);
		LOG_EXIT_INT(ENODEV);
		return ENODEV;
	}

	if ((info.major != active->dev_major) || (info.minor != active->dev_minor)) {
		/* Not fatal: the kernel's numbers are used for the ioctl below. */
		LOG_WARNING("(%s) mismatch major/minor, kernel(%d:%d), active(%d:%d)\n",
			  active->name, info.major, info.minor, active->dev_major, active->dev_minor);
	}

	disk_info = EngFncs->engine_alloc(sizeof(evms_md_disk_info_t));
	if ( !disk_info ) {
		LOG_CRITICAL("No Memory\n");
		LOG_EXIT_INT(ENOMEM);
		return ENOMEM;
	}

	disk_info->number = i;
	disk_info->major = info.major;
	disk_info->minor = info.minor;
	disk_info->object = active;
	parm.disk_info = disk_info;

	/*
	 * Do not touch the in-memory state unless the request was really
	 * queued; on failure the buffer is still ours to free.
	 */
	rc = schedule_md_ioctl_pkg(volume, EVMS_MD_DEACTIVATE, &parm, mark_disk_faulty_post_ioctl);
	if (rc) {
		EngFncs->engine_free(disk_info);
		LOG_EXIT_INT(rc);
		return rc;
	}

	/*
	 * Mark the disk entry faulty.  That will cause the
	 * MD recovery to swap it out with a spare.
	 */
	sb->disks[i].state = (1 << MD_DISK_FAULTY);

	conf->disks[sb->disks[i].raid_disk].operational = 0;

	/* Update disk counts. */
	sb->active_disks--;
	sb->working_disks--;
	sb->failed_disks++;

	volume->region_mgr_flags |= (MD_RAID5_CONFIG_CHANGE_PENDING | MD_RAID5_IGNORE_VERIFY);

	LOG_EXIT_INT(0);
	return 0;
}

/*
 * Callback for the package scheduled by raid5_fix_array(): clears the
 * "configuration change pending" flag on the volume.  Always returns 0.
 */
static int fix_array_post_activate(md_volume_t *volume, md_ioctl_pkg_t *pkg)
{
	volume->region_mgr_flags = volume->region_mgr_flags & ~MD_RAID5_CONFIG_CHANGE_PENDING;

	return 0;
}

/*
 * Function: raid5_fix_array
 *
 * Verify the RAID5 array and, if the volume ends up flagged MD_CORRUPT
 * or MD_PROBLEMATIC_SB, run the verification again in fix mode.  When
 * the volume is no longer corrupt after that, the "configuration change
 * pending" flag is raised and an EVMS_MD_INVOKE_CALLBACK package is
 * scheduled; its callback (fix_array_post_activate) clears the flag.
 *
 * NOTE(review): the second argument of raid5_verify_and_fix_array()
 * (0, then 2) looks like a verify-only / fix level -- confirm against
 * its definition.
 *
 * Returns 0 on success, or the error from schedule_md_ioctl_pkg() when
 * the callback package could not be scheduled.
 */
int raid5_fix_array(md_volume_t *volume)
{
	int rc = 0;
	int was_pending;

	LOG_ENTRY();

	raid5_verify_and_fix_array(volume, 0);
	if (volume->flags & (MD_CORRUPT | MD_PROBLEMATIC_SB)) {
		raid5_verify_and_fix_array(volume, 2);
		if ((volume->flags & MD_CORRUPT) == 0) {
			was_pending = (volume->region_mgr_flags & MD_RAID5_CONFIG_CHANGE_PENDING) != 0;

			volume->region_mgr_flags |= MD_RAID5_CONFIG_CHANGE_PENDING;
			rc = schedule_md_ioctl_pkg(volume, EVMS_MD_INVOKE_CALLBACK, NULL, fix_array_post_activate);
			if (rc && !was_pending) {
				/*
				 * The callback will never run, so undo the
				 * flag we just raised (but leave it alone if
				 * another operation had already set it).
				 */
				volume->region_mgr_flags &= ~MD_RAID5_CONFIG_CHANGE_PENDING;
			}
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

