/*
 *   (C) Copyright IBM Corp. 2001, 2003
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: mdregmgr
 * File: linear_discover.c
 *
 * Description: This file contains all functions related to the initial
 *              discovery of linear MD physical volumes and logical
 *              volumes.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <plugin.h>

#include "md.h"
#include "linear_discover.h"

#define my_plugin_record linear_plugin

/* Function: linear_compare_targets
 *
 *	Compare the device-mapper targets currently loaded for the volume's
 *	region against the layout implied by the volume's child objects.
 *
 *	Each child object present in volume->child_object[] must be matched,
 *	in order, by one DM_TARGET_LINEAR target that starts where the
 *	previous one ended, spans MD_NEW_SIZE_SECTORS(child size) sectors and
 *	maps the child's major:minor at device offset 0.
 *
 *	Returns 0 when the targets match, EINVAL on any mismatch, or the
 *	error returned by dm_get_targets().
 */
static int linear_compare_targets(md_volume_t * volume)
{
	dm_target_t *target_list = NULL;
	dm_target_t *cur;
	dm_device_t *linear_dev;
	storage_object_t *child;
	u_int64_t expected_start = 0;
	int idx;
	int rc;

	LOG_ENTRY();

	rc = EngFncs->dm_get_targets(volume->region, &target_list);
	if (rc) {
		goto out;
	}

	cur = target_list;
	for (idx = 0; idx < MAX_MD_DEVICES && cur; idx++) {
		child = volume->child_object[idx];
		if (!child) {
			// Empty slot: it consumes no target.
			continue;
		}

		// The target itself must line up with this child.
		if (cur->start != expected_start ||
		    cur->length != MD_NEW_SIZE_SECTORS(child->size) ||
		    cur->type != DM_TARGET_LINEAR) {
			rc = EINVAL;
			goto out;
		}

		// ... and it must map this child's device from offset 0.
		linear_dev = cur->data.linear;
		if (linear_dev->major != child->dev_major ||
		    linear_dev->minor != child->dev_minor ||
		    linear_dev->start != 0) {
			rc = EINVAL;
			goto out;
		}

		expected_start += MD_NEW_SIZE_SECTORS(child->size);
		cur = cur->next;
	}

	// Left-over targets, or targets exhausted before all slots were visited.
	if (cur || idx < volume->nr_disks) {
		rc = EINVAL;
	}

out:
	EngFncs->dm_deallocate_targets(target_list);
	LOG_EXIT_INT(rc);
	return rc;
}

/* Function: linear_create_region
 *
 *	Create the region for one linear MD volume, attach the volume's
 *	child objects to it and append it to output_list.
 *
 *	volume      - MD volume to turn into a region.
 *	output_list - list that receives the new region.
 *	final_call  - when FALSE, a volume whose members are not all present
 *	              is left alone so that a later pass can complete it.
 *
 *	Returns 0 when discovery is delayed and ENOMEM when no region name
 *	could be allocated.  Otherwise the region is always added to
 *	output_list and the return value is 0, or the error reported by
 *	linear_compare_targets() when the region is an active device-mapper
 *	device whose targets do not match the MD layout.
 */
int linear_create_region(md_volume_t * volume, list_anchor_t output_list, boolean final_call){
	int rc = 0;
	storage_object_t * region;
	int found = 0;
	int i, j = -1;	// j >= 0 only when a fallback "md/md<j>" name was allocated
	int change;

	LOG_ENTRY();

	// Members are still missing: wait for a later pass unless this is the last one.
	// NOTE(review): when final_call is set the code below dereferences
	// volume->super_block without a NULL check -- confirm that a volume
	// can never reach the final pass without a superblock.
	if ((!volume->super_block || (volume->nr_disks !=  volume->super_block->nr_disks)) &&
	    !final_call) {
		LOG_DETAILS("Region %s. missing members, delaying discovery\n",volume->name);
		LOG_EXIT_INT(0);
		return 0;
	}

	LOG_DETAILS("Discovered region %s.\n",volume->name);
	rc = EngFncs->allocate_region(volume->name, &region);
	if (rc) {
		// The preferred name could not be allocated; try md/md<N> names
		// from the highest minor downwards.  Leave the loop with 'break'
		// so that j still holds the minor that was actually allocated
		// (a loop condition on rc would let the j-- step run once more).
		for (j = MAX_MD_MINORS - 1; j >= 0; j--) {
			sprintf(volume->name, "md/md%d",j);
			rc = EngFncs->allocate_region(volume->name, &region);
			if (rc == 0) {
				break;
			}
		}
		if (rc) {
			LOG_ERROR("No more names for MD ");
			LOG_EXIT_INT(ENOMEM);
			return ENOMEM;
		}
	}

	for (i = 0; (i < MAX_MD_DEVICES) && (found < volume->nr_disks); i++ ) {
		// check for null object, if missing, skip and set corrupt flag
		if (volume->child_object[i]) {
			// if name registration failed and we changed the name, fix up all the minor numbers
			if (j >= 0) {
				volume->super_array[i]->md_minor = j;
			}
			md_append_region_to_object(region, volume->child_object[i]);
			LOG_DETAILS("Adding Object %s to %s\n",
				volume->child_object[i]->name, volume->name);
			region->size += MD_NEW_SIZE_SECTORS(volume->child_object[i]->size);
			found++;
		} else {
			MESSAGE("Region %s is corrupt."
				"  The disk indexed %d (major:%d, minor:%d, index:%d) is missing.\n",
				region->name, i,
				volume->super_block->disks[i].major,
				volume->super_block->disks[i].minor,
				i);
			volume->flags |= MD_CORRUPT;
			region->flags |= SOFLAG_CORRUPT;
		}
	}

	// no holes in object list, but did not find enough.
	if (!(volume->flags & MD_CORRUPT) && (volume->nr_disks != volume->super_block->nr_disks) ) {
		MESSAGE("Region %s is corrupt.  The MD superblock has raid_disks=%d, only found %d disks.\n",
			region->name, volume->super_block->raid_disks, volume->nr_disks);
		volume->flags |= MD_CORRUPT;
		region->flags |= SOFLAG_CORRUPT;
	}

	region->data_type = DATA_TYPE;
	region->plugin = linear_plugin;
	region->private_data = (void *)volume;
	volume->flags |= MD_DISCOVERED;
	volume->region = region;

	// A corrupt region is still reported, but its status is not queried
	// and its superblock is not verified.
	if (volume->flags & MD_CORRUPT) {
		md_add_object_to_list(region, output_list);
		LOG_EXIT_INT(rc);
		return rc;
	}

	/*
	 * Query device-mapper for the status of this MD object.
	 * If this MD object is active, it's already activated as
	 * a DM device.  Otherwise, check with the MD kernel driver.
	 */
	rc = EngFncs->dm_update_status(region);
	if (!rc && (region->flags & SOFLAG_ACTIVE)) {
		rc = linear_compare_targets(volume);
		if (rc) {
			// Active, but the loaded mapping does not match the MD layout.
			region->flags |= SOFLAG_NEEDS_ACTIVATE;
		} else {
			LOG_DEBUG("Region %s is an active DM device (%d:%d)\n",
				  region->name, region->dev_major, region->dev_minor);
		}
	} else {
		rc = 0;
		region->dev_major = MD_MAJOR;
		region->dev_minor = volume->super_block->md_minor;
		MD_CHECK_ACTIVE(region);
	}

	if ((region->flags & SOFLAG_ACTIVE) == 0)
		region->flags |= SOFLAG_NEEDS_ACTIVATE;

	// linear_verify_and_fix_array() returns a bit mask (bit 0 = minor
	// problem, bit 1 = major problem) and both bits can be set at once,
	// so test the bits instead of comparing against the values 1 and 2.
	change = linear_verify_and_fix_array(volume, 0, 1);
	if (change & 2) {
		// major problem
		volume->flags |= MD_CORRUPT;
		region->flags |= SOFLAG_CORRUPT;
	} else if (change & 1) {
		// minor change, just fix it.
		linear_verify_and_fix_array(volume, 1, 0);
	}

	md_add_object_to_list(region, output_list);
	LOG_EXIT_INT(rc);
	return rc;
}




/* Function: linear_discover_regions
 *
 *	Walk the global list of MD volumes and piece together a region for
 *	every linear volume that has not been discovered yet.
 *
 *	output_list - list that receives the regions that get created.
 *	count       - incremented once for each volume that is marked
 *	              MD_DISCOVERED by this call.
 *	final_call  - passed through to linear_create_region().
 *
 *	Returns the result of the last linear_create_region() call, or 0
 *	when no volume needed processing.
 */
int linear_discover_regions( list_anchor_t output_list, int *count, boolean final_call )
{
	md_volume_t * vol;
	int rc = 0;

	my_plugin = linear_plugin;
	LOG_ENTRY();

	for (vol = volume_list_head; vol != NULL; vol = vol->next) {
		// Only undiscovered volumes of the linear personality are ours.
		if (vol->flags & MD_DISCOVERED) {
			continue;
		}
		if (vol->personality != LINEAR) {
			continue;
		}

		rc = linear_create_region(vol, output_list, final_call);
		if (vol->flags & MD_DISCOVERED) {
			(*count)++;
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

// Verify, and optionally repair, the superblock bookkeeping of a linear array.
//
// volume - the MD volume whose super_block is checked.
// fix    - 0: only validate.  Non-zero: correct every problem found in the
//          in-memory superblock, clear MD_CORRUPT/MD_DEGRADED on the volume
//          and mark the region SOFLAG_DIRTY.
// do_msg - non-zero: when not fixing, report each problem with MESSAGE().
//
// The return value is a bit mask:
//   0     - nothing had to be changed
//   bit 0 - (value 1) minor problem (only used for major/minor numbers)
//   bit 1 - (value 2) major problem
// Both bits can be set in the same call, giving 3.

int  linear_verify_and_fix_array(md_volume_t * volume, int fix, int do_msg){

	mdp_disk_t sync_template;	// all zero except for the SYNC state bit
	mdp_disk_t zero_template;	// all zero
	mdp_disk_t *entry;
	int 	idx;
	int 	change = 0;
	int 	nr_disks = 0;
	int 	working_disks = 0;
	int 	active_disks = 0;
	int 	failed_disks = 0;
	int 	major, minor;

	LOG_ENTRY();

	for (idx = 0; idx < MAX_MD_DEVICES && nr_disks < volume->nr_disks; idx++ ) {
		if (!volume->child_object[idx]) {
			// ok, found a hole.  The array should already be marked
			// corrupt, so stop scanning.
			break;
		}

		entry = &volume->super_block->disks[idx];
		nr_disks++;

		// The descriptor must sit at its own index.
		if (entry->number != idx || entry->raid_disk != idx) {
			change |= 2;
			if (fix) {
				entry->number = idx;
				entry->raid_disk = idx;
			} else if (do_msg) {
				MESSAGE("Region %s object index incorrect: is %d, should be %d.\n",volume->name, entry->number,idx);
			}
		}

		// The recorded major/minor must match the child object, unless the
		// volume asks for the old device numbers or the child has none (0:0).
		if (!(volume->flags & MD_USE_OLD_DEV)) {
			major = volume->child_object[idx]->dev_major;
			minor = volume->child_object[idx]->dev_minor;
			if ((entry->major != major || entry->minor != minor) &&
			    (major != 0 || minor != 0)) {
				change |= 1;
				LOG_DEFAULT("Region %s object index %d (%s) has incorrect major/minor (%d:%d), should be (%d:%d)\n",
					volume->name, idx, volume->child_object[idx]->name,
					entry->major,
					entry->minor,
					major, minor);
				if (fix) {
					// The superblock save is only requested when the
					// major number differs.
					if (entry->major != major)
						volume->commit_flag |= MD_COMMIT_SAVE_SB;
					entry->major = major;
					entry->minor = minor;

					LOG_DEFAULT("[Fixed] Setting major/minor of (%s) to (%d:%d)\n",
						volume->child_object[idx]->name,
						major,
						minor);
				}
			}
		}

		// A child found beyond the superblock's own disk count.
		if (idx >= volume->super_block->nr_disks) {
			change |= 2;
			if (fix) {
				entry->state = (1 << MD_DISK_NEW);
			} else if (do_msg) {
				MESSAGE("Region %s object index %d is greater than nr_disks.\n", volume->name, idx);
			}
		}

		// Classify the disk by its (possibly just corrected) state.
		switch (entry->state) {
		case (1<<MD_DISK_ACTIVE | 1<<MD_DISK_SYNC):
		case (1<<MD_DISK_ACTIVE | 1<<MD_DISK_SYNC | 1<<MD_DISK_PENDING_ACTIVE ):
		case (1<<MD_DISK_ACTIVE | 1<<MD_DISK_SYNC | 1<<MD_DISK_PENDING_ACTIVE | 1<<MD_DISK_NEW):
		case (1<<MD_DISK_ACTIVE | 1<<MD_DISK_SYNC | 1<<MD_DISK_NEW):
			active_disks++;
			working_disks++;
			break;

			// active, but not sync, kernel just kind of ignores these drives
			// so make him a spare so that the kernel will re-sync if needed.
			// or sync, but not active, do the same.
		case (1<<MD_DISK_ACTIVE):
		case (1<<MD_DISK_SYNC):
			change |= 2;
			if (fix) {
				entry->state =(1<<MD_DISK_PENDING_ACTIVE)| (1<<MD_DISK_NEW);
			} else if (do_msg) {
				MESSAGE("Region %s object index %d is in invalid state.\n",volume->name, idx);
			}
			/* fallthrough: counted like a spare */
		case 0:	// 0 = spare
		case (1<<MD_DISK_NEW):	// new = spare
		case (1<<MD_DISK_PENDING_ACTIVE):	// new = spare
		case (1<<MD_DISK_PENDING_ACTIVE | 1<<MD_DISK_NEW):	// new = spare
			working_disks++;
			break;

		case (1<<MD_DISK_REMOVED):
		case (1<<MD_DISK_FAULTY):
		case (1<<MD_DISK_FAULTY | 1<<MD_DISK_REMOVED):
		default:
			if (!fix && do_msg) {
				MESSAGE("Region %s object index %d (%s) is faulty.  Array may be degraded.\n",volume->name, idx, volume->child_object[idx]->name);
			}
			failed_disks++;
			break;
		}
	}

	// Check that all of the unused disks array entries are zeroed.
	// According to the original author's note, the kernel MD code will use
	// these entries even though the count fields say they are not valid,
	// and only some personalities behave that way; because the superblock
	// creation routine is shared, an unused entry is accepted with the
	// sync bit either on or off.
	memset(&sync_template, 0, sizeof(mdp_disk_t));
	memset(&zero_template, 0, sizeof(mdp_disk_t));
	sync_template.state = (1<<MD_DISK_SYNC);
	for (idx = volume->nr_disks; idx < MAX_MD_DEVICES; idx++) {
		entry = &volume->super_block->disks[idx];

		if (descriptor_removed(entry))
			continue;

		// Either accepted form is fine.
		if (memcmp(&sync_template, entry, sizeof(mdp_disk_t)) == 0 ||
		    memcmp(&zero_template, entry, sizeof(mdp_disk_t)) == 0)
			continue;

		change |= 2;
		if (fix) {
			memcpy(entry, &sync_template, sizeof(mdp_disk_t));
		} else if (do_msg) {
			MESSAGE("Region %s disks array not zeroed.\n",volume->name);
		}
	}

	// Compare the superblock counters with what was just counted.
	// raid_disks and spare_disks are not compared or rewritten here.
	if (volume->super_block->active_disks != active_disks ||
	    volume->super_block->working_disks != working_disks ||
	    volume->super_block->failed_disks != failed_disks ||
	    volume->super_block->nr_disks != nr_disks ) {
		change |= 2;
		if (fix) {
			volume->super_block->active_disks = active_disks;
			volume->super_block->working_disks = working_disks;
			volume->super_block->failed_disks = failed_disks;
			volume->super_block->nr_disks = nr_disks;
		} else if (do_msg) {
			MESSAGE("Region %s disk counts incorrect.\n",volume->name);
		}
	}

	// In fix mode the volume is considered clean afterwards and the region
	// is marked dirty, whether or not anything was actually changed.
	if (fix) {
		volume->flags &= ~MD_CORRUPT;
		volume->flags &= ~MD_DEGRADED;
		volume->region->flags |= SOFLAG_DIRTY;
	}
	LOG_EXIT_INT(change);
	return change;
}

