/*
 *   (C) Copyright IBM Corp. 2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: LVM2 Plugin
 * File: evms2/engine/plugins/lvm2/regions.c
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <plugin.h>
#include "lvm2.h"


/**
 * Routines for communicating with Device-Mapper.
 **/


/**
 * build_target_list
 *
 * Translate the region's mappings into a list of Device-Mapper targets, one
 * target per region-mapping. A mapping with more than one stripe becomes a
 * stripe target; every other mapping becomes a linear target.
 *
 * Returns the head of the new list. NULL is returned if a target cannot be
 * allocated (targets built so far are released first), and also when the
 * region has no mappings at all.
 **/
dm_target_t *build_target_list(storage_object_t *region)
{
	container_data_t *c_data = region->producing_container->private_data;
	region_data_t *r_data = region->private_data;
	u_int64_t pe_size = c_data->pe_size;
	dm_target_t *head = NULL, *tgt;
	dm_target_stripe_t *stripe = NULL;
	dm_target_type type;
	region_mapping_t *r_map;
	logical_extent_map_t *le_map;
	physical_extent_t *pe;
	storage_object_t *dev_object;
	dm_device_t *dev;
	list_element_t iter;
	int i;

	LOG_ENTRY();
	LOG_DEBUG("Building target list for region %s.\n", region->name);

	/* One DM target is produced for every region-mapping. */
	LIST_FOR_EACH(r_data->mappings, iter, r_map) {
		if (r_map->stripe_count > 1) {
			type = DM_TARGET_STRIPE;
		} else {
			type = DM_TARGET_LINEAR;
		}

		/* Start and length are converted from extents using pe_size. */
		tgt = EngFncs->dm_allocate_target(type,
						  r_map->start_le * pe_size,
						  r_map->le_count * pe_size,
						  r_map->stripe_count, 0);
		if (!tgt) {
			LOG_ERROR("Error allocating DM target for region %s.\n",
				  region->name);
			/* Throw away everything built so far. */
			EngFncs->dm_deallocate_targets(head);
			head = NULL;
			break;
		}

		if (type == DM_TARGET_STRIPE) {
			stripe = tgt->data.stripe;
			stripe->num_stripes = r_map->stripe_count;
			stripe->chunk_size = r_map->stripe_size;
		}

		for (i = 0; i < r_map->stripe_count; i++) {
			le_map = &r_map->le_maps[i];

			/* Pick the device slot that describes this stripe. */
			if (type == DM_TARGET_STRIPE) {
				dev = &stripe->devices[i];
			} else {
				dev = tgt->data.linear;
			}

			/* First PE of the stripe locates it on the PV. */
			pe = le_map->map[0].pe;

			if (le_map->copy_job) {
				/* This "stripe" is being moved: map to the
				 * copy-job's mirror object, starting at
				 * offset zero, instead of the PV object.
				 */
				dev_object = le_map->copy_job->mirror;
				dev->start = 0;
			} else {
				dev_object = pe->pv_data->object;
				dev->start = pe->pv_data->pe_start +
					     pe->number * pe_size;
			}
			dev->major = dev_object->dev_major;
			dev->minor = dev_object->dev_minor;
		}

		EngFncs->dm_add_target(tgt, &head);
	}

	LOG_EXIT_PTR(head);
	return head;
}

/**
 * compare_kernel_mapping
 *
 * Compare the target list that Device-Mapper reports for this region with
 * the target list built from the engine's current view of the region
 * (build_target_list()). The lists match only if they have the same number
 * of targets and every pair of targets agrees on start, length, type, stripe
 * parameters, and the major/minor/start of each underlying device.
 *
 * If the lists differ, or if either list cannot be obtained, the user is
 * notified and SOFLAG_NEEDS_ACTIVATE is set on the region. Both target lists
 * are released before returning.
 **/
static void compare_kernel_mapping(storage_object_t *region)
{
	dm_target_t *kernel_target, *kernel_target_list = NULL;
	dm_target_t *engine_target, *engine_target_list = NULL;
	dm_device_t *kernel_linear, *engine_linear;
	dm_target_stripe_t *kernel_stripe = NULL, *engine_stripe = NULL;
	u_int32_t stripe_count, i;
	/* Assume a mismatch until the whole comparison has succeeded. */
	int rc, needs_reactivate = TRUE;

	LOG_ENTRY();
	LOG_DEBUG("Comparing engine and kernel mappings for region %s.\n",
		  region->name);

	/* Get the kernel mapping. */
	rc = EngFncs->dm_get_targets(region, &kernel_target_list);
	if (rc) {
		LOG_ERROR("Error getting kernel mapping for region %s.\n",
			  region->name);
		goto out;
	}

	/* Get the current engine mapping. */
	engine_target_list = build_target_list(region);
	if (!engine_target_list) {
		LOG_ERROR("Error building engine mapping for region %s.\n",
			  region->name);
		goto out;
	}

	/* Walk both lists in lock-step and compare each pair of targets. */
	for (kernel_target = kernel_target_list, engine_target = engine_target_list;
	     kernel_target && engine_target;
	     kernel_target = kernel_target->next, engine_target = engine_target->next) {
		/* Linear targets are compared as a single device. */
		stripe_count = 1;

		/* Start, length, and type of the target must match. */
		if (kernel_target->start  != engine_target->start ||
		    kernel_target->length != engine_target->length ||
		    kernel_target->type   != engine_target->type) {
			goto out;
		}

		/* For stripe targets, num-stripes and chunk-size must match. */
		if (kernel_target->type == DM_TARGET_STRIPE) {
			kernel_stripe = kernel_target->data.stripe;
			engine_stripe = engine_target->data.stripe;
			if (kernel_stripe->num_stripes != engine_stripe->num_stripes ||
			    kernel_stripe->chunk_size  != engine_stripe->chunk_size) {
				goto out;
			}
			stripe_count = kernel_stripe->num_stripes;
		}

		/* Major, minor, and starting-LBA of each device must match.
		 * The target types are known to be equal at this point.
		 */
		for (i = 0; i < stripe_count; i++) {
			kernel_linear = (kernel_target->type == DM_TARGET_STRIPE) ?
					&kernel_stripe->devices[i] : kernel_target->data.linear;
			engine_linear = (engine_target->type == DM_TARGET_STRIPE) ?
					&engine_stripe->devices[i] : engine_target->data.linear;
			if (kernel_linear->major != engine_linear->major ||
			    kernel_linear->minor != engine_linear->minor ||
			    kernel_linear->start != engine_linear->start) {
				goto out;
			}
		}

	}

	/* Both lists must have been exhausted at the same time; otherwise
	 * one of them has extra targets.
	 */
	if (!kernel_target && !engine_target) {
		LOG_DETAILS("Kernel and engine mappings match for region %s.\n",
			    region->name);
		needs_reactivate = FALSE;
	}

out:
	if (needs_reactivate) {
		/* FIXME: Only do during final discovery? */
		MESSAGE(_("Error comparing kernel mappings for active region %s.\n"
			  "Region %s will be reactivated at the next commit.\n"),
			region->name, region->name);
		region->flags |= SOFLAG_NEEDS_ACTIVATE;
	}

	EngFncs->dm_deallocate_targets(kernel_target_list);
	EngFncs->dm_deallocate_targets(engine_target_list);

	LOG_EXIT_VOID();
}

/**
 * check_kernel_status
 *
 * Ask the engine's Device-Mapper service to update this region's status.
 * When that succeeds and the region is flagged active, compare the kernel
 * mapping against the engine's mapping.
 *
 * Returns the result of dm_update_status().
 **/
static int check_kernel_status(storage_object_t *region)
{
	int status;

	LOG_ENTRY();
	LOG_DEBUG("Checking kernel status of region %s.\n", region->name);

	status = EngFncs->dm_update_status(region);
	if (status) {
		LOG_ERROR("Error checking kernel status of region %s.\n",
			  region->name);
	} else if (region->flags & SOFLAG_ACTIVE) {
		/* Only active regions have a kernel mapping to compare. */
		compare_kernel_mapping(region);
	}

	LOG_EXIT_INT(status);
	return status;
}

/**
 * rename_region
 *
 * If the region carries a pending rename, ask Device-Mapper to rename the
 * device from the old name (dev_name) to the current name. On success the
 * old name is discarded; on failure it is kept and the error is logged.
 *
 * Returns 0 when no rename is pending or the rename succeeded, otherwise
 * the error code from dm_rename().
 **/
int rename_region(storage_object_t *region)
{
	int result = 0;

	LOG_ENTRY();

	if (LVM2_REGION_RENAMED(region)) {
		LOG_DEBUG("Renaming region from %s to %s.\n",
			  region->dev_name, region->name);

		result = EngFncs->dm_rename(region, region->dev_name,
					    region->name);
		if (result) {
			LOG_ERROR("Error renaming region from %s to %s.\n",
				  region->dev_name, region->name);
		} else {
			/* DM now uses the new name; drop the old one. */
			remove_old_region_name(region);
		}
	}

	LOG_EXIT_INT(result);
	return result;
}

/**
 * Routines for constructing and tearing-down regions.
 **/


/**
 * Translate between LVM2 LV names and EVMS region names.
 **/

/**
 * lv_name_to_region_name
 *
 * Build the region name "<container_name>/<lv_name>" in region_name. At most
 * EVMS_NAME_SIZE bytes (including the terminator) are written, so the
 * destination buffer must be at least that large.
 **/
void lv_name_to_region_name(char *lv_name,
			    char *region_name,
			    char *container_name)
{
	LOG_ENTRY();

	/* snprintf truncates and always terminates within the given size. */
	snprintf(region_name, EVMS_NAME_SIZE, "%s/%s",
		 container_name, lv_name);

	LOG_EXIT_VOID();
}

/**
 * region_name_to_lv_name
 *
 * Extract the LV name from a region name of the form
 * "<container_name>/<lv_name>" (the format produced by
 * lv_name_to_region_name()). Up to EVMS_NAME_SIZE bytes are copied into
 * lv_name.
 *
 * The container name must be followed by the '/' separator. Without that
 * check, a region name that ends with the container name would cause a read
 * past the end of the string.
 *
 * Returns 0 on success, or EINVAL if the region name does not contain
 * "<container_name>/".
 **/
int region_name_to_lv_name(char *region_name,
			   char *lv_name,
			   char *container_name)
{
	size_t prefix_len = strlen(container_name);
	char *ptr;
	int rc = 0;

	LOG_ENTRY();

	/* Find the container name and make sure the separator follows it. */
	ptr = strstr(region_name, container_name);
	if (ptr && ptr[prefix_len] == '/') {
		/* Skip over the container name and the separator. */
		strncpy(lv_name, ptr + prefix_len + 1, EVMS_NAME_SIZE);
	} else {
		LOG_ERROR("Invalid region name: %s\n", region_name);
		rc = EINVAL;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * switch_region_name
 *
 * Swap the region's current name and its saved old name (dev_name). A
 * renamed region that has to be deactivated must be deactivated under the
 * previous name, because that is the name Device-Mapper still knows it by.
 * Nothing happens if the region has no pending rename.
 **/
void switch_region_name(storage_object_t *region)
{
	char saved[EVMS_NAME_SIZE+1];

	LOG_ENTRY();

	if (LVM2_REGION_RENAMED(region)) {
		/* Three-way swap through a scratch buffer. */
		strncpy(saved, region->dev_name, EVMS_NAME_SIZE);
		strncpy(region->dev_name, region->name, EVMS_NAME_SIZE);
		strncpy(region->name, saved, EVMS_NAME_SIZE);
	}

	LOG_EXIT_VOID();
}

/**
 * remove_old_region_name
 *
 * Drop the old name of a renamed region: unregister it with the engine and
 * zero the whole dev_name buffer. Nothing happens if the region has no
 * pending rename.
 **/
void remove_old_region_name(storage_object_t *region)
{
	LOG_ENTRY();

	if (LVM2_REGION_RENAMED(region)) {
		EngFncs->unregister_name(region->dev_name);
		memset(region->dev_name, '\0', EVMS_NAME_SIZE + 1);
	}

	LOG_EXIT_VOID();
}

/**
 * set_new_region_name
 *
 * Give the region a new name built from its container's name and lv_name.
 * The new name is registered with the engine and copied into region->name.
 *
 * If this is the first rename, the previous name is saved in
 * region->dev_name. An active region is then flagged for activation so that
 * Device-Mapper can be told about the rename; an inactive region has its old
 * name discarded immediately. If the region had already been renamed, the
 * intermediate name is simply unregistered and the first name is kept in
 * dev_name.
 *
 * NOTE(review): the return value of register_name() is not checked here.
 **/
void set_new_region_name(storage_object_t *region, char *lv_name)
{
	storage_container_t *container = region->producing_container;
	char new_name[EVMS_NAME_SIZE+1];
	char old_name[EVMS_NAME_SIZE+1];

	LOG_ENTRY();
	LOG_DEBUG("Change name of region %s to %s.\n", region->name, lv_name);

	/* Save the current name. strncpy() does not terminate the copy when
	 * the source fills the whole length, so terminate it explicitly
	 * before it is used as a string below.
	 */
	strncpy(old_name, region->name, EVMS_NAME_SIZE);
	old_name[EVMS_NAME_SIZE] = '\0';

	/* Copy the new name into the region. */
	lv_name_to_region_name(lv_name, new_name, container->name);
	EngFncs->register_name(new_name);
	strncpy(region->name, new_name, EVMS_NAME_SIZE);

	if (LVM2_REGION_RENAMED(region)) {
		/* If we've already renamed this region, we can simply
		 * unregister the old name. We need the *first* name in
		 * order to properly rename or deactivate the DM device.
		 */
		EngFncs->unregister_name(old_name);
	} else {
		/* If this is the first rename, save the old name in the
		 * region's dev_name field.
		 */
		strncpy(region->dev_name, old_name, EVMS_NAME_SIZE);

		if (region->flags & SOFLAG_ACTIVE) {
			/* If the region is active, we need to tell DM
			 * to rename the device during activation.
			 */
			region->flags |= SOFLAG_NEEDS_ACTIVATE;
		} else {
			/* If the region is inactive, we
			 * can unregister the old_name.
			 */
			remove_old_region_name(region);
		}
	}

	LOG_EXIT_VOID();
}

/**
 * Add and remove regions from containers' produced-objects list.
 **/

/**
 * add_region_to_container
 *
 * Record the container as the region's producer and insert the region into
 * the container's objects_produced list.
 **/
void add_region_to_container(storage_object_t *region,
			     storage_container_t *container)
{
	LOG_ENTRY();
	LOG_DEBUG("Adding region %s to container %s.\n",
		  region->name, container->name);

	region->producing_container = container;
	EngFncs->insert_thing(container->objects_produced, region,
			      INSERT_AFTER, NULL);

	LOG_EXIT_VOID();
}

/**
 * remove_region_from_container
 *
 * Take the region off its producing container's objects_produced list and
 * clear the region's producing_container pointer. Does nothing if the region
 * has no producing container.
 **/
void remove_region_from_container(storage_object_t *region)
{
	storage_container_t *owner;

	LOG_ENTRY();

	owner = region->producing_container;
	if (owner != NULL) {
		LOG_DEBUG("Removing region %s from container %s.\n",
			  region->name, owner->name);
		EngFncs->remove_thing(owner->objects_produced, region);
		region->producing_container = NULL;
	}

	LOG_EXIT_VOID();
}

/**
 * Add and remove objects from the parent and child lists.
 **/

/**
 * make_parent_and_child
 *
 * Cross-link two objects: put the child on the parent's child_objects list
 * and the parent on the child's parent_objects list. Both insertions use
 * EXCLUSIVE_INSERT.
 **/
static void make_parent_and_child(storage_object_t *parent,
				  storage_object_t *child)
{
	const insert_flags_t how = INSERT_AFTER | EXCLUSIVE_INSERT;

	LOG_EXTRA_ENTRY();

	EngFncs->insert_thing(parent->child_objects, child, how, NULL);
	EngFncs->insert_thing(child->parent_objects, parent, how, NULL);

	LOG_EXTRA_EXIT_VOID();
}

/**
 * unmake_parent_and_child
 *
 * Undo make_parent_and_child(): take the child off the parent's
 * child_objects list and the parent off the child's parent_objects list.
 * Does nothing if either pointer is NULL.
 **/
static void unmake_parent_and_child(storage_object_t *parent,
				    storage_object_t *child)
{
	LOG_ENTRY();

	if (parent != NULL && child != NULL) {
		EngFncs->remove_thing(parent->child_objects, child);
		EngFncs->remove_thing(child->parent_objects, parent);
	}

	LOG_EXIT_VOID();
}

/**
 * Increment or decrement the size of the region by the size of the mapping.
 * The region's producing_container must be initialized.
 **/

/**
 * increment_region_size
 *
 * Grow the region's size by the size of the mapping (le_count extents of
 * pe_size each). The region's producing_container must already be set.
 **/
static void increment_region_size(storage_object_t *region,
				  region_mapping_t *r_map)
{
	container_data_t *c_data;

	LOG_ENTRY();

	c_data = region->producing_container->private_data;
	region->size = region->size + r_map->le_count * c_data->pe_size;

	LOG_EXIT_VOID();
}

/**
 * decrement_region_size
 *
 * Shrink the region's size by the size of the mapping (le_count extents of
 * pe_size each). A region with no producing container is left untouched:
 * it is probably being deleted, so its size no longer matters.
 **/
static void decrement_region_size(storage_object_t *region,
				  region_mapping_t *r_map)
{
	storage_container_t *owner = region->producing_container;
	container_data_t *c_data;

	LOG_ENTRY();

	if (owner != NULL) {
		c_data = owner->private_data;
		region->size = region->size - r_map->le_count * c_data->pe_size;
	}

	LOG_EXIT_VOID();
}

/**
 * Add and remove region-mappings from the region. Increment or decrement
 * the size of the region as appropriate.
 **/

/**
 * add_mapping_to_region
 *
 * Insert the mapping into the region's mappings list, which is kept ordered
 * by start_le, point the mapping back at the region-data, and grow the
 * region by the size of the mapping.
 **/
static void add_mapping_to_region(region_mapping_t *r_map,
				  region_data_t *r_data)
{
	region_mapping_t *existing;
	list_element_t pos;
	insert_flags_t where = INSERT_AFTER;

	LOG_ENTRY();

	/* Stop at the first mapping that starts beyond the new one; the new
	 * mapping goes directly in front of it. If the loop runs to the end
	 * the flag stays INSERT_AFTER and "pos" is whatever LIST_FOR_EACH
	 * leaves behind.
	 */
	LIST_FOR_EACH(r_data->mappings, pos, existing) {
		if (existing->start_le > r_map->start_le) {
			where = INSERT_BEFORE;
			break;
		}
	}

	r_map->r_data = r_data;
	EngFncs->insert_thing(r_data->mappings, r_map, where, pos);
	increment_region_size(r_data->region, r_map);

	LOG_EXIT_VOID();
}

/**
 * remove_mapping_from_region
 *
 * Detach the mapping from the region it belongs to: shrink the region by the
 * size of the mapping, take the mapping off the region's list, and clear the
 * mapping's back-pointer. A mapping with no region-data is left alone.
 **/
static void remove_mapping_from_region(region_mapping_t *r_map)
{
	region_data_t *owner = r_map->r_data;

	LOG_ENTRY();

	if (owner == NULL) {
		LOG_EXIT_VOID();
		return;
	}

	decrement_region_size(owner->region, r_map);
	EngFncs->remove_thing(owner->mappings, r_map);
	r_map->r_data = NULL;

	LOG_EXIT_VOID();
}

/**
 * find_mapping_by_index
 *
 * Walk the region's list of mappings and return the mapping at position
 * map_index (counting from zero).
 *
 * NOTE(review): when the list holds fewer than map_index+1 mappings, the
 * return value is whatever LIST_FOR_EACH leaves in the item variable at the
 * end of the list - presumably NULL; confirm against the macro.
 **/
region_mapping_t *find_mapping_by_index(storage_object_t *region,
					u_int32_t map_index)
{
	region_data_t *r_data = region->private_data;
	region_mapping_t *candidate;
	list_element_t iter;
	u_int32_t to_skip = map_index;

	LOG_ENTRY();

	/* Count down instead of up: stop once enough entries were skipped. */
	LIST_FOR_EACH(r_data->mappings, iter, candidate) {
		if (to_skip == 0) {
			break;
		}
		to_skip--;
	}

	LOG_EXIT_PTR(candidate);
	return candidate;
}

/**
 * deallocate_le_map_stripe
 *
 * Release one array of logical-extents through the engine's engine_free().
 **/
void deallocate_le_map_stripe(logical_extent_t *le_map)
{
	LOG_ENTRY();

	EngFncs->engine_free(le_map);

	LOG_EXIT_VOID();
}

/**
 * deallocate_le_map
 *
 * Release everything hanging off r_map->le_maps: for each stripe both the
 * current map and the new_map array, then the le_maps array itself. The
 * r_map->le_maps pointer is cleared afterwards. Does nothing if the mapping
 * has no le_maps.
 **/
static void deallocate_le_map(region_mapping_t *r_map)
{
	logical_extent_map_t *maps = r_map->le_maps;
	u_int64_t stripe;

	LOG_ENTRY();

	if (maps == NULL) {
		LOG_EXIT_VOID();
		return;
	}

	for (stripe = 0; stripe < r_map->stripe_count; stripe++) {
		deallocate_le_map_stripe(maps[stripe].map);
		deallocate_le_map_stripe(maps[stripe].new_map);
	}

	EngFncs->engine_free(maps);
	r_map->le_maps = NULL;

	LOG_EXIT_VOID();
}

/**
 * allocate_le_map_stripe
 *
 * Allocate an array of extents_per_stripe logical-extents for one stripe and
 * point every entry back at the given logical-extent-map.
 *
 * Returns the new array, or NULL if the allocation failed.
 **/
logical_extent_t *allocate_le_map_stripe(logical_extent_map_t *le_maps,
					 u_int64_t extents_per_stripe)
{
	logical_extent_t *extents;
	u_int64_t idx;

	LOG_ENTRY();

	extents = EngFncs->engine_alloc(extents_per_stripe * sizeof(*extents));
	if (extents != NULL) {
		/* Every logical-extent gets a back-pointer to its map. */
		for (idx = 0; idx < extents_per_stripe; idx++) {
			extents[idx].le_map = le_maps;
		}
	}

	LOG_EXIT_PTR(extents);
	return extents;
}

/**
 * allocate_le_map
 *
 * Allocate and initialize the logical-extent maps for this region-mapping:
 * one logical_extent_map_t per stripe, each with an array of
 * (le_count / stripe_count) logical-extents. The r_map structure must be
 * initialized (r_data, le_count, stripe_count) before calling this routine.
 *
 * Returns 0 on success, EINVAL if the mapping has a stripe-count of zero
 * (which would otherwise cause a division by zero), or ENOMEM if an
 * allocation fails. On ENOMEM any partially built maps are released.
 **/
static int allocate_le_map(region_mapping_t *r_map)
{
	logical_extent_map_t *le_maps;
	u_int64_t i, stripe_count = r_map->stripe_count;
	u_int64_t extents_per_stripe;
	int rc = 0;

	LOG_ENTRY();

	/* The stripe-count can come from on-disk metadata; never divide
	 * by it without checking. Nothing has been allocated yet, so there
	 * is nothing to clean up on this path.
	 */
	if (stripe_count == 0) {
		LOG_ERROR("Invalid stripe-count (0) for region-mapping for "
			  "region %s.\n", r_map->r_data->region->name);
		LOG_EXIT_INT(EINVAL);
		return EINVAL;
	}
	extents_per_stripe = r_map->le_count / stripe_count;

	le_maps = EngFncs->engine_alloc(stripe_count * sizeof(*le_maps));
	if (!le_maps) {
		rc = ENOMEM;
		goto out;
	}
	r_map->le_maps = le_maps;

	for (i = 0; i < stripe_count; i++) {
		/* Set up back-pointers to r_map. */
		le_maps[i].r_map = r_map;

		le_maps[i].map = allocate_le_map_stripe(&le_maps[i],
							extents_per_stripe);
		if (!le_maps[i].map) {
			rc = ENOMEM;
			goto out;
		}
	}

out:
	if (rc) {
		LOG_ERROR("Error allocating LE map for region-mapping for "
			  "region %s.\n", r_map->r_data->region->name);
		/* Releases whatever stripes were allocated so far. */
		deallocate_le_map(r_map);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * allocate_region_mapping
 *
 * Allocate a region-mapping for the given region-data, fill in its extent
 * and stripe parameters, and allocate its logical-extent maps. The mapping
 * is not added to the region's list here.
 *
 * Returns the new mapping, or NULL if either allocation failed.
 **/
static region_mapping_t *allocate_region_mapping(region_data_t *r_data,
						 u_int64_t start_le,
						 u_int64_t le_count,
						 u_int64_t stripe_count,
						 u_int64_t stripe_size)
{
	region_mapping_t *mapping;

	LOG_ENTRY();
	LOG_DEBUG("Allocating mapping for region %s:\n", r_data->region->name);
	LOG_DEBUG("\tStart-LE: %"PRIu64", LE-count: %"PRIu64
		  ", Stripe-count: %"PRIu64", Stripe-size: %"PRIu64"\n",
		  start_le, le_count, stripe_count, stripe_size);

	mapping = EngFncs->engine_alloc(sizeof(*mapping));
	if (mapping == NULL) {
		LOG_ERROR("Error allocating region-mapping for region %s.\n",
			  r_data->region->name);
		goto out;
	}

	mapping->r_data = r_data;
	mapping->start_le = start_le;
	mapping->le_count = le_count;
	mapping->stripe_count = stripe_count;
	mapping->stripe_size = stripe_size;

	/* The LE maps are sized from the fields set just above. */
	if (allocate_le_map(mapping) != 0) {
		EngFncs->engine_free(mapping);
		mapping = NULL;
	}

out:
	LOG_EXIT_PTR(mapping);
	return mapping;
}

/**
 * deallocate_region_mapping
 *
 * Free the specified region-mapping together with its LE-maps. A NULL
 * mapping is accepted and ignored.
 **/
static void deallocate_region_mapping(region_mapping_t *r_map)
{
	LOG_ENTRY();

	if (r_map == NULL) {
		LOG_EXIT_VOID();
		return;
	}

	deallocate_le_map(r_map);
	EngFncs->engine_free(r_map);

	LOG_EXIT_VOID();
}

/**
 * deallocate_region_mappings
 *
 * Empty the region's list of mappings: each mapping is detached from the
 * region and then freed. The list itself is not destroyed.
 **/
void deallocate_region_mappings(storage_object_t *region)
{
	region_data_t *r_data = region->private_data;
	region_mapping_t *mapping;
	list_element_t cur, next;

	LOG_ENTRY();
	LOG_DEBUG("Deallocating mappings for region %s.\n", region->name);

	/* The "safe" iterator is needed because entries are removed from
	 * the list while it is being walked.
	 */
	LIST_FOR_EACH_SAFE(r_data->mappings, cur, next, mapping) {
		remove_mapping_from_region(mapping);
		deallocate_region_mapping(mapping);
	}

	LOG_EXIT_VOID();
}

/**
 * does_pv_map_to_region
 *
 * Check whether any physical-extent on this PV is currently pointing at a
 * logical-extent that belongs to the given region-data.
 *
 * Returns TRUE if such a PE exists, FALSE otherwise.
 **/
static int does_pv_map_to_region(pv_data_t *pv_data, region_data_t *r_data)
{
	physical_extent_t *pe = pv_data->pe_map;
	u_int64_t left;
	int found = FALSE;

	LOG_ENTRY();

	for (left = pv_data->pe_count; left > 0; left--, pe++) {
		/* Unused PEs have no LE pointer. */
		if (pe->le && pe->le->le_map->r_map->r_data == r_data) {
			found = TRUE;
			break;
		}
	}

	LOG_EXIT_INT(found);
	return found;
}

/**
 * construct_region_mapping_stripe
 *
 * Wire one stripe's logical-extents to consecutive physical-extents of a
 * PV, starting at pe_index: each LE points at its PE and each PE points
 * back at its LE. The number of extents is le_count / stripe_count of the
 * mapping that owns the LE-map. Afterwards the region and the PV's object
 * are linked as parent and child.
 **/
void construct_region_mapping_stripe(logical_extent_t *le_map,
				     physical_extent_t *pe_map,
				     u_int64_t pe_index)
{
	region_mapping_t *owner = le_map[0].le_map->r_map;
	u_int64_t count = owner->le_count / owner->stripe_count;
	logical_extent_t *le;
	physical_extent_t *pe;
	u_int64_t n;

	LOG_ENTRY();

	/* Cross-link LE n of the stripe with PE (pe_index + n) of the PV. */
	for (n = 0; n < count; n++) {
		le = &le_map[n];
		pe = &pe_map[pe_index + n];
		le->pe = pe;
		pe->le = le;
	}

	/* Every entry in pe_map belongs to the same PV; use the first. */
	make_parent_and_child(owner->r_data->region,
			      pe_map[0].pv_data->object);

	LOG_EXIT_VOID();
}

/**
 * construct_region_mapping
 *
 * For every stripe of the mapping, look up the PV named by pv_indexes[] in
 * the region's container and wire the stripe's LE-map to that PV's PE-map
 * starting at pe_indexes[]. Both arrays must hold stripe_count entries.
 *
 * Returns 0 on success, or EINVAL if a PV cannot be found. Stripes that were
 * constructed before the failure are left in place.
 *
 * FIXME: We'll need something like this for setting up "new" LE-maps.
 **/
static int construct_region_mapping(region_mapping_t *r_map,
				    u_int32_t *pv_indexes,
				    u_int64_t *pe_indexes)
{
	storage_object_t *region = r_map->r_data->region;
	storage_container_t *container = region->producing_container;
	pv_data_t *pv;
	u_int64_t stripe;
	int rc = 0;

	LOG_ENTRY();

	for (stripe = 0; stripe < r_map->stripe_count; stripe++) {
		pv = find_pv_by_index(container, pv_indexes[stripe]);
		if (pv == NULL) {
			/* FIXME: This shouldn't happen as long as we filled in
			 *        the missing PVs after container-discovery.
			 */
			LOG_ERROR("BUG! Cannot find PV %u in container %s.\n",
				  pv_indexes[stripe], container->name);
			rc = EINVAL;
			break;
		}

		construct_region_mapping_stripe(r_map->le_maps[stripe].map,
						pv->pe_map,
						pe_indexes[stripe]);
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * deconstruct_region_mapping_stripe
 *
 * Helper for deconstruct_region_mapping(). Clears the PE-to-LE pointers of
 * every physical-extent that still points at an entry of this LE-map. If the
 * PV holding the first extent then no longer maps to the region at all, the
 * region and that PV's object stop being parent and child.
 *
 * A NULL le_map is accepted and ignored.
 **/
void deconstruct_region_mapping_stripe(logical_extent_t *le_map)
{
	region_mapping_t *owner;
	physical_extent_t *first_pe;
	u_int64_t count, n;

	LOG_ENTRY();

	if (le_map != NULL) {
		owner = le_map[0].le_map->r_map;
		count = owner->le_count / owner->stripe_count;

		/* Only the PE side is cleared; the LE-map is about to be
		 * deleted, so its own pointers can stay as they are.
		 */
		for (n = 0; n < count; n++) {
			if (le_map[n].pe && le_map[n].pe->le == &le_map[n]) {
				le_map[n].pe->le = NULL;
			}
		}

		first_pe = le_map[0].pe;
		if (first_pe &&
		    !does_pv_map_to_region(first_pe->pv_data, owner->r_data)) {
			unmake_parent_and_child(owner->r_data->region,
						first_pe->pv_data->object);
		}
	}

	LOG_EXIT_VOID();
}

/**
 * deconstruct_region_mapping
 *
 * Remove the links between logical and physical extents for every stripe of
 * this mapping, covering both the current map and the new_map of each
 * stripe. The per-stripe helper also drops the region/PV parent-child link
 * once a PV no longer maps to the region.
 **/
static void deconstruct_region_mapping(region_mapping_t *r_map)
{
	logical_extent_map_t *stripe_map;
	u_int64_t stripe;

	LOG_ENTRY();

	for (stripe = 0; stripe < r_map->stripe_count; stripe++) {
		stripe_map = &r_map->le_maps[stripe];
		deconstruct_region_mapping_stripe(stripe_map->map);
		deconstruct_region_mapping_stripe(stripe_map->new_map);
	}

	LOG_EXIT_VOID();
}

/**
 * deconstruct_region_mappings
 *
 * Run deconstruct_region_mapping() on every mapping in the region's list.
 * The mappings themselves stay on the list and are not freed.
 **/
void deconstruct_region_mappings(storage_object_t *region)
{
	region_data_t *r_data = region->private_data;
	region_mapping_t *mapping;
	list_element_t pos;

	LOG_ENTRY();
	LOG_DEBUG("Deconstructing mappings for region %s.\n", region->name);

	LIST_FOR_EACH(r_data->mappings, pos, mapping) {
		deconstruct_region_mapping(mapping);
	}

	LOG_EXIT_VOID();
}

/**
 * allocate_region
 *
 * Allocate and initialize a region storage-object and all the necessary
 * private data: the region-data structure and its (empty) list of
 * region-mappings. LVM2_UUID_LEN bytes are copied from uuid into the
 * region-data.
 *
 * Returns the new region, or NULL on any failure. On failure everything that
 * was allocated here is released again.
 **/
static storage_object_t *allocate_region(char *region_name, char *uuid)
{
	storage_object_t *region = NULL;
	region_data_t *r_data;
	int rc;

	LOG_ENTRY();

	rc = EngFncs->allocate_region(region_name, &region);
	if (rc) {
		LOG_ERROR("Error allocating new region %s.\n", region_name);
		/* Callers test the return value against NULL. */
		region = NULL;
		goto out;
	}

	/* Private data for the region. */
	r_data = EngFncs->engine_alloc(sizeof(*r_data));
	if (!r_data) {
		LOG_ERROR("Error allocating private data for new region %s.\n",
			  region_name);
		EngFncs->free_region(region);
		/* Don't hand the freed region back to the caller. */
		region = NULL;
		goto out;
	}

	/* List for the region-mappings. */
	r_data->mappings = EngFncs->allocate_list();
	if (!r_data->mappings) {
		LOG_ERROR("Error allocating mappings list for new region %s.\n",
			  region_name);
		EngFncs->engine_free(r_data);
		EngFncs->free_region(region);
		/* Don't hand the freed region back to the caller. */
		region = NULL;
		goto out;
	}

	r_data->region = region;
	memcpy(r_data->uuid, uuid, LVM2_UUID_LEN);

	region->data_type = DATA_TYPE;
	region->plugin = my_plugin_record;
	region->private_data = r_data;

	LOG_DETAILS("Allocated region %s.\n", region_name);

out:
	LOG_EXIT_PTR(region);
	return region;
}

/**
 * deallocate_region
 *
 * Release a region and everything allocate_region() attached to it: the
 * region-mappings, the mappings list, the region-data, and finally the
 * region object itself. The region is not removed from its container and no
 * PVs are removed from its child-objects list; callers handle that.
 *
 * A NULL region is accepted and ignored.
 **/
void deallocate_region(storage_object_t *region)
{
	region_data_t *r_data;

	LOG_ENTRY();

	if (region == NULL) {
		LOG_EXIT_VOID();
		return;
	}

	LOG_DETAILS("Deallocating region %s.\n", region->name);

	r_data = region->private_data;
	if (r_data != NULL) {
		if (r_data->mappings != NULL) {
			/* Empty the list before destroying it. */
			deallocate_region_mappings(region);
			EngFncs->destroy_list(r_data->mappings);
		}
		EngFncs->engine_free(r_data);
	}

	EngFncs->free_region(region);

	LOG_EXIT_VOID();
}


/**
 * Routines for managing the freespace regions.
 **/


/**
 * allocate_freespace_region
 *
 * Create the region that represents this container's freespace. The region
 * is named "<container>/Freespace", is marked FREE_SPACE_TYPE, has its
 * SOFLAG_NEW and SOFLAG_DIRTY flags cleared, and is added to the container.
 *
 * Returns 0 on success or ENOMEM if the region could not be allocated.
 *
 * NOTE(review): the uuid buffer is initialized with the character '0' in its
 * first byte and zero bytes after that - confirm this is the intended
 * placeholder UUID.
 **/
int allocate_freespace_region(storage_container_t *container)
{
	char uuid[LVM2_UUID_LEN+1] = {'0'};
	char name[EVMS_NAME_SIZE];
	storage_object_t *freespace;
	int rc = 0;

	LOG_ENTRY();

	lv_name_to_region_name("Freespace", name, container->name);

	freespace = allocate_region(name, uuid);
	if (freespace) {
		/* allocate_region() creates a data region; override the
		 * type by hand.
		 */
		freespace->data_type = FREE_SPACE_TYPE;
		freespace->flags &= ~(SOFLAG_NEW | SOFLAG_DIRTY);

		add_region_to_container(freespace, container);
	} else {
		LOG_ERROR("Error allocating freespace region for "
			  "container %s.\n", container->name);
		rc = ENOMEM;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * get_freespace_region
 *
 * Return the freespace region from a list of regions. The freespace region
 * is expected to be the first entry on the list; that entry is returned only
 * if it is of FREE_SPACE_TYPE and is owned by this plugin.
 *
 * Returns the freespace region, or NULL if the list is empty or the first
 * entry fails either check.
 **/
storage_object_t *get_freespace_region(list_anchor_t region_list)
{
	storage_object_t *first;

	LOG_ENTRY();

	first = EngFncs->first_thing(region_list, NULL);
	if (first == NULL) {
		LOG_ERROR("List of regions is empty.\n");
	} else if (first->data_type != FREE_SPACE_TYPE) {
		LOG_ERROR("First region on the list (%s) is not a freespace "
			  "region.\n", first->name);
		first = NULL;
	} else if (first->plugin != my_plugin_record) {
		LOG_ERROR("Freespace region %s does not belong to LVM2.\n",
			  first->name);
		first = NULL;
	}

	LOG_EXIT_PTR(first);
	return first;
}

/**
 * create_freespace_mappings
 *
 * After all regions have been discovered and all their mappings have been
 * set up, create the mappings for the freespace region using any PEs that
 * aren't in use. Each run of consecutive unused PEs on a PV becomes one
 * single-stripe mapping; the mappings are laid out back-to-back in the
 * freespace region's logical-extent space.
 *
 * Returns 0 on success, EINVAL if the container has no valid freespace
 * region or a mapping cannot be constructed, or ENOMEM if a mapping cannot
 * be allocated. Mappings created before a failure are left in place.
 **/
int create_freespace_mappings(storage_container_t *container)
{
	storage_object_t *freespace, *object;
	region_data_t *f_data;
	region_mapping_t *f_map;
	pv_data_t *pv_data;
	list_element_t iter;
	u_int64_t i, j, le = 0, count;
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Creating freespace mappings for container %s.\n",
		  container->name);

	/* get_freespace_region() returns NULL when the list is empty or
	 * its first entry is not our freespace region.
	 */
	freespace = get_freespace_region(container->objects_produced);
	if (!freespace) {
		LOG_ERROR("Error finding freespace region for container %s.\n",
			  container->name);
		rc = EINVAL;
		goto out;
	}
	f_data = freespace->private_data;

	LIST_FOR_EACH(container->objects_consumed, iter, object) {
		if (MISSING_PV(object)) {
			/* "Missing" PVs don't have any freespace. */
			continue;
		}

		pv_data = object->consuming_private_data;
		for (i = 0; i < pv_data->pe_count; i++) {
			if (pv_data->pe_map[i].le) {
				/* This PE is in use. */
				continue;
			}

			/* Find all consecutive unused PEs. */
			for (j = i+1; j < pv_data->pe_count; j++) {
				if (pv_data->pe_map[j].le) {
					break;
				}
			}

			count = j - i;
			f_map = allocate_region_mapping(f_data, le, count, 1, 0);
			if (!f_map) {
				/* FIXME: Delete existing mappings? */
				rc = ENOMEM;
				goto out;
			}

			add_mapping_to_region(f_map, f_data);
			rc = construct_region_mapping(f_map, &pv_data->pv_index, &i);
			if (rc) {
				/* Don't leave a half-built mapping on the
				 * freespace region.
				 */
				deconstruct_region_mapping(f_map);
				remove_mapping_from_region(f_map);
				deallocate_region_mapping(f_map);
				goto out;
			}

			le += count;
			/* Skip the run just mapped; the loop adds one more. */
			i += count - 1;
		}
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * delete_freespace_mappings
 *
 * Tear down and free every mapping of this container's freespace region.
 * Does nothing if the container has no valid freespace region.
 **/
void delete_freespace_mappings(storage_container_t *container)
{
	storage_object_t *fs_region;

	LOG_ENTRY();
	LOG_DEBUG("Deleting freespace mappings for container %s.\n",
		  container->name);

	fs_region = get_freespace_region(container->objects_produced);
	if (fs_region != NULL) {
		/* Unlink the extents first, then free the mappings. */
		deconstruct_region_mappings(fs_region);
		deallocate_region_mappings(fs_region);
	}

	LOG_EXIT_VOID();
}


/**
 * Routines for discovering regions.
 **/


/**
 * get_segment_info_from_vgda
 *
 * Parse a "segment" section in the VGDA into appropriate values that can be
 * used to create and initialize a region-mapping.
 *
 * The section must provide "start_extent", "extent_count", "type" (only
 * "striped" is understood), "stripe_count", "stripes", and - when there is
 * more than one stripe - "stripe_size". The "stripes" array is read as pairs
 * of strings: "pv<N>" followed by the starting PE index.
 *
 * On success *pv_indexes and *pe_indexes point to newly allocated arrays of
 * *stripe_count entries each; the caller owns them and must release them
 * with engine_free(). On failure no arrays are handed back: anything that
 * was allocated here is freed and the pointers are reset to NULL.
 *
 * Returns 0 on success, EINVAL for missing or malformed entries (including a
 * stripe_count of zero), or ENOMEM if the arrays cannot be allocated.
 *
 * NOTE(review): the number of strings in the "stripes" array is not checked
 * against stripe_count here.
 **/
static int get_segment_info_from_vgda(key_value_t *segment_node,
				      u_int64_t *start_le,
				      u_int64_t *le_count,
				      u_int64_t *stripe_count,
				      u_int64_t *stripe_size,
				      u_int32_t **pv_indexes,
				      u_int64_t **pe_indexes)
{
	key_value_t *node;
	char **strings;
	u_int64_t i;
	int rc = 0;

	LOG_ENTRY();

	/* Starting extent of this segment. */
	node = find_key(get_section(segment_node), "start_extent");
	if (!node) {
		LOG_ERROR("Error finding \"start_extent\" entry.\n");
		rc = EINVAL;
		goto out;
	}
	*start_le = read_int64(node);

	/* Number of extents in this segment. */
	node = find_key(get_section(segment_node), "extent_count");
	if (!node) {
		LOG_ERROR("Error finding \"extent_count\" entry.\n");
		rc = EINVAL;
		goto out;
	}
	*le_count = read_int64(node);

	/* Type of segment. Currently only understands "striped". */
	node = find_key(get_section(segment_node), "type");
	if (!node) {
		LOG_ERROR("Error finding \"type\" entry.\n");
		rc = EINVAL;
		goto out;
	}

	/* FIXME: Allow more mapping types. */
	if (strcmp(node->value.string, "striped")) {
		LOG_ERROR("Found segment type \"%s\".\n", node->value.string);
		LOG_ERROR("Only \"striped\" type currently supported.\n");
		rc = EINVAL;
		goto out;
	}

	/* Number of stripes in this segment.
	 * FIXME: Only applicable for "striped" types.
	 */
	node = find_key(get_section(segment_node), "stripe_count");
	if (!node) {
		LOG_ERROR("Error finding \"stripe_count\" entry.\n");
		rc = EINVAL;
		goto out;
	}
	*stripe_count = read_int64(node);

	/* A mapping needs at least one stripe. Later code divides by the
	 * stripe-count, so a zero must never get past this point.
	 */
	if (*stripe_count == 0) {
		LOG_ERROR("Invalid \"stripe_count\" value (0).\n");
		rc = EINVAL;
		goto out;
	}

	if (*stripe_count > 1) {
		/* Size of each stripe "chunk". */
		node = find_key(get_section(segment_node), "stripe_size");
		if (!node) {
			LOG_ERROR("Error finding \"stripe_size\" entry.\n");
			rc = EINVAL;
			goto out;
		}
		*stripe_size = read_int64(node);
	} else {
		*stripe_size = 0;
	}

	/* Physical location (PV/starting-PE) of segment. */
	node = find_key(get_section(segment_node), "stripes");
	if (!node) {
		LOG_ERROR("Error finding \"stripes\" entry.\n");
		rc = EINVAL;
		goto out;
	}
	strings = node->value.array->strings;

	/* Allocate arrays for the PV and PE indexes. */
	*pv_indexes = EngFncs->engine_alloc(*stripe_count * sizeof(**pv_indexes));
	*pe_indexes = EngFncs->engine_alloc(*stripe_count * sizeof(**pe_indexes));
	if (!*pv_indexes || !*pe_indexes) {
		LOG_ERROR("Error allocating arrays for PV and PE indexes.\n");
		rc = ENOMEM;
		goto free_indexes;
	}

	/* Translate the "segment" node into PV and PE index arrays. The
	 * sscanf() result is tested directly so that it cannot be confused
	 * with (or overwrite) the function's own return code.
	 */
	for (i = 0; i < *stripe_count; i++) {
		if (sscanf(strings[i*2], "pv%u", &((*pv_indexes)[i])) != 1) {
			LOG_ERROR("Error parsing PV index from \"%s\".\n",
				  strings[i*2]);
			rc = EINVAL;
			goto free_indexes;
		}

		(*pe_indexes)[i] = strtoull(strings[i*2+1], NULL, 0);
	}

	goto out;

free_indexes:
	/* Nothing is handed back on failure; don't leave stale pointers. */
	EngFncs->engine_free(*pv_indexes);
	EngFncs->engine_free(*pe_indexes);
	*pv_indexes = NULL;
	*pe_indexes = NULL;

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * create_region_mapping_from_vgda
 *
 * Parse one "segment" section of the VGDA and construct a corresponding
 * region-mapping for this region: the mapping is allocated, added to the
 * region, and wired to the physical-extents named by the segment. If wiring
 * fails the mapping is removed from the region and freed again.
 *
 * The PV/PE index arrays produced by the parser are only needed while the
 * mapping is being constructed and are released before returning.
 *
 * Returns 0 on success, the parser's error code, ENOMEM if the mapping
 * cannot be allocated, or the error from construct_region_mapping().
 **/
static int create_region_mapping_from_vgda(key_value_t *segment_node,
					   storage_object_t *region)
{
	region_data_t *r_data = region->private_data;
	region_mapping_t *r_map;
	u_int64_t start_le = 0, stripe_count = 0;
	u_int64_t stripe_size = 0, le_count = 0;
	u_int64_t *pe_indexes = NULL;
	u_int32_t *pv_indexes = NULL;
	int rc;

	LOG_ENTRY();

	rc = get_segment_info_from_vgda(segment_node, &start_le, &le_count,
					&stripe_count, &stripe_size,
					&pv_indexes, &pe_indexes);
	if (rc) {
		/* The parser releases its own arrays when it fails, so
		 * they must not be freed again here.
		 */
		goto out;
	}

	r_map = allocate_region_mapping(r_data, start_le, le_count,
					stripe_count, stripe_size);
	if (!r_map) {
		rc = ENOMEM;
		goto free_indexes;
	}

	add_mapping_to_region(r_map, r_data);

	rc = construct_region_mapping(r_map, pv_indexes, pe_indexes);
	if (rc) {
		/* Undo everything done for this mapping. */
		deconstruct_region_mapping(r_map);
		remove_mapping_from_region(r_map);
		deallocate_region_mapping(r_map);
	}

free_indexes:
	/* construct_region_mapping() only reads the index arrays, so they
	 * are no longer needed once it has returned.
	 */
	EngFncs->engine_free(pv_indexes);
	EngFncs->engine_free(pe_indexes);

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * create_region_mappings_from_vgda
 *
 * Parse all the "segment" sections in this LV section of the VGDA and create
 * corresponding mappings for the region. Sections are looked up by name as
 * "segment1", "segment2", ... until one is missing or a mapping fails. The
 * number of mappings on the region is then checked against the LV's
 * "segment_count" entry.
 *
 * Returns 0 on success, the error from creating a mapping, or EINVAL if
 * "segment_count" is missing or does not match.
 **/
static int create_region_mappings_from_vgda(key_value_t *lv_node,
					    storage_object_t *region)
{
	region_data_t *r_data = region->private_data;
	key_value_t *node;
	char key[20];
	int mappings_count, segment_count;
	int rc = 0, i = 1;

	LOG_ENTRY();
	LOG_DEBUG("Discovering mappings for region %s.\n", region->name);

	/* Create a region-mapping for each "segment" section. */
	do {
		snprintf(key, sizeof(key), "segment%d", i++);
		node = find_key(get_section(lv_node), key);
		if (node) {
			rc = create_region_mapping_from_vgda(node, region);
		}
	} while (node && !rc);

	if (rc) {
		goto out;
	}

	/* Get the number of mapping segments and verify that it matches
	 * the number of mappings created for the region.
	 */
	node = find_key(get_section(lv_node), "segment_count");
	if (!node) {
		LOG_ERROR("Error finding \"segment_count\" section in VGDA "
			  "for region %s.\n", region->name);
		rc = EINVAL;
		goto out;
	}

	mappings_count = EngFncs->list_count(r_data->mappings);
	segment_count = read_int32(node);
	if (mappings_count != segment_count) {
		/* Both counts are ints, so print them with %d. */
		LOG_ERROR("Number of discovered mappings (%d) does not match "
			  "recorded mapping-count (%d) for region %s.\n",
			  mappings_count, segment_count, region->name);
		rc = EINVAL;
		goto out;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * find_region_by_name
 *
 * Walk the objects produced by this container and stop at the first one
 * whose name equals region_name (compared over at most EVMS_NAME_SIZE
 * characters). The matching region is returned; if the walk finishes without
 * a match, whatever LIST_FOR_EACH left in the cursor is returned (presumably
 * NULL - the caller treats a non-NULL result as "found").
 **/
static storage_object_t *find_region_by_name(char *region_name,
					     storage_container_t *container)
{
	storage_object_t *candidate = NULL;
	list_element_t pos;

	LOG_ENTRY();
	LOG_DEBUG("Searching for region %s in container %s.\n",
		  region_name, container->name);

	LIST_FOR_EACH(container->objects_produced, pos, candidate) {
		if (strncmp(candidate->name, region_name,
			    EVMS_NAME_SIZE) != 0) {
			/* Different name - keep looking. */
			continue;
		}
		break;
	}

	LOG_EXIT_PTR(candidate);
	return candidate;
}

/**
 * create_region_from_vgda
 *
 * Create a region based on information in one logical-volume node of the
 * VGDA. The node's key is the LV name; its section must contain "id" and
 * "status" entries plus the segment descriptions.
 *
 * On success the region has been added to the container and appended to
 * output_objects. On any failure the partially built region is released and
 * neither the container nor output_objects is modified.
 *
 * Returns 0 on success, EEXIST if a region with this name already exists in
 * the container, EINVAL if a required VGDA entry is missing, ENOMEM if the
 * region cannot be allocated, or the error reported by
 * create_region_mappings_from_vgda().
 **/
static int create_region_from_vgda(key_value_t *lv_node,
				   storage_container_t *container,
				   list_anchor_t output_objects)
{
	storage_object_t *region;
	region_data_t *r_data;
	key_value_t *node;
	/* Same buffer size as create_new_region() uses for the same helper. */
	char region_name[EVMS_NAME_SIZE+1];
	char region_uuid[LVM2_UUID_LEN+1];
	int rc;

	LOG_ENTRY();

	/* The key of the lv_node is the name of the LV. */
	lv_name_to_region_name(lv_node->key, region_name, container->name);

	/* Check if this region has already been created. */
	region = find_region_by_name(region_name, container);
	if (region) {
		LOG_DETAILS("Region %s already exists in container %s.\n",
			    region_name, container->name);
		rc = EEXIST;
		goto out;
	}

	/* Find the UUID node. */
	node = find_key(get_section(lv_node), "id");
	if (!node) {
		/* FIXME: Can we just create a new UUID?
		 *        Or just continue without a UUID?
		 */
		LOG_ERROR("Error finding \"id\" entry in VGDA for region %s.\n",
			  region_name);
		rc = EINVAL;
		goto out;
	}
	unformat_uuid(node->value.string, region_uuid);

	/* Allocate the new region. */
	region = allocate_region(region_name, region_uuid);
	if (!region) {
		rc = ENOMEM;
		goto out;
	}
	r_data = region->private_data;

	/* Set this even though we haven't added the region to the container.
	 * It means we won't have to keep passing the container pointer around.
	 */
	region->producing_container = container;

	/* Get the status flags from the VGDA. */
	node = find_key(get_section(lv_node), "status");
	if (!node) {
		LOG_ERROR("Error finding \"status\" entry in VGDA for region "
			  "%s.\n", region_name);
		/* The region is not owned by anything yet, so release it
		 * here rather than leaking it.
		 */
		deallocate_region(region);
		rc = EINVAL;
		goto out;
	}
	read_flags(node, LV_FLAGS, &r_data->flags);

	rc = create_region_mappings_from_vgda(lv_node, region);
	if (rc) {
		deallocate_region(region);
		goto out;
	}

	check_kernel_status(region);
	add_region_to_container(region, container);
	EngFncs->insert_thing(output_objects, region, INSERT_AFTER, NULL);

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * discover_regions_in_container
 *
 * Create a region for every entry in the "logical_volumes" section of this
 * container's VGDA tree.
 *
 * Nothing is done for a container without a VGDA tree. A container flagged
 * as incomplete is skipped as well, unless this is the final discovery call.
 * On the final call the freespace mappings are created after the regions
 * have been processed.
 *
 * Returns the number of regions that were successfully created.
 **/
static int discover_regions_in_container(storage_container_t *container,
					 list_anchor_t output_objects,
					 boolean final_call)
{
	container_data_t *c_data = container->private_data;
	key_value_t *lv_section, *lv_node;
	int regions_found = 0;

	LOG_ENTRY();
	LOG_DEBUG("Discovering regions for container %s.\n", container->name);

	/* No VGDA tree means there is nothing to discover from. */
	if (!c_data->vgda_tree) {
		LOG_DEBUG("Skipping region discovery in container %s.\n",
			  container->name);
		goto out;
	}

	/* Wait for missing PVs until the final discovery pass. */
	if ((c_data->flags & LVM2_CONTAINER_FLAG_INCOMPLETE) &&
	    !final_call) {
		LOG_DEBUG("Container %s is missing one or more PVs. "
			  "Skipping region discovery.\n", container->name);
		goto out;
	}

	lv_section = find_key(get_section(c_data->vgda_tree),
			      "logical_volumes");
	if (!lv_section) {
		LOG_DEBUG("No regions in container %s.\n", container->name);
	} else {
		/* One node per logical volume; failures are not fatal,
		 * they just don't count.
		 */
		lv_node = get_section(lv_section);
		while (lv_node) {
			if (create_region_from_vgda(lv_node, container,
						    output_objects) == 0) {
				regions_found++;
			}
			lv_node = lv_node->next;
		}
	}

	if (final_call) {
		create_freespace_mappings(container);
	}

out:
	LOG_EXIT_INT(regions_found);
	return regions_found;
}

/**
 * discover_regions
 *
 * Run region discovery on every container in the global lvm2_containers
 * list and return the total number of regions discovered.
 **/
int discover_regions(list_anchor_t output_objects, boolean final_call)
{
	storage_container_t *vg;
	list_element_t pos;
	int total = 0;
	int found;

	LOG_ENTRY();

	LIST_FOR_EACH(lvm2_containers, pos, vg) {
		found = discover_regions_in_container(vg, output_objects,
						      final_call);
		total = total + found;
	}

	LOG_EXIT_INT(total);
	return total;
}


/**
 * Region I/O routines.
 **/


/**
 * map_sector
 *
 * Translate the region-relative lsn/count to a PV-relative lsn/count. Return
 * the new lsn and count and PV object.
 *
 * Only the leading part of the request that fits in one piece is mapped:
 * for a striped mapping the returned count is clipped at the end of the
 * current stripe chunk, for a linear mapping at the end of the mapping.
 * The caller is expected to call again for the remainder.
 *
 * Returns 0 on success, or EINVAL if no mapping covers the starting sector
 * (in which case *new_lsn is 0 and *object is NULL).
 **/
static int map_sector(storage_object_t *region,
		      u_int64_t org_lsn,
		      u_int64_t org_count,
		      u_int64_t *new_lsn,
		      u_int64_t *new_count,
		      storage_object_t **object)
{
	container_data_t *c_data = region->producing_container->private_data;
	region_data_t *r_data = region->private_data;
	u_int64_t pe_size = c_data->pe_size;
	region_mapping_t *r_map;
	physical_extent_t *pe;
	list_element_t iter;
	u_int64_t le, offset_in_le;
	u_int64_t lsn_in_map, stripe;
	u_int64_t chunk_in_map, chunk_in_stripe, offset_in_chunk;
	int rc = 0;

	LOG_ENTRY();

	*new_lsn = 0;
	*object = NULL;

	/* Assume for now that the request will fit in one mapping. */
	*new_count = org_count;

	/* Calculate the LE number and use it to find the mapping
	 * where this I/O request starts.
	 */
	le = org_lsn / pe_size;
	LIST_FOR_EACH(r_data->mappings, iter, r_map) {
		if (le >= r_map->start_le &&
		    le < r_map->start_le + r_map->le_count) {
			break;
		}
	}

	if (!r_map) {
		/* FIXME: This shouldn't happen as long as we size-check in
		 * region_io and the region has a complete mapping list.
		 */
		LOG_ERROR("BUG! Could not find mapping for LE %"PRIu64" in "
			  "region %s.\n", le, region->name);
		rc = EINVAL;
		goto out;
	}


	if (r_map->stripe_count > 1) {
		/* Striped mapping. Chunks of stripe_size sectors are dealt
		 * out round-robin across the stripes.
		 */
		lsn_in_map	= org_lsn - (r_map->start_le * pe_size);
		chunk_in_map	= lsn_in_map / r_map->stripe_size;
		offset_in_chunk	= lsn_in_map % r_map->stripe_size;
		stripe		= chunk_in_map % r_map->stripe_count;
		chunk_in_stripe	= chunk_in_map / r_map->stripe_count;

		/* The target sector is computed as an offset from the
		 * stripe's first PE.
		 */
		pe		= r_map->le_maps[stripe].map[0].pe;

		*object		= pe->pv_data->object;
		*new_lsn	= pe->pv_data->pe_start +
				  pe->number * pe_size +
				  chunk_in_stripe * r_map->stripe_size +
				  offset_in_chunk;

		/* If the request goes off the end of this chunk,
		 * adjust the new_count appropriately.
		 */
		if (offset_in_chunk + *new_count > r_map->stripe_size) {
			*new_count = r_map->stripe_size - offset_in_chunk;
		}
	} else {
		/* Linear mapping. */

		/* Subtract off the starting-LE of this mapping to get the LE
		 * within this mapping, and then get the PE from the LE-map.
		 */
		le -= r_map->start_le;
		pe = r_map->le_maps[0].map[le].pe;
		offset_in_le = org_lsn % pe_size;

		*object = pe->pv_data->object;
		*new_lsn = pe->number * pe_size +
			   pe->pv_data->pe_start + offset_in_le;

		/* If the request goes off the end of this mapping,
		 * adjust the new_count appropriately.
		 */
		if (org_lsn + *new_count >
		    (r_map->start_le + r_map->le_count) * pe_size) {
			*new_count = (r_map->start_le + r_map->le_count) *
				     pe_size - org_lsn;
		}
	}

	LOG_DEBUG("Mapped region:%s sector:%"PRIu64" count:%"PRIu64" to object:"
		  "%s sector:%"PRIu64" count:%"PRIu64".\n", region->name,
		  org_lsn, org_count, (*object)->name, *new_lsn, *new_count);

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * region_io
 *
 * Map and submit an I/O request for this region.
 *
 * The request is carried out piece by piece: each pass maps as much of the
 * remaining range as map_sector() returns in one piece, submits it to the
 * underlying object, and advances.
 *
 * rw selects the operation: 0 = READ, 1 = WRITE, 2 = KILL_SECTORS (which
 * does not use the buffer). Any other value is rejected with EINVAL.
 *
 * Returns 0 on success, EIO if the request runs past the end of the region
 * or the region is not a data region, or the first error reported by
 * map_sector() or the underlying I/O call.
 **/
int region_io(storage_object_t *region, lba_t lsn,
	      sector_count_t count, void *buffer, int rw)
{
	storage_object_t *object;
	u_int64_t new_lsn, new_count;
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("I/O request (type %d) on region %s, sector %"PRIu64", "
		  "count %"PRIu64".\n", rw, region->name, lsn, count);

	/* Boundary check. This also refuses I/O on non-data regions. */
	if (lsn + count > region->size || region->data_type != DATA_TYPE) {
		LOG_ERROR("I/O request beyond end of region %s.\n",
			  region->name);
		LOG_ERROR("sector:%"PRIu64" + count:%"PRIu64" > region size:%"
			  PRIu64"\n", lsn, count, region->size);
		rc = EIO;
		goto out;
	}

	/* Loop until the entire request has been processed. */
	while (count) {
		rc = map_sector(region, lsn, count, &new_lsn, &new_count, &object);
		if (rc) {
			break;
		}

		switch (rw) {
		case 0:
			rc = READ(object, new_lsn, new_count, buffer);
			break;
		case 1:
			rc = WRITE(object, new_lsn, new_count, buffer);
			break;
		case 2:
			rc = KILL_SECTORS(object, new_lsn, new_count);
			break;
		default:
			/* Don't report success for a request type we
			 * never acted on.
			 */
			LOG_ERROR("Unknown I/O request type %d on region "
				  "%s.\n", rw, region->name);
			rc = EINVAL;
			break;
		}

		if (rc) {
			break;
		}

		count -= new_count;
		lsn += new_count;
		if (buffer) {
			/* Arithmetic on void* is a compiler extension, so
			 * advance through a char pointer instead.
			 */
			buffer = (char *)buffer +
				 (new_count << EVMS_VSECTOR_SIZE_SHIFT);
		}
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}


/**
 * Creating, expanding, and shrinking regions.
 **/


/**
 * create_new_region
 *
 * Build a brand-new region for the given LV name in this container: derive
 * the region name from the LV and container names, generate a fresh UUID,
 * allocate the region, and mark it readable, writable and visible.
 *
 * Returns the new region, or NULL if the UUID could not be created or the
 * region could not be allocated.
 **/
storage_object_t *create_new_region(storage_container_t *container,
				    char *lv_name)
{
	storage_object_t *new_region = NULL;
	region_data_t *new_data;
	char name_buf[EVMS_NAME_SIZE+1];
	char uuid_buf[LVM2_UUID_LEN+1];

	LOG_ENTRY();
	LOG_DEBUG("Creating new region %s.\n", lv_name);

	lv_name_to_region_name(lv_name, name_buf, container->name);

	if (create_new_uuid(uuid_buf) == 0) {
		new_region = allocate_region(name_buf, uuid_buf);
		if (new_region) {
			new_data = new_region->private_data;
			new_data->flags = LVM2_REGION_FLAG_READ |
					  LVM2_REGION_FLAG_WRITE |
					  LVM2_REGION_FLAG_VISIBLE;
		}
	}

	LOG_EXIT_PTR(new_region);
	return new_region;
}

/**
 * prevalidate_extent_allocation
 *
 * Given the container, list of objects, and desired size and stripes,
 * determine if any extent-allocation is possible.
 *
 * The check is a dry run on a private copy of each object's free-extent
 * count: extents are handed out in rounds of one extent per stripe, each
 * stripe in a round taking its extent from a later object in the list than
 * the previous stripe. Nothing is actually allocated.
 *
 * NOTE(review): the round loop takes one extent per stripe without checking
 * for zero in between, so it presumably expects size / pe_size to be a
 * multiple of stripes - confirm against the callers.
 *
 * Returns 0 if the allocation fits, ENOSPC if it does not, EINVAL if extents
 * are requested with zero stripes, or ENOMEM if the scratch array cannot be
 * allocated.
 **/
int prevalidate_extent_allocation(storage_container_t *container,
				  list_anchor_t objects,
				  u_int64_t size,
				  u_int64_t stripes)
{
	container_data_t *c_data = container->private_data;
	u_int64_t extents = size / c_data->pe_size;
	u_int32_t object_count = EngFncs->list_count(objects);
	u_int64_t *object_extents = NULL, i = 0;
	storage_object_t *object;
	list_element_t iter;
	u_int32_t j;
	int rc = 0;

	LOG_ENTRY();

	/* With no stripes the allocation loop below could never make
	 * progress, so refuse the request instead of spinning forever.
	 */
	if (extents && !stripes) {
		LOG_ERROR("Cannot allocate extents on zero stripes in "
			  "container %s.\n", container->name);
		rc = EINVAL;
		goto out;
	}

	/* Set up an array containing the number of free extents for
	 * each object in the list.
	 */
	object_extents = EngFncs->engine_alloc(object_count *
					       sizeof(*object_extents));
	if (!object_extents) {
		LOG_ERROR("Error allocating extents array.\n");
		rc = ENOMEM;
		goto out;
	}

	LIST_FOR_EACH(objects, iter, object) {
		object_extents[i++] = count_available_extents_in_pv(object);
	}

	while (extents && !rc) {
		for (i = 0, j = 0; i < stripes; i++, j++) {
			/* Take one extent from the next object that still
			 * has a free one.
			 */
			for ( ; j < object_count; j++) {
				if (object_extents[j]) {
					object_extents[j]--;
					extents--;
					break;
				}
			}
			if (j == object_count) {
				/* Unable to find a spot for this extent. */
				rc = ENOSPC;
				break;
			}
		}
	}

	if (rc) {
		LOG_ERROR("Unable to allocate %"PRIu64" extents on "
			  "%"PRIu64" stripes in container %s.\n",
			  size / c_data->pe_size, stripes, container->name);
	}

out:
	EngFncs->engine_free(object_extents);
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * allocate_extents_for_region
 *
 * Append mappings to the region until "size" sectors' worth of extents have
 * been allocated from the objects in objects_list.
 *
 * Each pass picks the first "stripes" objects that still have available
 * extents, finds the first run of available extents on each of them, and
 * creates one mapping that uses the same number of extents on every stripe
 * (limited by the shortest run and by the extents still needed). New
 * mappings start at the logical extent that follows the region's current
 * size.
 *
 * Mappings created by earlier passes stay on the region if a later pass
 * fails.
 *
 * Returns 0 on success, ENOMEM on allocation failure, ENOSPC if fewer than
 * "stripes" objects have available extents, EINVAL if the remaining extents
 * cannot be spread evenly over the stripes, or the error reported by
 * construct_region_mapping().
 **/
int allocate_extents_for_region(storage_object_t *region,
				list_anchor_t objects_list,
				u_int64_t size,
				u_int64_t stripes,
				u_int64_t stripe_size)
{
	container_data_t *c_data = region->producing_container->private_data;
	region_data_t *r_data = region->private_data;
	region_mapping_t *r_map;
	storage_object_t *object;
	list_element_t iter;
	pv_data_t *pv_data, **pv_datas = NULL;
	u_int64_t extents = size / c_data->pe_size;
	u_int64_t *start_pes = NULL, *pe_counts = NULL;
	u_int64_t pe_count, min_pe_count;
	u_int64_t le = region->size / c_data->pe_size;
	u_int32_t *pv_indexes = NULL;
	u_int64_t i, j, k;
	int rc = 0;

	LOG_ENTRY();

	/* "stripes" is used as a divisor below. */
	if (extents && !stripes) {
		LOG_ERROR("Cannot allocate extents for region %s on zero "
			  "stripes.\n", region->name);
		rc = EINVAL;
		goto out;
	}

	pv_datas = EngFncs->engine_alloc(sizeof(*pv_datas) * stripes);
	pv_indexes = EngFncs->engine_alloc(sizeof(*pv_indexes) * stripes);
	start_pes = EngFncs->engine_alloc(sizeof(*start_pes) * stripes);
	pe_counts = EngFncs->engine_alloc(sizeof(*pe_counts) * stripes);
	if (!pv_datas || !pv_indexes || !start_pes || !pe_counts) {
		rc = ENOMEM;
		goto out;
	}

	while (extents) {
		/* Fill in the pv_datas array. */
		i = 0;
		LIST_FOR_EACH(objects_list, iter, object) {
			pe_count = count_available_extents_in_pv(object);
			if (pe_count) {
				pv_data = object->consuming_private_data;
				pv_datas[i] = pv_data;
				pv_indexes[i] = pv_data->pv_index;
				i++;
			}
			if (i == stripes) {
				/* Found enough objects for this round. */
				break;
			}
		}

		if (i != stripes) {
			/* This shouldn't happen as long as we've
			 * prevalidated the allocation.
			 */
			LOG_CRITICAL("BUG! Could not allocate extents for "
				     "region %s.\n", region->name);
			rc = ENOSPC;
			goto out;
		}

		/* Never take more per stripe than is still needed. */
		min_pe_count = extents / stripes;
		if (!min_pe_count) {
			/* Fewer extents left than stripes: no mapping can
			 * consume them, so stop instead of adding empty
			 * mappings forever.
			 */
			LOG_ERROR("Cannot spread %"PRIu64" remaining extents "
				  "over %"PRIu64" stripes for region %s.\n",
				  extents, stripes, region->name);
			rc = EINVAL;
			goto out;
		}

		/* Find the first free extent in each of the objects. */
		for (i = 0; i < stripes; i++) {
			for (j = 0; j < pv_datas[i]->pe_count; j++) {
				if (extent_is_available(&(pv_datas[i]->pe_map[j]))) {
					/* Find the number of consecutive free
					 * extents starting at this extent.
					 */
					for (k = j + 1; k < pv_datas[i]->pe_count; k++) {
						if (!extent_is_available(&(pv_datas[i]->pe_map[k]))) {
							break;
						}
					}

					start_pes[i] = pv_datas[i]->pe_map[j].number;
					pe_counts[i] = k - j;

					min_pe_count = min(min_pe_count, pe_counts[i]);
					break;
				}
			}
		}

		/* Create a new mapping, add it to the region, and
		 * initialize the mapping.
		 */
		r_map = allocate_region_mapping(r_data, le, min_pe_count *
						stripes, stripes, stripe_size);
		if (!r_map) {
			rc = ENOMEM;
			goto out;
		}

		add_mapping_to_region(r_map, r_data);
		rc = construct_region_mapping(r_map, pv_indexes, start_pes);
		if (rc) {
			/* Undo this mapping the same way VGDA discovery
			 * does when construction fails.
			 */
			deconstruct_region_mapping(r_map);
			remove_mapping_from_region(r_map);
			deallocate_region_mapping(r_map);
			goto out;
		}

		le += min_pe_count * stripes;
		extents -= min_pe_count * stripes;
	}

out:
	EngFncs->engine_free(pv_datas);
	EngFncs->engine_free(pv_indexes);
	EngFncs->engine_free(start_pes);
	EngFncs->engine_free(pe_counts);
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * deallocate_extents_from_region
 *
 * Remove "size" sectors' worth of extents from the end of the region.
 *
 * Whole mappings are removed from the tail of the mapping list while they
 * fit in the remaining amount. If the last step only covers part of a
 * mapping, the amount is rounded down to a multiple of that mapping's
 * stripe-count and the mapping's LE-map is rebuilt at the smaller size,
 * starting from the same PV and PE on every stripe.
 *
 * Returns 0 on success, ENOMEM on allocation failure, EINVAL if the region
 * runs out of mappings before the requested amount has been removed, or the
 * error reported by allocate_le_map().
 **/
int deallocate_extents_from_region(storage_object_t *region, u_int64_t size)
{
	container_data_t *c_data = region->producing_container->private_data;
	region_data_t *r_data = region->private_data;
	region_mapping_t *r_map;
	u_int64_t extents = size / c_data->pe_size;
	u_int64_t *pe_indexes, i;
	u_int32_t *pv_indexes;
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Deallocating %"PRIu64" extents from region %s.\n",
		  extents, region->name);

	/* Work backwards from the end of the mapping list until
	 * we've removed the specified number of extents.
	 */
	r_map = EngFncs->last_thing(r_data->mappings, NULL);
	while (extents) {
		if (!r_map) {
			/* More extents requested than the region has. */
			LOG_ERROR("Region %s has no more mappings, but "
				  "%"PRIu64" extents are still to be "
				  "removed.\n", region->name, extents);
			rc = EINVAL;
			break;
		}

		if (r_map->le_count <= extents) {
			/* Remove this entire mapping. */
			LOG_DEBUG("Removing last mapping of %"PRIu64
				  " extents.\n", r_map->le_count);
			extents -= r_map->le_count;

			deconstruct_region_mapping(r_map);
			remove_mapping_from_region(r_map);
			deallocate_region_mapping(r_map);

			r_map = EngFncs->last_thing(r_data->mappings, NULL);
		} else {
			/* Remove part of this mapping. Make sure the number
			 * of extents is a multiple of the number of stripes.
			 */
			if (extents % r_map->stripe_count) {
				extents -= extents % r_map->stripe_count;
				if (!extents) {
					break;
				}
			}
			LOG_DEBUG("Removing %"PRIu64" extents from last "
				  "mapping.\n", extents);

			/* Get the information from the current
			 * LE-maps so we can build the new one.
			 */
			pv_indexes = EngFncs->engine_alloc(sizeof(*pv_indexes) *
							   r_map->stripe_count);
			pe_indexes = EngFncs->engine_alloc(sizeof(*pe_indexes) *
							   r_map->stripe_count);
			if (!pv_indexes || !pe_indexes) {
				/* One of the two may have succeeded. */
				EngFncs->engine_free(pv_indexes);
				EngFncs->engine_free(pe_indexes);
				rc = ENOMEM;
				break;
			}
			for (i = 0; i < r_map->stripe_count; i++) {
				pv_indexes[i] = r_map->le_maps[i].map[0].pe->pv_data->pv_index;
				pe_indexes[i] = r_map->le_maps[i].map[0].pe->number;
			}

			/* FIXME: We need to do this for the "new" LE-map as well!
			 *        Or, should we just disallow shrinking a region-
			 *        mapping that has a stripe that will be moved?
			 */

			/* Delete the old LE-map and create a new one */
			deconstruct_region_mapping(r_map);
			decrement_region_size(region, r_map);
			deallocate_le_map(r_map);
			r_map->le_count -= extents;
			rc = allocate_le_map(r_map);
			if (rc) {
				EngFncs->engine_free(pv_indexes);
				EngFncs->engine_free(pe_indexes);
				break;
			}
			increment_region_size(region, r_map);
			construct_region_mapping(r_map, pv_indexes, pe_indexes);

			EngFncs->engine_free(pv_indexes);
			EngFncs->engine_free(pe_indexes);
			extents = 0;
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * mappings_are_consecutive
 *
 * Decide whether r_map2 physically continues r_map1, so that the two could
 * be replaced by one mapping.
 *
 * That requires both mappings to exist, to have equal stripe-counts and
 * stripe-sizes, and - for every stripe - that neither mapping has a "new"
 * LE-map, and that the last extent of r_map1's stripe and the first extent
 * of r_map2's stripe sit on the same PV with the second PE number directly
 * following the first.
 *
 * Returns TRUE if all of this holds, FALSE otherwise.
 **/
static boolean mappings_are_consecutive(region_mapping_t *r_map1,
					region_mapping_t *r_map2)
{
	logical_extent_t *tail, *head;
	u_int64_t stripe, per_stripe;
	boolean consecutive = FALSE;

	LOG_ENTRY();

	if (r_map1 && r_map2 &&
	    r_map1->stripe_count == r_map2->stripe_count &&
	    r_map1->stripe_size == r_map2->stripe_size) {
		/* Number of extents each stripe of the first mapping holds. */
		per_stripe = r_map1->le_count / r_map1->stripe_count;

		/* Assume success and let any stripe veto it. */
		consecutive = TRUE;
		for (stripe = 0;
		     consecutive && stripe < r_map1->stripe_count;
		     stripe++) {
			/* Stripes that are being moved never count. */
			if (r_map1->le_maps[stripe].new_map ||
			    r_map2->le_maps[stripe].new_map) {
				consecutive = FALSE;
				continue;
			}

			tail = &r_map1->le_maps[stripe].map[per_stripe - 1];
			head = &r_map2->le_maps[stripe].map[0];
			if (tail->pe->pv_data != head->pe->pv_data ||
			    tail->pe->number != head->pe->number - 1) {
				consecutive = FALSE;
			}
		}
	}

	LOG_EXIT_BOOL(consecutive);
	return consecutive;
}

/**
 * merge_region_mappings
 *
 * Search the mappings list and find mappings that are physically adjacent.
 * If they are found, merge them into single mappings.
 *
 * The list is walked pairwise. When a mapping and its successor are
 * consecutive, a combined mapping is allocated, constructed from the first
 * mapping's starting PV/PE of every stripe, linked into the list in their
 * place, and then itself compared with the mapping that follows it.
 *
 * NOTE(review): the two replaced mappings are only unlinked from the list
 * here; confirm whether they should also be released.
 *
 * Returns 0 on success or ENOMEM on allocation failure. Merges completed
 * before a failure stay in effect.
 **/
int merge_region_mappings(storage_object_t *region)
{
	region_data_t *r_data = region->private_data;
	region_mapping_t *r_map1, *r_map2, *r_map3;
	list_element_t iter1, iter2, iter3;
	u_int64_t *pe_indexes, i;
	u_int32_t *pv_indexes;
	boolean result;
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Merging mappings for region %s.\n", region->name);

	r_map1 = EngFncs->first_thing(r_data->mappings, &iter1);
	while (r_map1) {
		iter2 = EngFncs->next_element(iter1);
		r_map2 = EngFncs->get_thing(iter2);

		result = mappings_are_consecutive(r_map1, r_map2);
		if (!result) {
			/* Move on to the next pair. */
			iter1 = iter2;
			r_map1 = r_map2;
			continue;
		}

		/* Allocate a new mapping to
		 * replace these two mappings.
		 */
		r_map3 = allocate_region_mapping(r_data, r_map1->start_le,
						 r_map1->le_count +
						 r_map2->le_count,
						 r_map1->stripe_count,
						 r_map1->stripe_size);
		if (!r_map3) {
			rc = ENOMEM;
			goto out;
		}

		/* Construct the new mapping to overwrite the existing
		 * mappings. Don't need to worry about the "new" LE-map,
		 * since we won't be merging mappings that are being moved.
		 */
		pv_indexes = EngFncs->engine_alloc(r_map1->stripe_count *
						   sizeof(*pv_indexes));
		pe_indexes = EngFncs->engine_alloc(r_map1->stripe_count *
						   sizeof(*pe_indexes));
		if (!pv_indexes || !pe_indexes) {
			/* Nothing references the new mapping or the index
			 * arrays yet, so release them before bailing out.
			 */
			EngFncs->engine_free(pv_indexes);
			EngFncs->engine_free(pe_indexes);
			deallocate_region_mapping(r_map3);
			rc = ENOMEM;
			goto out;
		}

		for (i = 0; i < r_map1->stripe_count; i++) {
			pv_indexes[i] = r_map1->le_maps[i].map[0].pe->pv_data->pv_index;
			pe_indexes[i] = r_map1->le_maps[i].map[0].pe->number;
		}

		construct_region_mapping(r_map3, pv_indexes, pe_indexes);

		/* Insert the new mapping in the list
		 * and remove the two old mappings.
		 */
		EngFncs->insert_thing(r_data->mappings, r_map3,
				      INSERT_AFTER, iter2);
		iter3 = EngFncs->next_element(iter2);
		EngFncs->remove_element(iter1);
		EngFncs->remove_element(iter2);

		EngFncs->engine_free(pv_indexes);
		EngFncs->engine_free(pe_indexes);

		/* Try to extend the merged mapping with its successor. */
		iter1 = iter3;
		r_map1 = r_map3;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * can_merge_region_mappings
 *
 * Check whether the region contains at least one pair of neighbouring
 * mappings that mappings_are_consecutive() accepts.
 *
 * Returns 0 if such a pair exists, EINVAL otherwise.
 **/
int can_merge_region_mappings(storage_object_t *region)
{
	region_data_t *r_data = region->private_data;
	region_mapping_t *current, *successor;
	list_element_t cur_elem, next_elem;
	int rc = EINVAL;

	LOG_ENTRY();
	LOG_DEBUG("Checking if any mappings in region %s can be merged.\n",
		  region->name);

	LIST_FOR_EACH(r_data->mappings, cur_elem, current) {
		/* Compare each mapping with the one right after it. */
		next_elem = EngFncs->next_element(cur_elem);
		successor = EngFncs->get_thing(next_elem);

		if (mappings_are_consecutive(current, successor)) {
			rc = 0;
			break;
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * can_expand_region
 *
 * Check whether this region could be expanded: it must be a data region and
 * its container's freespace region must have a non-zero size.
 *
 * Returns 0 if expansion is possible, EINVAL for a non-data region, or
 * ENOSPC if the container has no freespace (including the case where no
 * freespace region is found at all).
 **/
int can_expand_region(storage_object_t *region)
{
	storage_object_t *freespace;
	int rc = 0;

	LOG_ENTRY();
	LOG_DEBUG("Checking if region %s can be expanded.\n", region->name);

	/* Can't expand freespace. */
	if (region->data_type != DATA_TYPE) {
		LOG_DEBUG("Cannot expand freespace region %s.\n", region->name);
		rc = EINVAL;
		goto out;
	}

	/* Check if there's any freespace in the container. A missing
	 * freespace region is treated the same as an empty one.
	 */
	freespace = get_freespace_region(region->producing_container->objects_produced);
	if (!freespace || !freespace->size) {
		LOG_DEBUG("No freespace available to expand region %s.\n",
			  region->name);
		rc = ENOSPC;
		goto out;
	}

out:
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * can_shrink_region
 *
 * Check whether this region could be shrunk. Only data regions qualify.
 *
 * Returns 0 for a data region, EINVAL otherwise.
 **/
int can_shrink_region(storage_object_t *region)
{
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Checking if region %s can be shrunk.\n", region->name);

	if (region->data_type == DATA_TYPE) {
		rc = 0;
	} else {
		LOG_DEBUG("Cannot shrink freespace region %s.\n", region->name);
		rc = EINVAL;
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * can_split_region_mapping
 *
 * A mapping can be split in two only if it holds at least two extents per
 * stripe, i.e. its extent count is no less than twice its stripe-count.
 *
 * Returns 0 if the mapping can be split, ENOSPC if it is too small.
 **/
int can_split_region_mapping(region_mapping_t *r_map)
{
	int rc;

	LOG_ENTRY();

	rc = (r_map->le_count < r_map->stripe_count * 2) ? ENOSPC : 0;

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * can_split_a_region_mapping
 *
 * Go through the region's mappings and stop at the first one that
 * can_split_region_mapping() accepts.
 *
 * Returns 0 if a splittable mapping was found. Otherwise returns the result
 * for the last mapping examined, or ENOSPC if the region has no mappings.
 **/
int can_split_a_region_mapping(storage_object_t *region)
{
	region_data_t *r_data = region->private_data;
	region_mapping_t *candidate;
	list_element_t pos;
	int rc = ENOSPC;

	LOG_ENTRY();
	LOG_DEBUG("Checking if any mappings in region %s can be split.\n",
		  region->name);

	LIST_FOR_EACH(r_data->mappings, pos, candidate) {
		rc = can_split_region_mapping(candidate);
		if (rc == 0) {
			/* One splittable mapping is all we need. */
			break;
		}
	}

	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * split_region_mapping
 *
 * Replace one mapping of the region with two mappings that together cover
 * the same logical extents. The options select which mapping to split and
 * the extent index within it where the second part begins.
 *
 * On success the container is marked dirty and the region is flagged as
 * needing activation.
 *
 * NOTE(review): the replaced mapping is only unlinked from the list here;
 * confirm whether it should also be released.
 *
 * Returns 0 on success, ENOMEM on allocation failure, or the error reported
 * by split_mapping_validate_options().
 **/
int split_region_mapping(storage_object_t *region, option_array_t *options)
{
	region_data_t *r_data = region->private_data;
	region_mapping_t *old_map, *head_map, *tail_map;
	list_element_t old_elem;
	physical_extent_t *pe;
	u_int64_t split_at;
	u_int64_t head_per_stripe;
	u_int64_t *pe_indexes = NULL;
	u_int32_t *pv_indexes = NULL;
	u_int32_t map_index, s;
	int rc;

	LOG_ENTRY();
	LOG_DEBUG("Splitting a mapping in region %s.\n", region->name);

	/* Work out which mapping to split, and where. */
	split_mapping_parse_options(options, &map_index, &split_at);
	rc = split_mapping_validate_options(region, map_index,
					    &split_at, &old_map);
	if (rc) {
		goto out;
	}

	/* Extents each stripe contributes to the first part. */
	head_per_stripe = split_at / old_map->stripe_count;

	/* Scratch arrays holding the starting PV/PE of every stripe. */
	pv_indexes = EngFncs->engine_alloc(sizeof(*pv_indexes) *
					   old_map->stripe_count);
	pe_indexes = EngFncs->engine_alloc(sizeof(*pe_indexes) *
					   old_map->stripe_count);
	if (!pv_indexes || !pe_indexes) {
		rc = ENOMEM;
		goto out;
	}

	/* First part: same start, split_at extents long. */
	head_map = allocate_region_mapping(r_data, old_map->start_le,
					   split_at,
					   old_map->stripe_count,
					   old_map->stripe_size);
	if (!head_map) {
		rc = ENOMEM;
		goto out;
	}

	/* Second part: everything from split_at onwards. */
	tail_map = allocate_region_mapping(r_data,
					   old_map->start_le + split_at,
					   old_map->le_count - split_at,
					   old_map->stripe_count,
					   old_map->stripe_size);
	if (!tail_map) {
		deallocate_region_mapping(head_map);
		rc = ENOMEM;
		goto out;
	}

	/* The first part starts where the old mapping's stripes start. */
	for (s = 0; s < old_map->stripe_count; s++) {
		pe = old_map->le_maps[s].map[0].pe;
		pv_indexes[s] = pe->pv_data->pv_index;
		pe_indexes[s] = pe->number;
	}
	construct_region_mapping(head_map, pv_indexes, pe_indexes);

	/* The second part starts head_per_stripe extents into each stripe. */
	for (s = 0; s < old_map->stripe_count; s++) {
		pe = old_map->le_maps[s].map[head_per_stripe].pe;
		pv_indexes[s] = pe->pv_data->pv_index;
		pe_indexes[s] = pe->number;
	}
	construct_region_mapping(tail_map, pv_indexes, pe_indexes);

	/* Link both parts in right after the old mapping (second part
	 * first, so the first part ends up in front of it), then unlink
	 * the old mapping.
	 */
	old_elem = EngFncs->find_in_list(r_data->mappings, old_map, NULL, NULL);
	EngFncs->insert_thing(r_data->mappings, tail_map, INSERT_AFTER, old_elem);
	EngFncs->insert_thing(r_data->mappings, head_map, INSERT_AFTER, old_elem);
	EngFncs->remove_element(old_elem);

	region->producing_container->flags |= SCFLAG_DIRTY;
	region->flags |= SOFLAG_NEEDS_ACTIVATE;

out:
	EngFncs->engine_free(pv_indexes);
	EngFncs->engine_free(pe_indexes);
	LOG_EXIT_INT(rc);
	return rc;
}

/**
 * smallest_mapping_stripe
 *
 * Find the smallest per-stripe extent count (le_count / stripe_count) over
 * all mappings of this region.
 *
 * Returns that minimum, or the largest u_int64_t value if the region has no
 * mappings.
 **/
u_int64_t smallest_mapping_stripe(storage_object_t *region)
{
	region_data_t *r_data = region->private_data;
	u_int64_t smallest = ~(u_int64_t)0;
	u_int64_t per_stripe;
	region_mapping_t *mapping;
	list_element_t pos;

	LOG_ENTRY();

	LIST_FOR_EACH(r_data->mappings, pos, mapping) {
		per_stripe = mapping->le_count / mapping->stripe_count;
		if (per_stripe < smallest) {
			smallest = per_stripe;
		}
	}

	LOG_EXIT_U64(smallest);
	return smallest;
}

