blob: a465c4c721012edcbe5c459ce9f1be7535b5c096 [file] [log] [blame]
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "lib.h"
#include "metadata.h"
#include "locking.h"
#include "pv_map.h"
#include "lvm-string.h"
#include "toolcontext.h"
#include "lv_alloc.h"
#include "pv_alloc.h"
#include "display.h"
#include "segtype.h"
#include "archiver.h"
#include "activate.h"
#include "str_list.h"
#include "defaults.h"
#include "lvm-exec.h"
#include "memlock.h"
#include "lvmlockd.h"
typedef enum {
PREFERRED,
USE_AREA,
NEXT_PV,
NEXT_AREA
} area_use_t;
/* FIXME: remove RAID_METADATA_AREA_LEN macro after defining 'raid_log_extents'*/
#define RAID_METADATA_AREA_LEN 1
/* FIXME These ended up getting used differently from first intended. Refactor. */
/* Only one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG, A_CLING_TO_ALLOCED may be set */
#define A_CONTIGUOUS_TO_LVSEG 0x01 /* Must be contiguous to an existing segment */
#define A_CLING_TO_LVSEG 0x02 /* Must use same disks as existing LV segment */
#define A_CLING_TO_ALLOCED 0x04 /* Must use same disks as already-allocated segment */
#define A_CLING_BY_TAGS 0x08 /* Must match tags against existing segment */
#define A_CAN_SPLIT 0x10
#define A_AREA_COUNT_MATCHES 0x20 /* Existing lvseg has same number of areas as new segment */
#define A_POSITIONAL_FILL 0x40 /* Slots are positional and filled using PREFERRED */
#define A_PARTITION_BY_TAGS 0x80 /* No allocated area may share any tag with any other */
/*
* Constant parameters during a single allocation attempt.
*/
struct alloc_parms {
alloc_policy_t alloc;
unsigned flags; /* Holds A_* */
struct lv_segment *prev_lvseg;
uint32_t extents_still_needed;
};
/*
* Holds varying state of each allocation attempt.
*/
struct alloc_state {
const struct alloc_parms *alloc_parms;
struct pv_area_used *areas;
uint32_t areas_size;
uint32_t log_area_count_still_needed; /* Number of areas still needing to be allocated for the log */
uint32_t allocated; /* Total number of extents allocated so far */
uint32_t num_positional_areas; /* Number of parallel allocations that must be contiguous/cling */
};
struct lv_names {
const char *old;
const char *new;
};
enum {
LV_TYPE_UNKNOWN,
LV_TYPE_NONE,
LV_TYPE_PUBLIC,
LV_TYPE_PRIVATE,
LV_TYPE_HISTORY,
LV_TYPE_LINEAR,
LV_TYPE_STRIPED,
LV_TYPE_MIRROR,
LV_TYPE_RAID,
LV_TYPE_THIN,
LV_TYPE_CACHE,
LV_TYPE_SPARSE,
LV_TYPE_ORIGIN,
LV_TYPE_THINORIGIN,
LV_TYPE_MULTITHINORIGIN,
LV_TYPE_THICKORIGIN,
LV_TYPE_MULTITHICKORIGIN,
LV_TYPE_CACHEORIGIN,
LV_TYPE_EXTTHINORIGIN,
LV_TYPE_MULTIEXTTHINORIGIN,
LV_TYPE_SNAPSHOT,
LV_TYPE_THINSNAPSHOT,
LV_TYPE_THICKSNAPSHOT,
LV_TYPE_PVMOVE,
LV_TYPE_IMAGE,
LV_TYPE_LOG,
LV_TYPE_METADATA,
LV_TYPE_POOL,
LV_TYPE_DATA,
LV_TYPE_SPARE,
LV_TYPE_VIRTUAL,
LV_TYPE_RAID0,
LV_TYPE_RAID0_META,
LV_TYPE_RAID1,
LV_TYPE_RAID10,
LV_TYPE_RAID4,
LV_TYPE_RAID5,
LV_TYPE_RAID5_LA,
LV_TYPE_RAID5_RA,
LV_TYPE_RAID5_LS,
LV_TYPE_RAID5_RS,
LV_TYPE_RAID6,
LV_TYPE_RAID6_ZR,
LV_TYPE_RAID6_NR,
LV_TYPE_RAID6_NC,
LV_TYPE_LOCKD,
LV_TYPE_SANLOCK
};
static const char *_lv_type_names[] = {
[LV_TYPE_UNKNOWN] = "unknown",
[LV_TYPE_NONE] = "none",
[LV_TYPE_PUBLIC] = "public",
[LV_TYPE_PRIVATE] = "private",
[LV_TYPE_HISTORY] = "history",
[LV_TYPE_LINEAR] = "linear",
[LV_TYPE_STRIPED] = "striped",
[LV_TYPE_MIRROR] = "mirror",
[LV_TYPE_RAID] = "raid",
[LV_TYPE_THIN] = "thin",
[LV_TYPE_CACHE] = "cache",
[LV_TYPE_SPARSE] = "sparse",
[LV_TYPE_ORIGIN] = "origin",
[LV_TYPE_THINORIGIN] = "thinorigin",
[LV_TYPE_MULTITHINORIGIN] = "multithinorigin",
[LV_TYPE_THICKORIGIN] = "thickorigin",
[LV_TYPE_MULTITHICKORIGIN] = "multithickorigin",
[LV_TYPE_CACHEORIGIN] = "cacheorigin",
[LV_TYPE_EXTTHINORIGIN] = "extthinorigin",
[LV_TYPE_MULTIEXTTHINORIGIN] = "multiextthinorigin",
[LV_TYPE_SNAPSHOT] = "snapshot",
[LV_TYPE_THINSNAPSHOT] = "thinsnapshot",
[LV_TYPE_THICKSNAPSHOT] = "thicksnapshot",
[LV_TYPE_PVMOVE] = "pvmove",
[LV_TYPE_IMAGE] = "image",
[LV_TYPE_LOG] = "log",
[LV_TYPE_METADATA] = "metadata",
[LV_TYPE_POOL] = "pool",
[LV_TYPE_DATA] = "data",
[LV_TYPE_SPARE] = "spare",
[LV_TYPE_VIRTUAL] = "virtual",
[LV_TYPE_RAID0] = SEG_TYPE_NAME_RAID0,
[LV_TYPE_RAID0_META] = SEG_TYPE_NAME_RAID0_META,
[LV_TYPE_RAID1] = SEG_TYPE_NAME_RAID1,
[LV_TYPE_RAID10] = SEG_TYPE_NAME_RAID10,
[LV_TYPE_RAID4] = SEG_TYPE_NAME_RAID4,
[LV_TYPE_RAID5] = SEG_TYPE_NAME_RAID5,
[LV_TYPE_RAID5_LA] = SEG_TYPE_NAME_RAID5_LA,
[LV_TYPE_RAID5_RA] = SEG_TYPE_NAME_RAID5_RA,
[LV_TYPE_RAID5_LS] = SEG_TYPE_NAME_RAID5_LS,
[LV_TYPE_RAID5_RS] = SEG_TYPE_NAME_RAID5_RS,
[LV_TYPE_RAID6] = SEG_TYPE_NAME_RAID6,
[LV_TYPE_RAID6_ZR] = SEG_TYPE_NAME_RAID6_ZR,
[LV_TYPE_RAID6_NR] = SEG_TYPE_NAME_RAID6_NR,
[LV_TYPE_RAID6_NC] = SEG_TYPE_NAME_RAID6_NC,
[LV_TYPE_LOCKD] = "lockd",
[LV_TYPE_SANLOCK] = "sanlock",
};
static int _lv_layout_and_role_mirror(struct dm_pool *mem,
const struct logical_volume *lv,
struct dm_list *layout,
struct dm_list *role,
int *public_lv)
{
int top_level = 0;
/* non-top-level LVs */
if (lv_is_mirror_image(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MIRROR]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_IMAGE]))
goto_bad;
} else if (lv_is_mirror_log(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MIRROR]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_LOG]))
goto_bad;
if (lv_is_mirrored(lv) &&
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR]))
goto_bad;
} else if (lv_is_pvmove(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_PVMOVE]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR]))
goto_bad;
} else
top_level = 1;
if (!top_level) {
*public_lv = 0;
return 1;
}
/* top-level LVs */
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR]))
goto_bad;
return 1;
bad:
return 0;
}
static int _lv_layout_and_role_raid(struct dm_pool *mem,
const struct logical_volume *lv,
struct dm_list *layout,
struct dm_list *role,
int *public_lv)
{
int top_level = 0;
const struct segment_type *segtype;
/* non-top-level LVs */
if (lv_is_raid_image(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_RAID]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_IMAGE]))
goto_bad;
} else if (lv_is_raid_metadata(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_RAID]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
goto_bad;
} else if (lv_is_pvmove(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_PVMOVE]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID]))
goto_bad;
} else
top_level = 1;
if (!top_level) {
*public_lv = 0;
return 1;
}
/* top-level LVs */
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID]))
goto_bad;
segtype = first_seg(lv)->segtype;
if (segtype_is_raid0(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID0]))
goto_bad;
} else if (segtype_is_raid1(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID1]))
goto_bad;
} else if (segtype_is_raid10(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID10]))
goto_bad;
} else if (segtype_is_raid4(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID4]))
goto_bad;
} else if (segtype_is_any_raid5(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5]))
goto_bad;
if (segtype_is_raid5_la(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_LA]))
goto_bad;
} else if (segtype_is_raid5_ra(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_RA]))
goto_bad;
} else if (segtype_is_raid5_ls(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_LS]))
goto_bad;
} else if (segtype_is_raid5_rs(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_RS]))
goto_bad;
}
} else if (segtype_is_any_raid6(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6]))
goto_bad;
if (segtype_is_raid6_zr(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_ZR]))
goto_bad;
} else if (segtype_is_raid6_nr(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_NR]))
goto_bad;
} else if (segtype_is_raid6_nc(segtype)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_NC]))
goto_bad;
}
}
return 1;
bad:
return 0;
}
static int _lv_layout_and_role_thin(struct dm_pool *mem,
const struct logical_volume *lv,
struct dm_list *layout,
struct dm_list *role,
int *public_lv)
{
int top_level = 0;
unsigned snap_count;
struct lv_segment *seg;
/* non-top-level LVs */
if (lv_is_thin_pool_metadata(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
goto_bad;
} else if (lv_is_thin_pool_data(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA]))
goto_bad;
} else
top_level = 1;
if (!top_level) {
*public_lv = 0;
return 1;
}
/* top-level LVs */
if (lv_is_thin_volume(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_THIN]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_SPARSE]))
goto_bad;
if (lv_is_thin_origin(lv, &snap_count)) {
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THINORIGIN]))
goto_bad;
if (snap_count > 1 &&
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTITHINORIGIN]))
goto_bad;
}
if ((seg = first_seg(lv)) && (seg->origin || seg->external_lv))
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_SNAPSHOT]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THINSNAPSHOT]))
goto_bad;
} else if (lv_is_thin_pool(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_THIN]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL]))
goto_bad;
*public_lv = 0;
}
if (lv_is_external_origin(lv)) {
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_EXTTHINORIGIN]))
goto_bad;
if (lv->external_count > 1 &&
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTIEXTTHINORIGIN]))
goto_bad;
}
return 1;
bad:
return 0;
}
static int _lv_layout_and_role_cache(struct dm_pool *mem,
const struct logical_volume *lv,
struct dm_list *layout,
struct dm_list *role,
int *public_lv)
{
int top_level = 0;
/* non-top-level LVs */
if (lv_is_cache_pool_metadata(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHE]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
goto_bad;
} else if (lv_is_cache_pool_data(lv)) {
if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHE]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA]))
goto_bad;
if (lv_is_cache(lv) &&
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]))
goto_bad;
} else if (lv_is_cache_origin(lv)) {
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_CACHE]) ||
!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHEORIGIN]))
goto_bad;
if (lv_is_cache(lv) &&
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]))
goto_bad;
} else
top_level = 1;
if (!top_level) {
*public_lv = 0;
return 1;
}
/* top-level LVs */
if (lv_is_cache(lv) &&
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]))
goto_bad;
else if (lv_is_cache_pool(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]) ||
!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL]))
goto_bad;
*public_lv = 0;
}
return 1;
bad:
return 0;
}
static int _lv_layout_and_role_thick_origin_snapshot(struct dm_pool *mem,
const struct logical_volume *lv,
struct dm_list *layout,
struct dm_list *role,
int *public_lv)
{
if (lv_is_origin(lv)) {
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THICKORIGIN]))
goto_bad;
/*
* Thin volumes are also marked with virtual flag, but we don't show "virtual"
* layout for thin LVs as they have their own keyword for layout - "thin"!
* So rule thin LVs out here!
*/
if (lv_is_virtual(lv) && !lv_is_thin_volume(lv)) {
if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_VIRTUAL]))
goto_bad;
*public_lv = 0;
}
if (lv->origin_count > 1 &&
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTITHICKORIGIN]))
goto_bad;
} else if (lv_is_cow(lv)) {
if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_SNAPSHOT]) ||
!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THICKSNAPSHOT]))
goto_bad;
}
return 1;
bad:
return 0;
}
int lv_layout_and_role(struct dm_pool *mem, const struct logical_volume *lv,
struct dm_list **layout, struct dm_list **role) {
int linear, striped;
struct lv_segment *seg;
int public_lv = 1;
*layout = *role = NULL;
if (!(*layout = str_list_create(mem))) {
log_error("LV layout list allocation failed");
return 0;
}
if (!(*role = str_list_create(mem))) {
log_error("LV role list allocation failed");
goto bad;
}
if (lv_is_historical(lv)) {
if (!str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_NONE]) ||
!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_HISTORY]))
goto_bad;
}
/* Mirrors and related */
if ((lv_is_mirror_type(lv) || lv_is_pvmove(lv)) &&
!_lv_layout_and_role_mirror(mem, lv, *layout, *role, &public_lv))
goto_bad;
/* RAIDs and related */
if (lv_is_raid_type(lv) &&
!_lv_layout_and_role_raid(mem, lv, *layout, *role, &public_lv))
goto_bad;
/* Thins and related */
if ((lv_is_thin_type(lv) || lv_is_external_origin(lv)) &&
!_lv_layout_and_role_thin(mem, lv, *layout, *role, &public_lv))
goto_bad;
/* Caches and related */
if ((lv_is_cache_type(lv) || lv_is_cache_origin(lv)) &&
!_lv_layout_and_role_cache(mem, lv, *layout, *role, &public_lv))
goto_bad;
/* Pool-specific */
if (lv_is_pool_metadata_spare(lv)) {
if (!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_POOL]) ||
!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_SPARE]))
goto_bad;
public_lv = 0;
}
/* Old-style origins/snapshots, virtual origins */
if (!_lv_layout_and_role_thick_origin_snapshot(mem, lv, *layout, *role, &public_lv))
goto_bad;
if (lv_is_lockd_sanlock_lv(lv)) {
if (!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_LOCKD]) ||
!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_SANLOCK]))
goto_bad;
public_lv = 0;
}
/*
* If layout not yet determined, it must be either
* linear or striped or mixture of these two.
*/
if (dm_list_empty(*layout)) {
linear = striped = 0;
dm_list_iterate_items(seg, &lv->segments) {
if (seg_is_linear(seg))
linear = 1;
else if (seg_is_striped(seg))
striped = 1;
else {
/*
* This should not happen but if it does
* we'll see that there's "unknown" layout
* present. This means we forgot to detect
* the role above and we need add proper
* detection for such role!
*/
log_warn(INTERNAL_ERROR "WARNING: Failed to properly detect "
"layout and role for LV %s/%s.",
lv->vg->name, lv->name);
}
}
if (linear &&
!str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_LINEAR]))
goto_bad;
if (striped &&
!str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_STRIPED]))
goto_bad;
if (!linear && !striped &&
!str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_UNKNOWN]))
goto_bad;
}
/* finally, add either 'public' or 'private' role to the LV */
if (public_lv) {
if (!str_list_add_h_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_PUBLIC]))
goto_bad;
} else {
if (!str_list_add_h_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_PRIVATE]))
goto_bad;
}
return 1;
bad:
dm_pool_free(mem, *layout);
return 0;
}
struct dm_list_and_mempool {
struct dm_list *list;
struct dm_pool *mem;
};
static int _get_pv_list_for_lv(struct logical_volume *lv, void *data)
{
int dup_found;
uint32_t s;
struct pv_list *pvl;
struct lv_segment *seg;
struct dm_list *pvs = ((struct dm_list_and_mempool *)data)->list;
struct dm_pool *mem = ((struct dm_list_and_mempool *)data)->mem;
dm_list_iterate_items(seg, &lv->segments) {
for (s = 0; s < seg->area_count; s++) {
dup_found = 0;
if (seg_type(seg, s) != AREA_PV)
continue;
/* do not add duplicates */
dm_list_iterate_items(pvl, pvs)
if (pvl->pv == seg_pv(seg, s))
dup_found = 1;
if (dup_found)
continue;
if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
log_error("Failed to allocate memory");
return 0;
}
pvl->pv = seg_pv(seg, s);
log_debug_metadata(" %s/%s uses %s", lv->vg->name,
lv->name, pv_dev_name(pvl->pv));
dm_list_add(pvs, &pvl->list);
}
}
return 1;
}
/*
* get_pv_list_for_lv
* @mem - mempool to allocate the list from.
* @lv
* @pvs - The list to add pv_list items to.
*
* 'pvs' is filled with 'pv_list' items for PVs that compose the LV.
* If the 'pvs' list already has items in it, duplicates will not be
* added. So, it is safe to repeatedly call this function for different
* LVs and build up a list of PVs for them all.
*
* Memory to create the list is obtained from the mempool provided.
*
* Returns: 1 on success, 0 on error
*/
int get_pv_list_for_lv(struct dm_pool *mem,
struct logical_volume *lv, struct dm_list *pvs)
{
struct dm_list_and_mempool context = { pvs, mem };
log_debug_metadata("Generating list of PVs that %s/%s uses:",
lv->vg->name, lv->name);
if (!_get_pv_list_for_lv(lv, &context))
return_0;
return for_each_sub_lv(lv, &_get_pv_list_for_lv, &context);
}
/*
* get_default_region_size
* @cmd
*
* 'mirror_region_size' and 'raid_region_size' are effectively the same thing.
* However, "raid" is more inclusive than "mirror", so the name has been
* changed. This function checks for the old setting and warns the user if
* it is being overridden by the new setting (i.e. warn if both settings are
* present).
*
* Note that the config files give defaults in kiB terms, but we
* return the value in terms of sectors.
*
* Returns: default region_size in sectors
*/
static int _get_default_region_size(struct cmd_context *cmd)
{
int mrs, rrs;
/*
* 'mirror_region_size' is the old setting. It is overridden
* by the new setting, 'raid_region_size'.
*/
mrs = 2 * find_config_tree_int(cmd, activation_mirror_region_size_CFG, NULL);
rrs = 2 * find_config_tree_int(cmd, activation_raid_region_size_CFG, NULL);
if (!mrs && !rrs)
return DEFAULT_RAID_REGION_SIZE * 2;
if (!mrs)
return rrs;
if (!rrs)
return mrs;
if (mrs != rrs)
log_verbose("Overriding default 'mirror_region_size' setting"
" with 'raid_region_size' setting of %u kiB",
rrs / 2);
return rrs;
}
static int _round_down_pow2(int r)
{
/* Set all bits to the right of the leftmost set bit */
r |= (r >> 1);
r |= (r >> 2);
r |= (r >> 4);
r |= (r >> 8);
r |= (r >> 16);
/* Pull out the leftmost set bit */
return r & ~(r >> 1);
}
int get_default_region_size(struct cmd_context *cmd)
{
int region_size = _get_default_region_size(cmd);
if (!is_power_of_2(region_size)) {
region_size = _round_down_pow2(region_size);
log_verbose("Reducing region size to %u kiB (power of 2).",
region_size / 2);
}
return region_size;
}
int add_seg_to_segs_using_this_lv(struct logical_volume *lv,
struct lv_segment *seg)
{
struct seg_list *sl;
dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
if (sl->seg == seg) {
sl->count++;
return 1;
}
}
log_very_verbose("Adding %s:%" PRIu32 " as an user of %s",
seg->lv->name, seg->le, lv->name);
if (!(sl = dm_pool_zalloc(lv->vg->vgmem, sizeof(*sl)))) {
log_error("Failed to allocate segment list");
return 0;
}
sl->count = 1;
sl->seg = seg;
dm_list_add(&lv->segs_using_this_lv, &sl->list);
return 1;
}
int remove_seg_from_segs_using_this_lv(struct logical_volume *lv,
struct lv_segment *seg)
{
struct seg_list *sl;
dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
if (sl->seg != seg)
continue;
if (sl->count > 1)
sl->count--;
else {
log_very_verbose("%s:%" PRIu32 " is no longer a user "
"of %s", seg->lv->name, seg->le,
lv->name);
dm_list_del(&sl->list);
}
return 1;
}
log_error(INTERNAL_ERROR "Segment %s:%u is not a user of %s.",
seg->lv->name, seg->le, lv->name);
return 0;
}
/*
* This is a function specialized for the common case where there is
* only one segment which uses the LV.
* e.g. the LV is a layer inserted by insert_layer_for_lv().
*
* In general, walk through lv->segs_using_this_lv.
*/
struct lv_segment *get_only_segment_using_this_lv(const struct logical_volume *lv)
{
struct seg_list *sl;
if (!lv) {
log_error(INTERNAL_ERROR "get_only_segment_using_this_lv() called with NULL LV.");
return NULL;
}
dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
/* Needs to be he only item in list */
if (!dm_list_end(&lv->segs_using_this_lv, &sl->list))
break;
if (sl->count != 1) {
log_error("%s is expected to have only one segment using it, "
"while %s:%" PRIu32 " uses it %d times.",
display_lvname(lv), sl->seg->lv->name, sl->seg->le, sl->count);
return NULL;
}
return sl->seg;
}
log_error("%s is expected to have only one segment using it, while it has %d.",
display_lvname(lv), dm_list_size(&lv->segs_using_this_lv));
return NULL;
}
/*
* PVs used by a segment of an LV
*/
struct seg_pvs {
struct dm_list list;
struct dm_list pvs; /* struct pv_list */
uint32_t le;
uint32_t len;
};
static struct seg_pvs *_find_seg_pvs_by_le(struct dm_list *list, uint32_t le)
{
struct seg_pvs *spvs;
dm_list_iterate_items(spvs, list)
if (le >= spvs->le && le < spvs->le + spvs->len)
return spvs;
return NULL;
}
/*
* Find first unused LV number.
*/
uint32_t find_free_lvnum(struct logical_volume *lv)
{
int lvnum_used[MAX_RESTRICTED_LVS + 1] = { 0 };
uint32_t i = 0;
struct lv_list *lvl;
int lvnum;
dm_list_iterate_items(lvl, &lv->vg->lvs) {
lvnum = lvnum_from_lvid(&lvl->lv->lvid);
if (lvnum <= MAX_RESTRICTED_LVS)
lvnum_used[lvnum] = 1;
}
while (lvnum_used[i])
i++;
/* FIXME What if none are free? */
return i;
}
dm_percent_t copy_percent(const struct logical_volume *lv)
{
uint32_t numerator = 0u, denominator = 0u;
struct lv_segment *seg;
dm_list_iterate_items(seg, &lv->segments) {
denominator += seg->area_len;
/* FIXME Generalise name of 'extents_copied' field */
if (((seg_is_raid(seg) && !seg_is_any_raid0(seg)) || seg_is_mirrored(seg)) &&
(seg->area_count > 1))
numerator += seg->extents_copied;
else
numerator += seg->area_len;
}
return denominator ? dm_make_percent(numerator, denominator) : DM_PERCENT_100;
}
/* Round up extents to next stripe boundary for number of stripes */
static uint32_t _round_to_stripe_boundary(struct volume_group *vg, uint32_t extents,
uint32_t stripes, int extend)
{
uint32_t size_rest, new_extents = extents;
if (!stripes)
return extents;
/* Round up extents to stripe divisible amount */
if ((size_rest = extents % stripes)) {
new_extents += extend ? stripes - size_rest : -size_rest;
log_print_unless_silent("Rounding size %s (%d extents) up to stripe boundary size %s (%d extents).",
display_size(vg->cmd, (uint64_t) extents * vg->extent_size), extents,
display_size(vg->cmd, (uint64_t) new_extents * vg->extent_size), new_extents);
}
return new_extents;
}
/*
* All lv_segments get created here.
*/
struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
struct logical_volume *lv,
uint32_t le, uint32_t len,
uint64_t status,
uint32_t stripe_size,
struct logical_volume *log_lv,
uint32_t area_count,
uint32_t area_len,
uint32_t chunk_size,
uint32_t region_size,
uint32_t extents_copied,
struct lv_segment *pvmove_source_seg)
{
struct lv_segment *seg;
struct dm_pool *mem = lv->vg->vgmem;
uint32_t areas_sz = area_count * sizeof(*seg->areas);
if (!segtype) {
log_error(INTERNAL_ERROR "alloc_lv_segment: Missing segtype.");
return NULL;
}
if (!(seg = dm_pool_zalloc(mem, sizeof(*seg))))
return_NULL;
if (!(seg->areas = dm_pool_zalloc(mem, areas_sz))) {
dm_pool_free(mem, seg);
return_NULL;
}
if (segtype_is_raid(segtype) &&
!segtype_is_raid0(segtype) &&
!(seg->meta_areas = dm_pool_zalloc(mem, areas_sz))) {
dm_pool_free(mem, seg); /* frees everything alloced since seg */
return_NULL;
}
seg->segtype = segtype;
seg->lv = lv;
seg->le = le;
seg->len = len;
seg->status = status;
seg->stripe_size = stripe_size;
seg->area_count = area_count;
seg->area_len = area_len;
seg->chunk_size = chunk_size;
seg->region_size = region_size;
seg->extents_copied = extents_copied;
seg->pvmove_source_seg = pvmove_source_seg;
dm_list_init(&seg->tags);
dm_list_init(&seg->origin_list);
dm_list_init(&seg->thin_messages);
if (log_lv && !attach_mirror_log(seg, log_lv))
return_NULL;
if (segtype_is_mirror(segtype))
lv->status |= MIRROR;
if (segtype_is_mirrored(segtype))
lv->status |= MIRRORED;
return seg;
}
static int _release_and_discard_lv_segment_area(struct lv_segment *seg, uint32_t s,
uint32_t area_reduction, int with_discard)
{
struct lv_segment *cache_seg;
struct logical_volume *lv = seg_lv(seg, s);
if (seg_type(seg, s) == AREA_UNASSIGNED)
return 1;
if (seg_type(seg, s) == AREA_PV) {
if (with_discard && !discard_pv_segment(seg_pvseg(seg, s), area_reduction))
return_0;
if (!release_pv_segment(seg_pvseg(seg, s), area_reduction))
return_0;
if (seg->area_len == area_reduction)
seg_type(seg, s) = AREA_UNASSIGNED;
return 1;
}
if (lv_is_mirror_image(lv) ||
lv_is_thin_pool_data(lv) ||
lv_is_cache_pool_data(lv)) {
if (!lv_reduce(lv, area_reduction))
return_0; /* FIXME: any upper level reporting */
return 1;
}
if (seg_is_cache_pool(seg) &&
!dm_list_empty(&seg->lv->segs_using_this_lv)) {
if (!(cache_seg = get_only_segment_using_this_lv(seg->lv)))
return_0;
if (!lv_cache_remove(cache_seg->lv))
return_0;
}
if (lv_is_raid_image(lv)) {
/*
* FIXME: Use lv_reduce not lv_remove
* We use lv_remove for now, because I haven't figured out
* why lv_reduce won't remove the LV.
lv_reduce(lv, area_reduction);
*/
if (area_reduction != seg->area_len) {
log_error("Unable to reduce RAID LV - operation not implemented.");
return_0;
} else {
if (!lv_remove(lv)) {
log_error("Failed to remove RAID image %s",
lv->name);
return 0;
}
}
/* Remove metadata area if image has been removed */
if (seg->meta_areas && seg_metalv(seg, s) && (area_reduction == seg->area_len)) {
if (!lv_reduce(seg_metalv(seg, s),
seg_metalv(seg, s)->le_count)) {
log_error("Failed to remove RAID meta-device %s",
seg_metalv(seg, s)->name);
return 0;
}
}
return 1;
}
if (area_reduction == seg->area_len) {
log_very_verbose("Remove %s:%" PRIu32 "[%" PRIu32 "] from "
"the top of LV %s:%" PRIu32,
seg->lv->name, seg->le, s,
lv->name, seg_le(seg, s));
if (!remove_seg_from_segs_using_this_lv(lv, seg))
return_0;
seg_lv(seg, s) = NULL;
seg_le(seg, s) = 0;
seg_type(seg, s) = AREA_UNASSIGNED;
}
return 1;
}
int release_and_discard_lv_segment_area(struct lv_segment *seg, uint32_t s, uint32_t area_reduction)
{
return _release_and_discard_lv_segment_area(seg, s, area_reduction, 1);
}
int release_lv_segment_area(struct lv_segment *seg, uint32_t s, uint32_t area_reduction)
{
return _release_and_discard_lv_segment_area(seg, s, area_reduction, 0);
}
/*
* Move a segment area from one segment to another
*/
int move_lv_segment_area(struct lv_segment *seg_to, uint32_t area_to,
struct lv_segment *seg_from, uint32_t area_from)
{
struct physical_volume *pv;
struct logical_volume *lv;
uint32_t pe, le;
switch (seg_type(seg_from, area_from)) {
case AREA_PV:
pv = seg_pv(seg_from, area_from);
pe = seg_pe(seg_from, area_from);
if (!release_lv_segment_area(seg_from, area_from, seg_from->area_len))
return_0;
if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len))
return_0;
if (!set_lv_segment_area_pv(seg_to, area_to, pv, pe))
return_0;
break;
case AREA_LV:
lv = seg_lv(seg_from, area_from);
le = seg_le(seg_from, area_from);
if (!release_lv_segment_area(seg_from, area_from, seg_from->area_len))
return_0;
if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len))
return_0;
if (!set_lv_segment_area_lv(seg_to, area_to, lv, le, 0))
return_0;
break;
case AREA_UNASSIGNED:
if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len))
return_0;
}
return 1;
}
/*
* Link part of a PV to an LV segment.
*/
int set_lv_segment_area_pv(struct lv_segment *seg, uint32_t area_num,
struct physical_volume *pv, uint32_t pe)
{
seg->areas[area_num].type = AREA_PV;
if (!(seg_pvseg(seg, area_num) =
assign_peg_to_lvseg(pv, pe, seg->area_len, seg, area_num)))
return_0;
return 1;
}
/*
* Link one LV segment to another. Assumes sizes already match.
*/
int set_lv_segment_area_lv(struct lv_segment *seg, uint32_t area_num,
struct logical_volume *lv, uint32_t le,
uint64_t status)
{
log_very_verbose("Stack %s:%" PRIu32 "[%" PRIu32 "] on LV %s:%" PRIu32,
seg->lv->name, seg->le, area_num, lv->name, le);
if (status & RAID_META) {
seg->meta_areas[area_num].type = AREA_LV;
seg_metalv(seg, area_num) = lv;
if (le) {
log_error(INTERNAL_ERROR "Meta le != 0");
return 0;
}
seg_metale(seg, area_num) = 0;
} else {
seg->areas[area_num].type = AREA_LV;
seg_lv(seg, area_num) = lv;
seg_le(seg, area_num) = le;
}
lv->status |= status;
if (!add_seg_to_segs_using_this_lv(lv, seg))
return_0;
return 1;
}
/*
* Prepare for adding parallel areas to an existing segment.
*/
static int _lv_segment_add_areas(struct logical_volume *lv,
struct lv_segment *seg,
uint32_t new_area_count)
{
struct lv_segment_area *newareas;
uint32_t areas_sz = new_area_count * sizeof(*newareas);
if (!(newareas = dm_pool_zalloc(lv->vg->cmd->mem, areas_sz)))
return_0;
memcpy(newareas, seg->areas, seg->area_count * sizeof(*seg->areas));
seg->areas = newareas;
seg->area_count = new_area_count;
return 1;
}
static uint32_t _calc_area_multiple(const struct segment_type *segtype,
const uint32_t area_count,
const uint32_t stripes)
{
if (!area_count)
return 1;
/* Striped */
if (segtype_is_striped(segtype))
return area_count;
/* Parity RAID (e.g. RAID 4/5/6) */
if (segtype_is_raid(segtype) && segtype->parity_devs) {
/*
* As articulated in _alloc_init, we can tell by
* the area_count whether a replacement drive is
* being allocated; and if this is the case, then
* there is no area_multiple that should be used.
*/
if (area_count <= segtype->parity_devs)
return 1;
return area_count - segtype->parity_devs;
}
/*
* RAID10 - only has 2-way mirror right now.
* If we are to move beyond 2-way RAID10, then
* the 'stripes' argument will always need to
* be given.
*/
if (!strcmp(segtype->name, _lv_type_names[LV_TYPE_RAID10])) {
if (!stripes)
return area_count / 2;
return stripes;
}
/* Mirrored stripes */
if (stripes)
return stripes;
/* Mirrored */
return 1;
}
/*
* Reduce the size of an lv_segment. New size can be zero.
*/
static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction)
{
uint32_t area_reduction, s;
/* Caller must ensure exact divisibility */
if (seg_is_striped(seg)) {
if (reduction % seg->area_count) {
log_error("Segment extent reduction %" PRIu32
" not divisible by #stripes %" PRIu32,
reduction, seg->area_count);
return 0;
}
area_reduction = (reduction / seg->area_count);
} else
area_reduction = reduction;
for (s = 0; s < seg->area_count; s++)
if (!release_and_discard_lv_segment_area(seg, s, area_reduction))
return_0;
seg->len -= reduction;
seg->area_len -= area_reduction;
return 1;
}
/*
* Entry point for all LV reductions in size.
*/
static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete)
{
struct lv_segment *seg;
uint32_t count = extents;
uint32_t reduction;
struct logical_volume *pool_lv;
if (lv_is_merging_origin(lv)) {
log_debug_metadata("Dropping snapshot merge of %s to removed origin %s.",
find_snapshot(lv)->lv->name, lv->name);
clear_snapshot_merge(lv);
}
dm_list_iterate_back_items(seg, &lv->segments) {
if (!count)
break;
if (seg->len <= count) {
if (seg->merge_lv) {
log_debug_metadata("Dropping snapshot merge of removed %s to origin %s.",
seg->lv->name, seg->merge_lv->name);
clear_snapshot_merge(seg->merge_lv);
}
/* remove this segment completely */
/* FIXME Check this is safe */
if (seg->log_lv && !lv_remove(seg->log_lv))
return_0;
if (seg->metadata_lv && !lv_remove(seg->metadata_lv))
return_0;
/* Remove cache origin only when removing (not on lv_empty()) */
if (delete && seg_is_cache(seg)) {
if (lv_is_pending_delete(seg->lv)) {
/* Just dropping reference on origin when pending delete */
if (!remove_seg_from_segs_using_this_lv(seg_lv(seg, 0), seg))
return_0;
seg_lv(seg, 0) = NULL;
seg_le(seg, 0) = 0;
seg_type(seg, 0) = AREA_UNASSIGNED;
if (seg->pool_lv && !detach_pool_lv(seg))
return_0;
} else if (!lv_remove(seg_lv(seg, 0)))
return_0;
}
if ((pool_lv = seg->pool_lv)) {
if (!detach_pool_lv(seg))
return_0;
/* When removing cached LV, remove pool as well */
if (seg_is_cache(seg) && !lv_remove(pool_lv))
return_0;
}
dm_list_del(&seg->list);
reduction = seg->len;
} else
reduction = count;
if (!_lv_segment_reduce(seg, reduction))
return_0;
count -= reduction;
}
lv->le_count -= extents;
lv->size = (uint64_t) lv->le_count * lv->vg->extent_size;
if (!delete)
return 1;
if (lv == lv->vg->pool_metadata_spare_lv) {
lv->status &= ~POOL_METADATA_SPARE;
lv->vg->pool_metadata_spare_lv = NULL;
}
/* Remove the LV if it is now empty */
if (!lv->le_count && !unlink_lv_from_vg(lv))
return_0;
else if (lv->vg->fid->fmt->ops->lv_setup &&
!lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
return_0;
return 1;
}
/*
* Empty an LV.
*/
int lv_empty(struct logical_volume *lv)
{
return _lv_reduce(lv, lv->le_count, 0);
}
/*
* Empty an LV and add error segment.
*/
int replace_lv_with_error_segment(struct logical_volume *lv)
{
uint32_t len = lv->le_count;
if (len && !lv_empty(lv))
return_0;
/* Minimum size required for a table. */
if (!len)
len = 1;
/*
* Since we are replacing the whatever-was-there with
* an error segment, we should also clear any flags
* that suggest it is anything other than "error".
*/
/* FIXME Check for other flags that need removing */
lv->status &= ~(MIRROR|MIRRORED|PVMOVE|LOCKED);
/* FIXME Check for any attached LVs that will become orphans e.g. mirror logs */
if (!lv_add_virtual_segment(lv, 0, len, get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_ERROR)))
return_0;
return 1;
}
int lv_refresh_suspend_resume(const struct logical_volume *lv)
{
struct cmd_context *cmd = lv->vg->cmd;
int r = 1;
if (!cmd->partial_activation && lv_is_partial(lv)) {
log_error("Refusing refresh of partial LV %s."
" Use '--activationmode partial' to override.",
display_lvname(lv));
return 0;
}
if (!suspend_lv(cmd, lv)) {
log_error("Failed to suspend %s.", display_lvname(lv));
r = 0;
}
if (!resume_lv(cmd, lv)) {
log_error("Failed to reactivate %s.", display_lvname(lv));
r = 0;
}
return r;
}
/*
* Remove given number of extents from LV.
*/
int lv_reduce(struct logical_volume *lv, uint32_t extents)
{
return _lv_reduce(lv, extents, 1);
}
int historical_glv_remove(struct generic_logical_volume *glv)
{
struct generic_logical_volume *origin_glv;
struct glv_list *glvl, *user_glvl;
struct historical_logical_volume *hlv;
int reconnected;
if (!glv || !glv->is_historical)
return_0;
hlv = glv->historical;
if (!(glv = find_historical_glv(hlv->vg, hlv->name, 0, &glvl))) {
if (!(find_historical_glv(hlv->vg, hlv->name, 1, NULL))) {
log_error(INTERNAL_ERROR "historical_glv_remove: historical LV %s/-%s not found ",
hlv->vg->name, hlv->name);
return 0;
} else {
log_verbose("Historical LV %s/-%s already on removed list ",
hlv->vg->name, hlv->name);
return 1;
}
}
if ((origin_glv = hlv->indirect_origin) &&
!remove_glv_from_indirect_glvs(origin_glv, glv))
return_0;
dm_list_iterate_items(user_glvl, &hlv->indirect_glvs) {
reconnected = 0;
if ((origin_glv && !origin_glv->is_historical) && !user_glvl->glv->is_historical)
log_verbose("Removing historical connection between %s and %s.",
origin_glv->live->name, user_glvl->glv->live->name);
else if (hlv->vg->cmd->record_historical_lvs) {
if (!add_glv_to_indirect_glvs(hlv->vg->vgmem, origin_glv, user_glvl->glv))
return_0;
reconnected = 1;
}
if (!reconnected) {
/*
* Break ancestry chain if we're removing historical LV and tracking
* historical LVs is switched off either via:
* - "metadata/record_lvs_history=0" config
* - "--nohistory" cmd line option
*
* Also, break the chain if we're unable to store such connection at all
* because we're removing the very last historical LV that was in between
* live LVs - pure live LVs can't store any indirect origin relation in
* metadata - we need at least one historical LV to do that!
*/
if (user_glvl->glv->is_historical)
user_glvl->glv->historical->indirect_origin = NULL;
else
first_seg(user_glvl->glv->live)->indirect_origin = NULL;
}
}
dm_list_move(&hlv->vg->removed_historical_lvs, &glvl->list);
return 1;
}
/*
* Completely remove an LV.
*/
int lv_remove(struct logical_volume *lv)
{
if (lv_is_historical(lv))
return historical_glv_remove(lv->this_glv);
if (!lv_reduce(lv, lv->le_count))
return_0;
return 1;
}
/*
* A set of contiguous physical extents allocated
*/
struct alloced_area {
struct dm_list list;
struct physical_volume *pv;
uint32_t pe;
uint32_t len;
};
/*
* Details of an allocation attempt
*/
struct alloc_handle {
struct cmd_context *cmd;
struct dm_pool *mem;
alloc_policy_t alloc; /* Overall policy */
int approx_alloc; /* get as much as possible up to new_extents */
uint32_t new_extents; /* Number of new extents required */
uint32_t area_count; /* Number of parallel areas */
uint32_t parity_count; /* Adds to area_count, but not area_multiple */
uint32_t area_multiple; /* seg->len = area_len * area_multiple */
uint32_t log_area_count; /* Number of parallel logs */
uint32_t metadata_area_count; /* Number of parallel metadata areas */
uint32_t log_len; /* Length of log/metadata_area */
uint32_t region_size; /* Mirror region size */
uint32_t total_area_len; /* Total number of parallel extents */
unsigned maximise_cling;
unsigned mirror_logs_separate; /* Force mirror logs on separate PVs? */
/*
* RAID devices require a metadata area that accompanies each
* device. During initial creation, it is best to look for space
* that is new_extents + log_len and then split that between two
* allocated areas when found. 'alloc_and_split_meta' indicates
* that this is the desired dynamic.
*
* This same idea is used by cache LVs to get the metadata device
* and data device allocated together.
*/
unsigned alloc_and_split_meta;
unsigned split_metadata_is_allocated; /* Metadata has been allocated */
const struct dm_config_node *cling_tag_list_cn;
struct dm_list *parallel_areas; /* PVs to avoid */
/*
* Contains area_count lists of areas allocated to data stripes
* followed by log_area_count lists of areas allocated to log stripes.
*/
struct dm_list alloced_areas[0];
};
/*
* Returns log device size in extents, algorithm from kernel code
*/
#define BYTE_SHIFT 3
static uint32_t _mirror_log_extents(uint32_t region_size, uint32_t pe_size, uint32_t area_len)
{
size_t area_size, bitset_size, log_size, region_count;
area_size = (size_t)area_len * pe_size;
region_count = dm_div_up(area_size, region_size);
/* Work out how many "unsigned long"s we need to hold the bitset. */
bitset_size = dm_round_up(region_count, sizeof(uint32_t) << BYTE_SHIFT);
bitset_size >>= BYTE_SHIFT;
/* Log device holds both header and bitset. */
log_size = dm_round_up((MIRROR_LOG_OFFSET << SECTOR_SHIFT) + bitset_size, 1 << SECTOR_SHIFT);
log_size >>= SECTOR_SHIFT;
log_size = dm_div_up(log_size, pe_size);
/*
* Kernel requires a mirror to be at least 1 region large. So,
* if our mirror log is itself a mirror, it must be at least
* 1 region large. This restriction may not be necessary for
* non-mirrored logs, but we apply the rule anyway.
*
* (The other option is to make the region size of the log
* mirror smaller than the mirror it is acting as a log for,
* but that really complicates things. It's much easier to
* keep the region_size the same for both.)
*/
return (log_size > (region_size / pe_size)) ? log_size :
(region_size / pe_size);
}
/* Is there enough total space or should we give up immediately? */
static int _sufficient_pes_free(struct alloc_handle *ah, struct dm_list *pvms,
uint32_t allocated, uint32_t extents_still_needed)
{
uint32_t area_extents_needed = (extents_still_needed - allocated) * ah->area_count / ah->area_multiple;
uint32_t parity_extents_needed = (extents_still_needed - allocated) * ah->parity_count / ah->area_multiple;
uint32_t metadata_extents_needed = ah->alloc_and_split_meta ? 0 : ah->metadata_area_count * RAID_METADATA_AREA_LEN; /* One each */
uint32_t total_extents_needed = area_extents_needed + parity_extents_needed + metadata_extents_needed;
uint32_t free_pes = pv_maps_size(pvms);
if (total_extents_needed > free_pes) {
log_error("Insufficient free space: %" PRIu32 " extents needed,"
" but only %" PRIu32 " available",
total_extents_needed, free_pes);
return 0;
}
return 1;
}
/* For striped mirrors, all the areas are counted, through the mirror layer */
static uint32_t _stripes_per_mimage(struct lv_segment *seg)
{
struct lv_segment *last_lvseg;
if (seg_is_mirrored(seg) && seg->area_count && seg_type(seg, 0) == AREA_LV) {
last_lvseg = dm_list_item(dm_list_last(&seg_lv(seg, 0)->segments), struct lv_segment);
if (seg_is_striped(last_lvseg))
return last_lvseg->area_count;
}
return 1;
}
static void _init_alloc_parms(struct alloc_handle *ah,
struct alloc_parms *alloc_parms,
alloc_policy_t alloc,
struct lv_segment *prev_lvseg, unsigned can_split,
uint32_t allocated, uint32_t extents_still_needed)
{
alloc_parms->alloc = alloc;
alloc_parms->prev_lvseg = prev_lvseg;
alloc_parms->flags = 0;
alloc_parms->extents_still_needed = extents_still_needed;
/*
* Only attempt contiguous/cling allocation to previous segment
* areas if the number of areas matches.
*/
if (alloc_parms->prev_lvseg &&
((ah->area_count + ah->parity_count) == prev_lvseg->area_count)) {
alloc_parms->flags |= A_AREA_COUNT_MATCHES;
/* Are there any preceding segments we must follow on from? */
if (alloc_parms->alloc == ALLOC_CONTIGUOUS) {
alloc_parms->flags |= A_CONTIGUOUS_TO_LVSEG;
alloc_parms->flags |= A_POSITIONAL_FILL;
} else if ((alloc_parms->alloc == ALLOC_CLING) ||
(alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) {
alloc_parms->flags |= A_CLING_TO_LVSEG;
alloc_parms->flags |= A_POSITIONAL_FILL;
}
} else
/*
* A cling allocation that follows a successful contiguous
* allocation must use the same PVs (or else fail).
*/
if ((alloc_parms->alloc == ALLOC_CLING) ||
(alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) {
alloc_parms->flags |= A_CLING_TO_ALLOCED;
alloc_parms->flags |= A_POSITIONAL_FILL;
}
if (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)
alloc_parms->flags |= A_CLING_BY_TAGS;
if (!(alloc_parms->alloc & A_POSITIONAL_FILL) &&
(alloc_parms->alloc == ALLOC_CONTIGUOUS) &&
ah->cling_tag_list_cn)
alloc_parms->flags |= A_PARTITION_BY_TAGS;
/*
* For normal allocations, if any extents have already been found
* for allocation, prefer to place further extents on the same disks as
* have already been used.
*/
if (ah->maximise_cling &&
(alloc_parms->alloc == ALLOC_NORMAL) &&
(allocated != alloc_parms->extents_still_needed))
alloc_parms->flags |= A_CLING_TO_ALLOCED;
if (can_split)
alloc_parms->flags |= A_CAN_SPLIT;
}
static int _log_parallel_areas(struct dm_pool *mem, struct dm_list *parallel_areas)
{
struct seg_pvs *spvs;
struct pv_list *pvl;
char *pvnames;
if (!parallel_areas)
return 1;
dm_list_iterate_items(spvs, parallel_areas) {
if (!dm_pool_begin_object(mem, 256)) {
log_error("dm_pool_begin_object failed");
return 0;
}
dm_list_iterate_items(pvl, &spvs->pvs) {
if (!dm_pool_grow_object(mem, pv_dev_name(pvl->pv), strlen(pv_dev_name(pvl->pv)))) {
log_error("dm_pool_grow_object failed");
dm_pool_abandon_object(mem);
return 0;
}
if (!dm_pool_grow_object(mem, " ", 1)) {
log_error("dm_pool_grow_object failed");
dm_pool_abandon_object(mem);
return 0;
}
}
if (!dm_pool_grow_object(mem, "\0", 1)) {
log_error("dm_pool_grow_object failed");
dm_pool_abandon_object(mem);
return 0;
}
pvnames = dm_pool_end_object(mem);
log_debug_alloc("Parallel PVs at LE %" PRIu32 " length %" PRIu32 ": %s",
spvs->le, spvs->len, pvnames);
dm_pool_free(mem, pvnames);
}
return 1;
}
/* Handles also stacking */
static int _setup_lv_size(struct logical_volume *lv, uint32_t extents)
{
struct lv_segment *thin_pool_seg;
lv->le_count = extents;
lv->size = (uint64_t) extents * lv->vg->extent_size;
if (lv_is_thin_pool_data(lv)) {
if (!(thin_pool_seg = get_only_segment_using_this_lv(lv)))
return_0;
/* Update thin pool segment from the layered LV */
thin_pool_seg->lv->le_count =
thin_pool_seg->len =
thin_pool_seg->area_len = lv->le_count;
thin_pool_seg->lv->size = lv->size;
}
return 1;
}
static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status,
uint32_t area_count,
uint32_t stripe_size,
const struct segment_type *segtype,
struct alloced_area *aa,
uint32_t region_size)
{
uint32_t s, extents, area_multiple;
struct lv_segment *seg;
area_multiple = _calc_area_multiple(segtype, area_count, 0);
extents = aa[0].len * area_multiple;
if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents,
status, stripe_size, NULL,
area_count,
aa[0].len, 0u, region_size, 0u, NULL))) {
log_error("Couldn't allocate new LV segment.");
return 0;
}
for (s = 0; s < area_count; s++)
if (!set_lv_segment_area_pv(seg, s, aa[s].pv, aa[s].pe))
return_0;
dm_list_add(&lv->segments, &seg->list);
extents = aa[0].len * area_multiple;
if (!_setup_lv_size(lv, lv->le_count + extents))
return_0;
return 1;
}
static int _setup_alloced_segments(struct logical_volume *lv,
struct dm_list *alloced_areas,
uint32_t area_count,
uint64_t status,
uint32_t stripe_size,
const struct segment_type *segtype,
uint32_t region_size)
{
struct alloced_area *aa;
dm_list_iterate_items(aa, &alloced_areas[0]) {
if (!_setup_alloced_segment(lv, status, area_count,
stripe_size, segtype, aa,
region_size))
return_0;
}
return 1;
}
/*
* This function takes a list of pv_areas and adds them to allocated_areas.
* If the complete area is not needed then it gets split.
* The part used is removed from the pv_map so it can't be allocated twice.
*/
static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocate,
struct alloc_state *alloc_state, uint32_t ix_log_offset)
{
uint32_t area_len, len;
uint32_t s, smeta;
uint32_t ix_log_skip = 0; /* How many areas to skip in middle of array to reach log areas */
uint32_t total_area_count;
struct alloced_area *aa;
struct pv_area *pva;
total_area_count = ah->area_count + ah->parity_count + alloc_state->log_area_count_still_needed;
if (!total_area_count) {
log_warn(INTERNAL_ERROR "_alloc_parallel_area called without any allocation to do.");
return 1;
}
area_len = max_to_allocate / ah->area_multiple;
/* Reduce area_len to the smallest of the areas */
for (s = 0; s < ah->area_count + ah->parity_count; s++)
if (area_len > alloc_state->areas[s].used)
area_len = alloc_state->areas[s].used;
len = (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? total_area_count * 2 : total_area_count;
len *= sizeof(*aa);
if (!(aa = dm_pool_alloc(ah->mem, len))) {
log_error("alloced_area allocation failed");
return 0;
}
/*
* Areas consists of area_count areas for data stripes, then
* ix_log_skip areas to skip, then log_area_count areas to use for the
* log, then some areas too small for the log.
*/
len = area_len;
for (s = 0; s < total_area_count; s++) {
if (s == (ah->area_count + ah->parity_count)) {
ix_log_skip = ix_log_offset - ah->area_count;
len = ah->log_len;
}
pva = alloc_state->areas[s + ix_log_skip].pva;
if (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) {
/*
* The metadata area goes at the front of the allocated
* space for now, but could easily go at the end (or
* middle!).
*
* Even though we split these two from the same
* allocation, we store the images at the beginning
* of the areas array and the metadata at the end.
*/
smeta = s + ah->area_count + ah->parity_count;
aa[smeta].pv = pva->map->pv;
aa[smeta].pe = pva->start;
aa[smeta].len = ah->log_len;
log_debug_alloc("Allocating parallel metadata area %" PRIu32
" on %s start PE %" PRIu32
" length %" PRIu32 ".",
(smeta - (ah->area_count + ah->parity_count)),
pv_dev_name(aa[smeta].pv), aa[smeta].pe,
ah->log_len);
consume_pv_area(pva, ah->log_len);
dm_list_add(&ah->alloced_areas[smeta], &aa[smeta].list);
}
aa[s].len = (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? len - ah->log_len : len;
/* Skip empty allocations */
if (!aa[s].len)
continue;
aa[s].pv = pva->map->pv;
aa[s].pe = pva->start;
log_debug_alloc("Allocating parallel area %" PRIu32
" on %s start PE %" PRIu32 " length %" PRIu32 ".",
s, pv_dev_name(aa[s].pv), aa[s].pe, aa[s].len);
consume_pv_area(pva, aa[s].len);
dm_list_add(&ah->alloced_areas[s], &aa[s].list);
}
/* Only need to alloc metadata from the first batch */
if (ah->alloc_and_split_meta)
ah->split_metadata_is_allocated = 1;
ah->total_area_len += area_len;
alloc_state->allocated += area_len * ah->area_multiple;
return 1;
}
/*
* Call fn for each AREA_PV used by the LV segment at lv:le of length *max_seg_len.
* If any constituent area contains more than one segment, max_seg_len is
* reduced to cover only the first.
* fn should return 0 on error, 1 to continue scanning or >1 to terminate without error.
* In the last case, this function passes on the return code.
* FIXME I think some callers are expecting this to check all PV segments used by an LV.
*/
static int _for_each_pv(struct cmd_context *cmd, struct logical_volume *lv,
uint32_t le, uint32_t len, struct lv_segment *seg,
uint32_t *max_seg_len,
uint32_t first_area, uint32_t max_areas,
int top_level_area_index,
int only_single_area_segments,
int (*fn)(struct cmd_context *cmd,
struct pv_segment *peg, uint32_t s,
void *data),
void *data)
{
uint32_t s;
uint32_t remaining_seg_len, area_len, area_multiple;
uint32_t stripes_per_mimage = 1;
int r = 1;
if (!seg && !(seg = find_seg_by_le(lv, le))) {
log_error("Failed to find segment for %s extent %" PRIu32,
lv->name, le);
return 0;
}
/* Remaining logical length of segment */
remaining_seg_len = seg->len - (le - seg->le);
if (remaining_seg_len > len)
remaining_seg_len = len;
if (max_seg_len && *max_seg_len > remaining_seg_len)
*max_seg_len = remaining_seg_len;
area_multiple = _calc_area_multiple(seg->segtype, seg->area_count, 0);
area_len = (remaining_seg_len / area_multiple) ? : 1;
/* For striped mirrors, all the areas are counted, through the mirror layer */
if (top_level_area_index == -1)
stripes_per_mimage = _stripes_per_mimage(seg);
for (s = first_area;
s < seg->area_count && (!max_areas || s <= max_areas);
s++) {
if (seg_type(seg, s) == AREA_LV) {
if (!(r = _for_each_pv(cmd, seg_lv(seg, s),
seg_le(seg, s) +
(le - seg->le) / area_multiple,
area_len, NULL, max_seg_len, 0,
(stripes_per_mimage == 1) && only_single_area_segments ? 1U : 0U,
(top_level_area_index != -1) ? top_level_area_index : (int) (s * stripes_per_mimage),
only_single_area_segments, fn,
data)))
stack;
} else if (seg_type(seg, s) == AREA_PV)
if (!(r = fn(cmd, seg_pvseg(seg, s), top_level_area_index != -1 ? (uint32_t) top_level_area_index + s : s, data)))
stack;
if (r != 1)
return r;
}
/* FIXME only_single_area_segments used as workaround to skip log LV - needs new param? */
if (!only_single_area_segments && seg_is_mirrored(seg) && seg->log_lv) {
if (!(r = _for_each_pv(cmd, seg->log_lv, 0, seg->log_lv->le_count, NULL,
NULL, 0, 0, 0, only_single_area_segments,
fn, data)))
stack;
if (r != 1)
return r;
}
/* FIXME Add snapshot cow, thin meta etc. */
/*
if (!only_single_area_segments && !max_areas && seg_is_raid(seg)) {
for (s = first_area; s < seg->area_count; s++) {
if (seg_metalv(seg, s))
if (!(r = _for_each_pv(cmd, seg_metalv(seg, s), 0, seg_metalv(seg, s)->le_count, NULL,
NULL, 0, 0, 0, 0, fn, data)))
stack;
if (r != 1)
return r;
}
}
*/
return 1;
}
static int _comp_area(const void *l, const void *r)
{
const struct pv_area_used *lhs = (const struct pv_area_used *) l;
const struct pv_area_used *rhs = (const struct pv_area_used *) r;
if (lhs->used < rhs->used)
return 1;
else if (lhs->used > rhs->used)
return -1;
return 0;
}
/*
* Search for pvseg that matches condition
*/
struct pv_match {
int (*condition)(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva);
struct alloc_handle *ah;
struct alloc_state *alloc_state;
struct pv_area *pva;
const struct dm_config_node *cling_tag_list_cn;
int s; /* Area index of match */
};
/*
* Is PV area on the same PV?
*/
static int _is_same_pv(struct pv_match *pvmatch __attribute((unused)), struct pv_segment *pvseg, struct pv_area *pva)
{
if (pvseg->pv != pva->map->pv)
return 0;
return 1;
}
/*
* Does PV area have a tag listed in allocation/cling_tag_list that
* matches EITHER a tag of the PV of the existing segment OR a tag in pv_tags?
* If tags_list_str is set, then instead we generate a list of matching tags for printing.
*/
static int _match_pv_tags(const struct dm_config_node *cling_tag_list_cn,
struct physical_volume *pv1, uint32_t pv1_start_pe, uint32_t area_num,
struct physical_volume *pv2, struct dm_list *pv_tags, unsigned validate_only,
struct dm_pool *mem, const char **tags_list_str)
{
const struct dm_config_value *cv;
const char *str;
const char *tag_matched;
struct dm_list *tags_to_match = tags_list_str ? NULL : pv_tags ? : &pv2->tags;
struct dm_str_list *sl;
unsigned first_tag = 1;
if (tags_list_str && !dm_pool_begin_object(mem, 256)) {
log_error("PV tags string allocation failed");
return 0;
}
for (cv = cling_tag_list_cn->v; cv; cv = cv->next) {
if (cv->type != DM_CFG_STRING) {
if (validate_only)
log_warn("WARNING: Ignoring invalid string in config file entry "
"allocation/cling_tag_list");
continue;
}
str = cv->v.str;
if (!*str) {
if (validate_only)
log_warn("WARNING: Ignoring empty string in config file entry "
"allocation/cling_tag_list");
continue;
}
if (*str != '@') {
if (validate_only)
log_warn("WARNING: Ignoring string not starting with @ in config file entry "
"allocation/cling_tag_list: %s", str);
continue;
}
str++;
if (!*str) {
if (validate_only)
log_warn("WARNING: Ignoring empty tag in config file entry "
"allocation/cling_tag_list");
continue;
}
if (validate_only)
continue;
/* Wildcard matches any tag against any tag. */
if (!strcmp(str, "*")) {
if (tags_list_str) {
dm_list_iterate_items(sl, &pv1->tags) {
if (!first_tag && !dm_pool_grow_object(mem, ",", 0)) {
dm_pool_abandon_object(mem);
log_error("PV tags string extension failed.");
return 0;
}
first_tag = 0;
if (!dm_pool_grow_object(mem, sl->str, 0)) {
dm_pool_abandon_object(mem);
log_error("PV tags string extension failed.");
return 0;
}
}
continue;
}
if (!str_list_match_list(&pv1->tags, tags_to_match, &tag_matched))
continue;
else {
if (!pv_tags)
log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.",
tag_matched, pv_dev_name(pv1), pv2 ? pv_dev_name(pv2) : "-");
else
log_debug_alloc("Eliminating allocation area %" PRIu32 " at PV %s start PE %" PRIu32
" from consideration: PV tag %s already used.",
area_num, pv_dev_name(pv1), pv1_start_pe, tag_matched);
return 1;
}
}
if (!str_list_match_item(&pv1->tags, str) ||
(tags_to_match && !str_list_match_item(tags_to_match, str)))
continue;
else {
if (tags_list_str) {
if (!first_tag && !dm_pool_grow_object(mem, ",", 0)) {
dm_pool_abandon_object(mem);
log_error("PV tags string extension failed.");
return 0;
}
first_tag = 0;
if (!dm_pool_grow_object(mem, str, 0)) {
dm_pool_abandon_object(mem);
log_error("PV tags string extension failed.");
return 0;
}
continue;
}
if (!pv_tags)
log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.",
str, pv_dev_name(pv1), pv2 ? pv_dev_name(pv2) : "-");
else
log_debug_alloc("Eliminating allocation area %" PRIu32 " at PV %s start PE %" PRIu32
" from consideration: PV tag %s already used.",
area_num, pv_dev_name(pv1), pv1_start_pe, str);
return 1;
}
}
if (tags_list_str) {
if (!dm_pool_grow_object(mem, "\0", 1)) {
dm_pool_abandon_object(mem);
log_error("PV tags string extension failed.");
return 0;
}
*tags_list_str = dm_pool_end_object(mem);
return 1;
}
return 0;
}
static int _validate_tag_list(const struct dm_config_node *cling_tag_list_cn)
{
return _match_pv_tags(cling_tag_list_cn, NULL, 0, 0, NULL, NULL, 1, NULL, NULL);
}
static const char *_tags_list_str(struct alloc_handle *ah, struct physical_volume *pv1)
{
const char *tags_list_str;
if (!_match_pv_tags(ah->cling_tag_list_cn, pv1, 0, 0, NULL, NULL, 0, ah->mem, &tags_list_str))
return_NULL;
return tags_list_str;
}
/*
* Does PV area have a tag listed in allocation/cling_tag_list that
* matches a tag in the pv_tags list?
*/
static int _pv_has_matching_tag(const struct dm_config_node *cling_tag_list_cn,
struct physical_volume *pv1, uint32_t pv1_start_pe, uint32_t area_num,
struct dm_list *pv_tags)
{
return _match_pv_tags(cling_tag_list_cn, pv1, pv1_start_pe, area_num, NULL, pv_tags, 0, NULL, NULL);
}
/*
* Does PV area have a tag listed in allocation/cling_tag_list that
* matches a tag of the PV of the existing segment?
*/
static int _pvs_have_matching_tag(const struct dm_config_node *cling_tag_list_cn,
struct physical_volume *pv1, struct physical_volume *pv2)
{
return _match_pv_tags(cling_tag_list_cn, pv1, 0, 0, pv2, NULL, 0, NULL, NULL);
}
static int _has_matching_pv_tag(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva)
{
return _pvs_have_matching_tag(pvmatch->cling_tag_list_cn, pvseg->pv, pva->map->pv);
}
/*
* Is PV area contiguous to PV segment?
*/
static int _is_contiguous(struct pv_match *pvmatch __attribute((unused)), struct pv_segment *pvseg, struct pv_area *pva)
{
if (pvseg->pv != pva->map->pv)
return 0;
if (pvseg->pe + pvseg->len != pva->start)
return 0;
return 1;
}
static void _reserve_area(struct alloc_handle *ah, struct alloc_state *alloc_state, struct pv_area *pva,
uint32_t required, uint32_t ix_pva, uint32_t unreserved)
{
struct pv_area_used *area_used = &alloc_state->areas[ix_pva];
const char *pv_tag_list = NULL;
if (ah->cling_tag_list_cn)
pv_tag_list = _tags_list_str(ah, pva->map->pv);
log_debug_alloc("%s allocation area %" PRIu32 " %s %s start PE %" PRIu32
" length %" PRIu32 " leaving %" PRIu32 "%s%s.",
area_used->pva ? "Changing " : "Considering",
ix_pva, area_used->pva ? "to" : "as",
dev_name(pva->map->pv->dev), pva->start, required, unreserved,
pv_tag_list ? " with PV tags: " : "",
pv_tag_list ? : "");
if (pv_tag_list)
dm_pool_free(ah->mem, (void *)pv_tag_list);
area_used->pva = pva;
area_used->used = required;
}
static int _reserve_required_area(struct alloc_handle *ah, struct alloc_state *alloc_state, struct pv_area *pva,
uint32_t required, uint32_t ix_pva, uint32_t unreserved)
{
uint32_t s;
/* Expand areas array if needed after an area was split. */
if (ix_pva >= alloc_state->areas_size) {
alloc_state->areas_size *= 2;
if (!(alloc_state->areas = dm_realloc(alloc_state->areas, sizeof(*alloc_state->areas) * (alloc_state->areas_size)))) {
log_error("Memory reallocation for parallel areas failed.");
return 0;
}
for (s = alloc_state->areas_size / 2; s < alloc_state->areas_size; s++)
alloc_state->areas[s].pva = NULL;
}
_reserve_area(ah, alloc_state, pva, required, ix_pva, unreserved);
return 1;
}
static int _is_condition(struct cmd_context *cmd __attribute__((unused)),
struct pv_segment *pvseg, uint32_t s,
void *data)
{
struct pv_match *pvmatch = data;
int positional = pvmatch->alloc_state->alloc_parms->flags & A_POSITIONAL_FILL;
if (positional && pvmatch->alloc_state->areas[s].pva)
return 1; /* Area already assigned */
if (!pvmatch->condition(pvmatch, pvseg, pvmatch->pva))
return 1; /* Continue */
if (positional && (s >= pvmatch->alloc_state->num_positional_areas))
return 1;
/* FIXME The previous test should make this one redundant. */
if (positional && (s >= pvmatch->alloc_state->areas_size))
return 1;
/*
* Only used for cling and contiguous policies (which only make one allocation per PV)
* so it's safe to say all the available space is used.
*/
if (positional)
_reserve_required_area(pvmatch->ah, pvmatch->alloc_state, pvmatch->pva, pvmatch->pva->count, s, 0);
return 2; /* Finished */
}
/*
* Is pva on same PV as any existing areas?
*/
static int _check_cling(struct alloc_handle *ah,
const struct dm_config_node *cling_tag_list_cn,
struct lv_segment *prev_lvseg, struct pv_area *pva,
struct alloc_state *alloc_state)
{
struct pv_match pvmatch;
int r;
uint32_t le, len;
pvmatch.ah = ah;
pvmatch.condition = cling_tag_list_cn ? _has_matching_pv_tag : _is_same_pv;
pvmatch.alloc_state = alloc_state;
pvmatch.pva = pva;
pvmatch.cling_tag_list_cn = cling_tag_list_cn;
if (ah->maximise_cling) {
/* Check entire LV */
le = 0;
len = prev_lvseg->le + prev_lvseg->len;
} else {
/* Only check 1 LE at end of previous LV segment */
le = prev_lvseg->le + prev_lvseg->len - 1;
len = 1;
}
/* FIXME Cope with stacks by flattening */
if (!(r = _for_each_pv(ah->cmd, prev_lvseg->lv, le, len, NULL, NULL,
0, 0, -1, 1,
_is_condition, &pvmatch)))
stack;
if (r != 2)
return 0;
return 1;
}
/*
* Is pva contiguous to any existing areas or on the same PV?
*/
static int _check_contiguous(struct alloc_handle *ah,
struct lv_segment *prev_lvseg, struct pv_area *pva,
struct alloc_state *alloc_state)
{
struct pv_match pvmatch;
int r;
pvmatch.ah = ah;
pvmatch.condition = _is_contiguous;
pvmatch.alloc_state = alloc_state;
pvmatch.pva = pva;
pvmatch.cling_tag_list_cn = NULL;
/* FIXME Cope with stacks by flattening */
if (!(r = _for_each_pv(ah->cmd, prev_lvseg->lv,
prev_lvseg->le + prev_lvseg->len - 1, 1, NULL, NULL,
0, 0, -1, 1,
_is_condition, &pvmatch)))
stack;
if (r != 2)
return 0;
return 1;
}
/*
* Is pva on same PV as any areas already used in this allocation attempt?
*/
static int _check_cling_to_alloced(struct alloc_handle *ah, const struct dm_config_node *cling_tag_list_cn,
struct pv_area *pva, struct alloc_state *alloc_state)
{
unsigned s;
struct alloced_area *aa;
int positional = alloc_state->alloc_parms->flags & A_POSITIONAL_FILL;
/*
* Ignore log areas. They are always allocated whole as part of the
* first allocation. If they aren't yet set, we know we've nothing to do.
*/
if (alloc_state->log_area_count_still_needed)
return 0;
for (s = 0; s < ah->area_count; s++) {
if (positional && alloc_state->areas[s].pva)
continue; /* Area already assigned */
dm_list_iterate_items(aa, &ah->alloced_areas[s]) {
if ((!cling_tag_list_cn && (pva->map->pv == aa[0].pv)) ||
(cling_tag_list_cn && _pvs_have_matching_tag(cling_tag_list_cn, pva->map->pv, aa[0].pv))) {
if (positional)
_reserve_required_area(ah, alloc_state, pva, pva->count, s, 0);
return 1;
}
}
}
return 0;
}
static int _pv_is_parallel(struct physical_volume *pv, struct dm_list *parallel_pvs)
{
struct pv_list *pvl;
dm_list_iterate_items(pvl, parallel_pvs)
if (pv == pvl->pv)
return 1;
return 0;
}
/*
* Decide whether or not to try allocation from supplied area pva.
* alloc_state->areas may get modified.
*/
static area_use_t _check_pva(struct alloc_handle *ah, struct pv_area *pva, uint32_t still_needed,
struct alloc_state *alloc_state,
unsigned already_found_one, unsigned iteration_count, unsigned log_iteration_count)
{
const struct alloc_parms *alloc_parms = alloc_state->alloc_parms;
unsigned s;
/* Skip fully-reserved areas (which are not currently removed from the list). */
if (!pva->unreserved)
return NEXT_AREA;
/* FIXME Should this test be removed? */
if (iteration_count)
/*
* Don't use an area twice.
*/
for (s = 0; s < alloc_state->areas_size; s++)
if (alloc_state->areas[s].pva == pva)
return NEXT_AREA;
/* If maximise_cling is set, perform several checks, otherwise perform exactly one. */
if (!iteration_count && !log_iteration_count && alloc_parms->flags & (A_CONTIGUOUS_TO_LVSEG | A_CLING_TO_LVSEG | A_CLING_TO_ALLOCED)) {
/* Contiguous? */
if (((alloc_parms->flags & A_CONTIGUOUS_TO_LVSEG) ||
(ah->maximise_cling && (alloc_parms->flags & A_AREA_COUNT_MATCHES))) &&
_check_contiguous(ah, alloc_parms->prev_lvseg, pva, alloc_state))
goto found;
/* Try next area on same PV if looking for contiguous space */
if (alloc_parms->flags & A_CONTIGUOUS_TO_LVSEG)
return NEXT_AREA;
/* Cling to prev_lvseg? */
if (((alloc_parms->flags & A_CLING_TO_LVSEG) ||
(ah->maximise_cling && (alloc_parms->flags & A_AREA_COUNT_MATCHES))) &&
_check_cling(ah, NULL, alloc_parms->prev_lvseg, pva, alloc_state))
/* If this PV is suitable, use this first area */
goto found;
/* Cling_to_alloced? */
if ((alloc_parms->flags & A_CLING_TO_ALLOCED) &&
_check_cling_to_alloced(ah, NULL, pva, alloc_state))
goto found;
/* Cling_by_tags? */
if (!(alloc_parms->flags & A_CLING_BY_TAGS) || !ah->cling_tag_list_cn)
return NEXT_PV;
if ((alloc_parms->flags & A_AREA_COUNT_MATCHES)) {
if (_check_cling(ah, ah->cling_tag_list_cn, alloc_parms->prev_lvseg, pva, alloc_state))
goto found;
} else if (_check_cling_to_alloced(ah, ah->cling_tag_list_cn, pva, alloc_state))
goto found;
/* All areas on this PV give same result so pointless checking more */
return NEXT_PV;
}
/* Normal/Anywhere */
/* Is it big enough on its own? */
if (pva->unreserved * ah->area_multiple < still_needed &&
((!(alloc_parms->flags & A_CAN_SPLIT) && !ah->log_area_count) ||
(already_found_one && alloc_parms->alloc != ALLOC_ANYWHERE)))
return NEXT_PV;
found:
if (alloc_parms->flags & A_POSITIONAL_FILL)
return PREFERRED;
return USE_AREA;
}
/*
* Decide how many extents we're trying to obtain from a given area.
* Removes the extents from further consideration.
*/
static uint32_t _calc_required_extents(struct alloc_handle *ah, struct pv_area *pva, unsigned ix_pva, uint32_t max_to_allocate, alloc_policy_t alloc)
{
uint32_t required = max_to_allocate / ah->area_multiple;
/*
* Update amount unreserved - effectively splitting an area
* into two or more parts. If the whole stripe doesn't fit,
* reduce amount we're looking for.
*/
if (alloc == ALLOC_ANYWHERE) {
if (ix_pva >= ah->area_count + ah->parity_count)
required = ah->log_len;
} else if (required < ah->log_len)
required = ah->log_len;
if (required >= pva->unreserved) {
required = pva->unreserved;
pva->unreserved = 0;
} else {
pva->unreserved -= required;
reinsert_changed_pv_area(pva);
}
return required;
}
static void _clear_areas(struct alloc_state *alloc_state)
{
uint32_t s;
alloc_state->num_positional_areas = 0;
for (s = 0; s < alloc_state->areas_size; s++)
alloc_state->areas[s].pva = NULL;
}
static void _reset_unreserved(struct dm_list *pvms)
{
struct pv_map *pvm;
struct pv_area *pva;
dm_list_iterate_items(pvm, pvms)
dm_list_iterate_items(pva, &pvm->areas)
if (pva->unreserved != pva->count) {
pva->unreserved = pva->count;
reinsert_changed_pv_area(pva);
}
}
static void _report_needed_allocation_space(struct alloc_handle *ah,
struct alloc_state *alloc_state,
struct dm_list *pvms)
{
const char *metadata_type;
uint32_t parallel_areas_count, parallel_area_size;
uint32_t metadata_count, metadata_size;
parallel_area_size = ah->new_extents - alloc_state->allocated;
parallel_area_size /= ah->area_multiple;
parallel_area_size -= (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? ah->log_len : 0;
parallel_areas_count = ah->area_count + ah->parity_count;
metadata_size = ah->log_len;
if (ah->alloc_and_split_meta) {
metadata_type = "metadata area";
metadata_count = parallel_areas_count;
if (ah->split_metadata_is_allocated)
metadata_size = 0;
} else {
metadata_type = "mirror log";
metadata_count = alloc_state->log_area_count_still_needed;
}
log_debug_alloc("Still need %s%" PRIu32 " total extents from %" PRIu32 " remaining (%" PRIu32 " positional slots):",
ah->approx_alloc ? "up to " : "",
parallel_area_size * parallel_areas_count + metadata_size * metadata_count, pv_maps_size(pvms),
alloc_state->num_positional_areas);
log_debug_alloc(" %" PRIu32 " (%" PRIu32 " data/%" PRIu32
" parity) parallel areas of %" PRIu32 " extents each",
parallel_areas_count, ah->area_count, ah->parity_count, parallel_area_size);
log_debug_alloc(" %" PRIu32 " %s%s of %" PRIu32 " extents each",
metadata_count, metadata_type,
(metadata_count == 1) ? "" : "s",
metadata_size);
}
/* Work through the array, removing any entries with tags already used by previous areas. */
static int _limit_to_one_area_per_tag(struct alloc_handle *ah, struct alloc_state *alloc_state,
uint32_t ix_log_offset, unsigned *ix)
{
uint32_t s = 0, u = 0;
DM_LIST_INIT(pv_tags);
while (s < alloc_state->areas_size && alloc_state->areas[s].pva) {
/* Start again with an empty tag list when we reach the log devices */
if (u == ix_log_offset)
dm_list_init(&pv_tags);
if (!_pv_has_matching_tag(ah->cling_tag_list_cn, alloc_state->areas[s].pva->map->pv, alloc_state->areas[s].pva->start, s, &pv_tags)) {
/* The comparison fn will ignore any non-cling tags so just add everything */
if (!str_list_add_list(ah->mem, &pv_tags, &alloc_state->areas[s].pva->map->pv->tags))
return_0;
if (s != u)
alloc_state->areas[u] = alloc_state->areas[s];
u++;
} else
(*ix)--; /* One area removed */
s++;
}
alloc_state->areas[u].pva = NULL;
return 1;
}
/*
* Returns 1 regardless of whether any space was found, except on error.
*/
static int _find_some_parallel_space(struct alloc_handle *ah,
struct dm_list *pvms, struct alloc_state *alloc_state,
struct dm_list *parallel_pvs, uint32_t max_to_allocate)
{
const struct alloc_parms *alloc_parms = alloc_state->alloc_parms;
unsigned ix = 0;
unsigned last_ix;
struct pv_map *pvm;
struct pv_area *pva;
unsigned preferred_count = 0;
unsigned already_found_one;
unsigned ix_log_offset; /* Offset to start of areas to use for log */
unsigned too_small_for_log_count; /* How many too small for log? */
unsigned iteration_count = 0; /* cling_to_alloced may need 2 iterations */
unsigned log_iteration_count = 0; /* extra iteration for logs on data devices */
struct alloced_area *aa;
uint32_t s;
uint32_t devices_needed = ah->area_count + ah->parity_count;
uint32_t required;
_clear_areas(alloc_state);
_reset_unreserved(pvms);
/* num_positional_areas holds the number of parallel allocations that must be contiguous/cling */
/* These appear first in the array, so it is also the offset to the non-preferred allocations */
/* At most one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG or A_CLING_TO_ALLOCED may be set */
if (!(alloc_parms->flags & A_POSITIONAL_FILL))
alloc_state->num_positional_areas = 0;
else if (alloc_parms->flags & (A_CONTIGUOUS_TO_LVSEG | A_CLING_TO_LVSEG))
alloc_state->num_positional_areas = _stripes_per_mimage(alloc_parms->prev_lvseg) * alloc_parms->prev_lvseg->area_count;
else if (alloc_parms->flags & A_CLING_TO_ALLOCED)
alloc_state->num_positional_areas = ah->area_count;
if (alloc_parms->alloc == ALLOC_NORMAL || (alloc_parms->flags & A_CLING_TO_ALLOCED))
log_debug_alloc("Cling_to_allocated is %sset",
alloc_parms->flags & A_CLING_TO_ALLOCED ? "" : "not ");
if (alloc_parms->flags & A_POSITIONAL_FILL)
log_debug_alloc("%u preferred area(s) to be filled positionally.", alloc_state->num_positional_areas);
else
log_debug_alloc("Areas to be sorted and filled sequentially.");
_report_needed_allocation_space(ah, alloc_state, pvms);
/* ix holds the number of areas found on other PVs */
do {
if (log_iteration_count) {
log_debug_alloc("Found %u areas for %" PRIu32 " parallel areas and %" PRIu32 " log areas so far.", ix, devices_needed, alloc_state->log_area_count_still_needed);
} else if (iteration_count)
log_debug_alloc("Filled %u out of %u preferred areas so far.", preferred_count, alloc_state->num_positional_areas);
/*
* Provide for escape from the loop if no progress is made.
* This should not happen: ALLOC_ANYWHERE should be able to use
* all available space. (If there aren't enough extents, the code
* should not reach this point.)
*/
last_ix = ix;
/*
* Put the smallest area of each PV that is at least the
* size we need into areas array. If there isn't one
* that fits completely and we're allowed more than one
* LV segment, then take the largest remaining instead.
*/
dm_list_iterate_items(pvm, pvms) {
/* PV-level checks */
if (dm_list_empty(&pvm->areas))
continue; /* Next PV */
if (alloc_parms->alloc != ALLOC_ANYWHERE) {
/* Don't allocate onto the log PVs */
if (ah->log_area_count)
dm_list_iterate_items(aa, &ah->alloced_areas[ah->area_count])
for (s = 0; s < ah->log_area_count; s++)
if (!aa[s].pv)
goto next_pv;
/* FIXME Split into log and non-log parallel_pvs and only check the log ones if log_iteration? */
/* (I've temporatily disabled the check.) */
/* Avoid PVs used by existing parallel areas */
if (!log_iteration_count && parallel_pvs && _pv_is_parallel(pvm->pv, parallel_pvs))
goto next_pv;
/*
* Avoid PVs already set aside for log.
* We only reach here if there were enough PVs for the main areas but
* not enough for the logs.
*/
if (log_iteration_count) {
for (s = devices_needed; s < ix + alloc_state->num_positional_areas; s++)
if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv)
goto next_pv;
/* On a second pass, avoid PVs already used in an uncommitted area */
} else if (iteration_count)
for (s = 0; s < devices_needed; s++)
if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv)
goto next_pv;
}
already_found_one = 0;
/* First area in each list is the largest */
dm_list_iterate_items(pva, &pvm->areas) {
/*
* There are two types of allocations, which can't be mixed at present:
*
* PREFERRED are stored immediately in a specific parallel slot.
* This is only used if the A_POSITIONAL_FILL flag is set.
* This requires the number of slots to match, so if comparing with
* prev_lvseg then A_AREA_COUNT_MATCHES must be set.
*
* USE_AREA are stored for later, then sorted and chosen from.
*/
switch(_check_pva(ah, pva, max_to_allocate,
alloc_state, already_found_one, iteration_count, log_iteration_count)) {
case PREFERRED:
preferred_count++;
/* Fall through */
case NEXT_PV:
goto next_pv;
case NEXT_AREA:
continue;
case USE_AREA:
/*
* Except with ALLOC_ANYWHERE, replace first area with this
* one which is smaller but still big enough.
*/
if (!already_found_one ||
alloc_parms->alloc == ALLOC_ANYWHERE) {
ix++;
already_found_one = 1;
}
/* Reserve required amount of pva */
required = _calc_required_extents(ah, pva, ix + alloc_state->num_positional_areas - 1, max_to_allocate, alloc_parms->alloc);
if (!_reserve_required_area(ah, alloc_state, pva, required, ix + alloc_state->num_positional_areas - 1, pva->unreserved))
return_0;
}
}
next_pv:
/* With ALLOC_ANYWHERE we ignore further PVs once we have at least enough areas */
/* With cling and contiguous we stop if we found a match for *all* the areas */
/* FIXME Rename these variables! */
if ((alloc_parms->alloc == ALLOC_ANYWHERE &&
ix + alloc_state->num_positional_areas >= devices_needed + alloc_state->log_area_count_still_needed) ||
(preferred_count == alloc_state->num_positional_areas &&
(alloc_state->num_positional_areas == devices_needed + alloc_state->log_area_count_still_needed)))
break;
}
} while ((alloc_parms->alloc == ALLOC_ANYWHERE && last_ix != ix && ix < devices_needed + alloc_state->log_area_count_still_needed) ||
/* With cling_to_alloced and normal, if there were gaps in the preferred areas, have a second iteration */
(alloc_parms->alloc == ALLOC_NORMAL && preferred_count &&
(preferred_count < alloc_state->num_positional_areas || alloc_state->log_area_count_still_needed) &&
(alloc_parms->flags & A_CLING_TO_ALLOCED) && !iteration_count++) ||
/* Extra iteration needed to fill log areas on PVs already used? */
(alloc_parms->alloc == ALLOC_NORMAL && preferred_count == alloc_state->num_positional_areas && !ah->mirror_logs_separate &&
(ix + preferred_count >= devices_needed) &&
(ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed) && !log_iteration_count++));
/* Non-zero ix means at least one USE_AREA was returned */
if (preferred_count < alloc_state->num_positional_areas && !(alloc_parms->flags & A_CLING_TO_ALLOCED) && !ix)
return 1;
if (ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed)
return 1;
/* Sort the areas so we allocate from the biggest */
if (log_iteration_count) {
if (ix > devices_needed + 1) {
log_debug_alloc("Sorting %u log areas", ix - devices_needed);
qsort(alloc_state->areas + devices_needed, ix - devices_needed, sizeof(*alloc_state->areas),
_comp_area);
}
} else if (ix > 1) {
log_debug_alloc("Sorting %u areas", ix);
qsort(alloc_state->areas + alloc_state->num_positional_areas, ix, sizeof(*alloc_state->areas),
_comp_area);
}
/* If there are gaps in our preferred areas, fill them from the sorted part of the array */
if (preferred_count && preferred_count != alloc_state->num_positional_areas) {
for (s = 0; s < devices_needed; s++)
if (!alloc_state->areas[s].pva) {
alloc_state->areas[s].pva = alloc_state->areas[alloc_state->num_positional_areas].pva;
alloc_state->areas[s].used = alloc_state->areas[alloc_state->num_positional_areas].used;
alloc_state->areas[alloc_state->num_positional_areas++].pva = NULL;
}
}
/*
* First time around, if there's a log, allocate it on the
* smallest device that has space for it.
*/
too_small_for_log_count = 0;
ix_log_offset = 0;
/* FIXME This logic is due to its heritage and can be simplified! */
if (alloc_state->log_area_count_still_needed) {
/* How many areas are too small for the log? */
while (too_small_for_log_count < alloc_state->num_positional_areas + ix &&
(*(alloc_state->areas + alloc_state->num_positional_areas + ix - 1 -
too_small_for_log_count)).used < ah->log_len)
too_small_for_log_count++;
ix_log_offset = alloc_state->num_positional_areas + ix - too_small_for_log_count - ah->log_area_count;
}
if (ix + alloc_state->num_positional_areas < devices_needed +
(alloc_state->log_area_count_still_needed ? alloc_state->log_area_count_still_needed +
too_small_for_log_count : 0))
return 1;
/*
* FIXME We should change the code to do separate calls for the log allocation
* and the data allocation so that _limit_to_one_area_per_tag doesn't have to guess
* where the split is going to occur.
*/
/*
* This code covers the initial allocation - after that there is something to 'cling' to
* and we shouldn't get this far.
* alloc_state->num_positional_areas is assumed to be 0 with A_PARTITION_BY_TAGS.
*
* FIXME Consider a second attempt with A_PARTITION_BY_TAGS if, for example, the largest area
* had all the tags set, but other areas don't.
*/
if ((alloc_parms->flags & A_PARTITION_BY_TAGS) && !alloc_state->num_positional_areas) {
if (!_limit_to_one_area_per_tag(ah, alloc_state, ix_log_offset, &ix))
return_0;
/* Recalculate log position because we might have removed some areas from consideration */
if (alloc_state->log_area_count_still_needed) {
/* How many areas are too small for the log? */
too_small_for_log_count = 0;
while (too_small_for_log_count < ix &&
(*(alloc_state->areas + ix - 1 - too_small_for_log_count)).pva &&
(*(alloc_state->areas + ix - 1 - too_small_for_log_count)).used < ah->log_len)
too_small_for_log_count++;
if (ix < too_small_for_log_count + ah->log_area_count)
return 1;
ix_log_offset = ix - too_small_for_log_count - ah->log_area_count;
}
if (ix < devices_needed +
(alloc_state->log_area_count_still_needed ? alloc_state->log_area_count_still_needed +
too_small_for_log_count : 0))
return 1;
}
/*
* Finally add the space identified to the list of areas to be used.
*/
if (!_alloc_parallel_area(ah, max_to_allocate, alloc_state, ix_log_offset))
return_0;
/*
* Log is always allocated first time.
*/
alloc_state->log_area_count_still_needed = 0;
return 1;
}
/*
* Choose sets of parallel areas to use, respecting any constraints
* supplied in alloc_parms.
*/
static int _find_max_parallel_space_for_one_policy(struct alloc_handle *ah, struct alloc_parms *alloc_parms,
struct dm_list *pvms, struct alloc_state *alloc_state)
{
uint32_t max_tmp;
uint32_t max_to_allocate; /* Maximum extents to allocate this time */
uint32_t old_allocated;
uint32_t next_le;
struct seg_pvs *spvs;
struct dm_list *parallel_pvs;
alloc_state->alloc_parms = alloc_parms;
/* FIXME This algorithm needs a lot of cleaning up! */
/* FIXME anywhere doesn't find all space yet */
do {
parallel_pvs = NULL;
max_to_allocate = alloc_parms->extents_still_needed - alloc_state->allocated;
/*
* If there are existing parallel PVs, avoid them and reduce
* the maximum we can allocate in one go accordingly.
*/
if (ah->parallel_areas) {
next_le = (alloc_parms->prev_lvseg ? alloc_parms->prev_lvseg->le + alloc_parms->prev_lvseg->len : 0) + alloc_state->allocated / ah->area_multiple;
dm_list_iterate_items(spvs, ah->parallel_areas) {
if (next_le >= spvs->le + spvs->len)
continue;
max_tmp = max_to_allocate +
alloc_state->allocated;
/*
* Because a request that groups metadata and
* data together will be split, we must adjust
* the comparison accordingly.
*/
if (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated)
max_tmp -= ah->log_len;
if (max_tmp > (spvs->le + spvs->len) * ah->area_multiple) {
max_to_allocate = (spvs->le + spvs->len) * ah->area_multiple - alloc_state->allocated;
max_to_allocate += (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? ah->log_len : 0;
}
parallel_pvs = &spvs->pvs;
break;
}
}
old_allocated = alloc_state->allocated;
if (!_find_some_parallel_space(ah, pvms, alloc_state, parallel_pvs, max_to_allocate))
return_0;
/*
* For ALLOC_CLING, if the number of areas matches and maximise_cling is
* set we allow two passes, first with A_POSITIONAL_FILL then without.
*
* If we didn't allocate anything this time with ALLOC_NORMAL and had
* A_CLING_TO_ALLOCED set, try again without it.
*
* For ALLOC_NORMAL, if we did allocate something without the
* flag set, set it and continue so that further allocations
* remain on the same disks where possible.
*/
if (old_allocated == alloc_state->allocated) {
if (ah->maximise_cling && ((alloc_parms->alloc == ALLOC_CLING) || (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) &&
(alloc_parms->flags & A_CLING_TO_LVSEG) && (alloc_parms->flags & A_POSITIONAL_FILL))
alloc_parms->flags &= ~A_POSITIONAL_FILL;
else if ((alloc_parms->alloc == ALLOC_NORMAL) && (alloc_parms->flags & A_CLING_TO_ALLOCED))
alloc_parms->flags &= ~A_CLING_TO_ALLOCED;
else
break; /* Give up */
} else if (ah->maximise_cling && alloc_parms->