| /* |
| * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. |
| * Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved. |
| * |
| * This file is part of LVM2. |
| * |
| * This copyrighted material is made available to anyone wishing to use, |
| * modify, copy, or redistribute it subject to the terms and conditions |
| * of the GNU Lesser General Public License v.2.1. |
| * |
| * You should have received a copy of the GNU Lesser General Public License |
| * along with this program; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| #include "lib.h" |
| #include "metadata.h" |
| #include "locking.h" |
| #include "pv_map.h" |
| #include "lvm-string.h" |
| #include "toolcontext.h" |
| #include "lv_alloc.h" |
| #include "pv_alloc.h" |
| #include "display.h" |
| #include "segtype.h" |
| #include "archiver.h" |
| #include "activate.h" |
| #include "str_list.h" |
| #include "defaults.h" |
| #include "lvm-exec.h" |
| #include "memlock.h" |
| #include "lvmlockd.h" |
| |
| typedef enum { |
| PREFERRED, |
| USE_AREA, |
| NEXT_PV, |
| NEXT_AREA |
| } area_use_t; |
| |
| /* FIXME: remove RAID_METADATA_AREA_LEN macro after defining 'raid_log_extents' */ |
| #define RAID_METADATA_AREA_LEN 1 |
| |
| /* FIXME These ended up getting used differently from first intended. Refactor. */ |
| /* Only one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG, A_CLING_TO_ALLOCED may be set */ |
| #define A_CONTIGUOUS_TO_LVSEG 0x01 /* Must be contiguous to an existing segment */ |
| #define A_CLING_TO_LVSEG 0x02 /* Must use same disks as existing LV segment */ |
| #define A_CLING_TO_ALLOCED 0x04 /* Must use same disks as already-allocated segment */ |
| |
| #define A_CLING_BY_TAGS 0x08 /* Must match tags against existing segment */ |
| #define A_CAN_SPLIT 0x10 |
| #define A_AREA_COUNT_MATCHES 0x20 /* Existing lvseg has same number of areas as new segment */ |
| |
| #define A_POSITIONAL_FILL 0x40 /* Slots are positional and filled using PREFERRED */ |
| #define A_PARTITION_BY_TAGS 0x80 /* No allocated area may share any tag with any other */ |
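| |
| /* |
| * Example: a cling allocation pass extending an LV whose last segment has |
| * the same number of areas gets A_CLING_TO_LVSEG, A_POSITIONAL_FILL and |
| * A_AREA_COUNT_MATCHES set (plus A_CAN_SPLIT if the allocation may be |
| * split across free areas); see _init_alloc_parms(). |
| */ |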
| |
| /* |
| * Constant parameters during a single allocation attempt. |
| */ |
| struct alloc_parms { |
| alloc_policy_t alloc; |
| unsigned flags; /* Holds A_* */ |
| struct lv_segment *prev_lvseg; |
| uint32_t extents_still_needed; |
| }; |
| |
| /* |
| * Holds varying state of each allocation attempt. |
| */ |
| struct alloc_state { |
| const struct alloc_parms *alloc_parms; |
| struct pv_area_used *areas; |
| uint32_t areas_size; |
| uint32_t log_area_count_still_needed; /* Number of areas still needing to be allocated for the log */ |
| uint32_t allocated; /* Total number of extents allocated so far */ |
| uint32_t num_positional_areas; /* Number of parallel allocations that must be contiguous/cling */ |
| }; |
| |
| struct lv_names { |
| const char *old; |
| const char *new; |
| }; |
| |
| enum { |
| LV_TYPE_UNKNOWN, |
| LV_TYPE_NONE, |
| LV_TYPE_PUBLIC, |
| LV_TYPE_PRIVATE, |
| LV_TYPE_HISTORY, |
| LV_TYPE_LINEAR, |
| LV_TYPE_STRIPED, |
| LV_TYPE_MIRROR, |
| LV_TYPE_RAID, |
| LV_TYPE_THIN, |
| LV_TYPE_CACHE, |
| LV_TYPE_SPARSE, |
| LV_TYPE_ORIGIN, |
| LV_TYPE_THINORIGIN, |
| LV_TYPE_MULTITHINORIGIN, |
| LV_TYPE_THICKORIGIN, |
| LV_TYPE_MULTITHICKORIGIN, |
| LV_TYPE_CACHEORIGIN, |
| LV_TYPE_EXTTHINORIGIN, |
| LV_TYPE_MULTIEXTTHINORIGIN, |
| LV_TYPE_SNAPSHOT, |
| LV_TYPE_THINSNAPSHOT, |
| LV_TYPE_THICKSNAPSHOT, |
| LV_TYPE_PVMOVE, |
| LV_TYPE_IMAGE, |
| LV_TYPE_LOG, |
| LV_TYPE_METADATA, |
| LV_TYPE_POOL, |
| LV_TYPE_DATA, |
| LV_TYPE_SPARE, |
| LV_TYPE_VIRTUAL, |
| LV_TYPE_RAID0, |
| LV_TYPE_RAID0_META, |
| LV_TYPE_RAID1, |
| LV_TYPE_RAID10, |
| LV_TYPE_RAID4, |
| LV_TYPE_RAID5, |
| LV_TYPE_RAID5_LA, |
| LV_TYPE_RAID5_RA, |
| LV_TYPE_RAID5_LS, |
| LV_TYPE_RAID5_RS, |
| LV_TYPE_RAID6, |
| LV_TYPE_RAID6_ZR, |
| LV_TYPE_RAID6_NR, |
| LV_TYPE_RAID6_NC, |
| LV_TYPE_LOCKD, |
| LV_TYPE_SANLOCK |
| }; |
| |
| static const char *_lv_type_names[] = { |
| [LV_TYPE_UNKNOWN] = "unknown", |
| [LV_TYPE_NONE] = "none", |
| [LV_TYPE_PUBLIC] = "public", |
| [LV_TYPE_PRIVATE] = "private", |
| [LV_TYPE_HISTORY] = "history", |
| [LV_TYPE_LINEAR] = "linear", |
| [LV_TYPE_STRIPED] = "striped", |
| [LV_TYPE_MIRROR] = "mirror", |
| [LV_TYPE_RAID] = "raid", |
| [LV_TYPE_THIN] = "thin", |
| [LV_TYPE_CACHE] = "cache", |
| [LV_TYPE_SPARSE] = "sparse", |
| [LV_TYPE_ORIGIN] = "origin", |
| [LV_TYPE_THINORIGIN] = "thinorigin", |
| [LV_TYPE_MULTITHINORIGIN] = "multithinorigin", |
| [LV_TYPE_THICKORIGIN] = "thickorigin", |
| [LV_TYPE_MULTITHICKORIGIN] = "multithickorigin", |
| [LV_TYPE_CACHEORIGIN] = "cacheorigin", |
| [LV_TYPE_EXTTHINORIGIN] = "extthinorigin", |
| [LV_TYPE_MULTIEXTTHINORIGIN] = "multiextthinorigin", |
| [LV_TYPE_SNAPSHOT] = "snapshot", |
| [LV_TYPE_THINSNAPSHOT] = "thinsnapshot", |
| [LV_TYPE_THICKSNAPSHOT] = "thicksnapshot", |
| [LV_TYPE_PVMOVE] = "pvmove", |
| [LV_TYPE_IMAGE] = "image", |
| [LV_TYPE_LOG] = "log", |
| [LV_TYPE_METADATA] = "metadata", |
| [LV_TYPE_POOL] = "pool", |
| [LV_TYPE_DATA] = "data", |
| [LV_TYPE_SPARE] = "spare", |
| [LV_TYPE_VIRTUAL] = "virtual", |
| [LV_TYPE_RAID0] = SEG_TYPE_NAME_RAID0, |
| [LV_TYPE_RAID0_META] = SEG_TYPE_NAME_RAID0_META, |
| [LV_TYPE_RAID1] = SEG_TYPE_NAME_RAID1, |
| [LV_TYPE_RAID10] = SEG_TYPE_NAME_RAID10, |
| [LV_TYPE_RAID4] = SEG_TYPE_NAME_RAID4, |
| [LV_TYPE_RAID5] = SEG_TYPE_NAME_RAID5, |
| [LV_TYPE_RAID5_LA] = SEG_TYPE_NAME_RAID5_LA, |
| [LV_TYPE_RAID5_RA] = SEG_TYPE_NAME_RAID5_RA, |
| [LV_TYPE_RAID5_LS] = SEG_TYPE_NAME_RAID5_LS, |
| [LV_TYPE_RAID5_RS] = SEG_TYPE_NAME_RAID5_RS, |
| [LV_TYPE_RAID6] = SEG_TYPE_NAME_RAID6, |
| [LV_TYPE_RAID6_ZR] = SEG_TYPE_NAME_RAID6_ZR, |
| [LV_TYPE_RAID6_NR] = SEG_TYPE_NAME_RAID6_NR, |
| [LV_TYPE_RAID6_NC] = SEG_TYPE_NAME_RAID6_NC, |
| [LV_TYPE_LOCKD] = "lockd", |
| [LV_TYPE_SANLOCK] = "sanlock", |
| }; |
| |
| static int _lv_layout_and_role_mirror(struct dm_pool *mem, |
| const struct logical_volume *lv, |
| struct dm_list *layout, |
| struct dm_list *role, |
| int *public_lv) |
| { |
| int top_level = 0; |
| |
| /* non-top-level LVs */ |
| if (lv_is_mirror_image(lv)) { |
| if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MIRROR]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_IMAGE])) |
| goto_bad; |
| } else if (lv_is_mirror_log(lv)) { |
| if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MIRROR]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_LOG])) |
| goto_bad; |
| if (lv_is_mirrored(lv) && |
| !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR])) |
| goto_bad; |
| } else if (lv_is_pvmove(lv)) { |
| if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_PVMOVE]) || |
| !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR])) |
| goto_bad; |
| } else |
| top_level = 1; |
| |
| if (!top_level) { |
| *public_lv = 0; |
| return 1; |
| } |
| |
| /* top-level LVs */ |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR])) |
| goto_bad; |
| |
| return 1; |
| bad: |
| return 0; |
| } |
| |
| static int _lv_layout_and_role_raid(struct dm_pool *mem, |
| const struct logical_volume *lv, |
| struct dm_list *layout, |
| struct dm_list *role, |
| int *public_lv) |
| { |
| int top_level = 0; |
| const struct segment_type *segtype; |
| |
| /* non-top-level LVs */ |
| if (lv_is_raid_image(lv)) { |
| if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_RAID]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_IMAGE])) |
| goto_bad; |
| } else if (lv_is_raid_metadata(lv)) { |
| if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_RAID]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA])) |
| goto_bad; |
| } else if (lv_is_pvmove(lv)) { |
| if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_PVMOVE]) || |
| !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID])) |
| goto_bad; |
| } else |
| top_level = 1; |
| |
| if (!top_level) { |
| *public_lv = 0; |
| return 1; |
| } |
| |
| /* top-level LVs */ |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID])) |
| goto_bad; |
| |
| segtype = first_seg(lv)->segtype; |
| |
| if (segtype_is_raid0(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID0])) |
| goto_bad; |
| } else if (segtype_is_raid1(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID1])) |
| goto_bad; |
| } else if (segtype_is_raid10(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID10])) |
| goto_bad; |
| } else if (segtype_is_raid4(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID4])) |
| goto_bad; |
| } else if (segtype_is_any_raid5(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5])) |
| goto_bad; |
| |
| if (segtype_is_raid5_la(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_LA])) |
| goto_bad; |
| } else if (segtype_is_raid5_ra(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_RA])) |
| goto_bad; |
| } else if (segtype_is_raid5_ls(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_LS])) |
| goto_bad; |
| } else if (segtype_is_raid5_rs(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_RS])) |
| goto_bad; |
| } |
| } else if (segtype_is_any_raid6(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6])) |
| goto_bad; |
| |
| if (segtype_is_raid6_zr(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_ZR])) |
| goto_bad; |
| } else if (segtype_is_raid6_nr(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_NR])) |
| goto_bad; |
| } else if (segtype_is_raid6_nc(segtype)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_NC])) |
| goto_bad; |
| } |
| } |
| |
| return 1; |
| bad: |
| return 0; |
| } |
| |
| static int _lv_layout_and_role_thin(struct dm_pool *mem, |
| const struct logical_volume *lv, |
| struct dm_list *layout, |
| struct dm_list *role, |
| int *public_lv) |
| { |
| int top_level = 0; |
| unsigned snap_count; |
| struct lv_segment *seg; |
| |
| /* non-top-level LVs */ |
| if (lv_is_thin_pool_metadata(lv)) { |
| if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THIN]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA])) |
| goto_bad; |
| } else if (lv_is_thin_pool_data(lv)) { |
| if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THIN]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA])) |
| goto_bad; |
| } else |
| top_level = 1; |
| |
| if (!top_level) { |
| *public_lv = 0; |
| return 1; |
| } |
| |
| /* top-level LVs */ |
| if (lv_is_thin_volume(lv)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_THIN]) || |
| !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_SPARSE])) |
| goto_bad; |
| if (lv_is_thin_origin(lv, &snap_count)) { |
| if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THINORIGIN])) |
| goto_bad; |
| if (snap_count > 1 && |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTITHINORIGIN])) |
| goto_bad; |
| } |
| if ((seg = first_seg(lv)) && (seg->origin || seg->external_lv)) |
| if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_SNAPSHOT]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THINSNAPSHOT])) |
| goto_bad; |
| } else if (lv_is_thin_pool(lv)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_THIN]) || |
| !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL])) |
| goto_bad; |
| *public_lv = 0; |
| } |
| |
| if (lv_is_external_origin(lv)) { |
| if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_EXTTHINORIGIN])) |
| goto_bad; |
| if (lv->external_count > 1 && |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTIEXTTHINORIGIN])) |
| goto_bad; |
| } |
| |
| return 1; |
| bad: |
| return 0; |
| } |
| |
| static int _lv_layout_and_role_cache(struct dm_pool *mem, |
| const struct logical_volume *lv, |
| struct dm_list *layout, |
| struct dm_list *role, |
| int *public_lv) |
| { |
| int top_level = 0; |
| |
| /* non-top-level LVs */ |
| if (lv_is_cache_pool_metadata(lv)) { |
| if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHE]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA])) |
| goto_bad; |
| } else if (lv_is_cache_pool_data(lv)) { |
| if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHE]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA])) |
| goto_bad; |
| if (lv_is_cache(lv) && |
| !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE])) |
| goto_bad; |
| } else if (lv_is_cache_origin(lv)) { |
| if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_CACHE]) || |
| !str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHEORIGIN])) |
| goto_bad; |
| if (lv_is_cache(lv) && |
| !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE])) |
| goto_bad; |
| } else |
| top_level = 1; |
| |
| if (!top_level) { |
| *public_lv = 0; |
| return 1; |
| } |
| |
| /* top-level LVs */ |
| if (lv_is_cache(lv) && |
| !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE])) |
| goto_bad; |
| else if (lv_is_cache_pool(lv)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]) || |
| !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL])) |
| goto_bad; |
| *public_lv = 0; |
| } |
| |
| return 1; |
| bad: |
| return 0; |
| } |
| |
| static int _lv_layout_and_role_thick_origin_snapshot(struct dm_pool *mem, |
| const struct logical_volume *lv, |
| struct dm_list *layout, |
| struct dm_list *role, |
| int *public_lv) |
| { |
| if (lv_is_origin(lv)) { |
| if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THICKORIGIN])) |
| goto_bad; |
| /* |
| * Thin volumes are also marked with virtual flag, but we don't show "virtual" |
| * layout for thin LVs as they have their own keyword for layout - "thin"! |
| * So rule thin LVs out here! |
| */ |
| if (lv_is_virtual(lv) && !lv_is_thin_volume(lv)) { |
| if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_VIRTUAL])) |
| goto_bad; |
| *public_lv = 0; |
| } |
| if (lv->origin_count > 1 && |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTITHICKORIGIN])) |
| goto_bad; |
| } else if (lv_is_cow(lv)) { |
| if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_SNAPSHOT]) || |
| !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THICKSNAPSHOT])) |
| goto_bad; |
| } |
| |
| return 1; |
| bad: |
| return 0; |
| } |
| |
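| /* |
| * Populate the 'layout' and 'role' string lists describing the LV. For |
| * example, a thin snapshot volume reports layout {thin,sparse} and role |
| * {public,snapshot,thinsnapshot}, while a mirror image sub-LV reports |
| * layout {linear} (or {striped}) and role {private,mirror,image}. |
| */ |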
| int lv_layout_and_role(struct dm_pool *mem, const struct logical_volume *lv, |
| struct dm_list **layout, struct dm_list **role) |
| { |
| int linear, striped; |
| struct lv_segment *seg; |
| int public_lv = 1; |
| |
| *layout = *role = NULL; |
| |
| if (!(*layout = str_list_create(mem))) { |
| log_error("LV layout list allocation failed"); |
| return 0; |
| } |
| |
| if (!(*role = str_list_create(mem))) { |
| log_error("LV role list allocation failed"); |
| goto bad; |
| } |
| |
| if (lv_is_historical(lv)) { |
| if (!str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_NONE]) || |
| !str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_HISTORY])) |
| goto_bad; |
| } |
| |
| /* Mirrors and related */ |
| if ((lv_is_mirror_type(lv) || lv_is_pvmove(lv)) && |
| !_lv_layout_and_role_mirror(mem, lv, *layout, *role, &public_lv)) |
| goto_bad; |
| |
| /* RAIDs and related */ |
| if (lv_is_raid_type(lv) && |
| !_lv_layout_and_role_raid(mem, lv, *layout, *role, &public_lv)) |
| goto_bad; |
| |
| /* Thins and related */ |
| if ((lv_is_thin_type(lv) || lv_is_external_origin(lv)) && |
| !_lv_layout_and_role_thin(mem, lv, *layout, *role, &public_lv)) |
| goto_bad; |
| |
| /* Caches and related */ |
| if ((lv_is_cache_type(lv) || lv_is_cache_origin(lv)) && |
| !_lv_layout_and_role_cache(mem, lv, *layout, *role, &public_lv)) |
| goto_bad; |
| |
| /* Pool-specific */ |
| if (lv_is_pool_metadata_spare(lv)) { |
| if (!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_POOL]) || |
| !str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_SPARE])) |
| goto_bad; |
| public_lv = 0; |
| } |
| |
| /* Old-style origins/snapshots, virtual origins */ |
| if (!_lv_layout_and_role_thick_origin_snapshot(mem, lv, *layout, *role, &public_lv)) |
| goto_bad; |
| |
| if (lv_is_lockd_sanlock_lv(lv)) { |
| if (!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_LOCKD]) || |
| !str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_SANLOCK])) |
| goto_bad; |
| public_lv = 0; |
| } |
| |
| /* |
| * If layout not yet determined, it must be either |
| * linear or striped or mixture of these two. |
| */ |
| if (dm_list_empty(*layout)) { |
| linear = striped = 0; |
| dm_list_iterate_items(seg, &lv->segments) { |
| if (seg_is_linear(seg)) |
| linear = 1; |
| else if (seg_is_striped(seg)) |
| striped = 1; |
| else { |
| /* |
| * This should not happen, but if it does |
| * we'll see that an "unknown" layout is |
| * present. This means we forgot to detect |
| * the role above and need to add proper |
| * detection for such a role! |
| */ |
| log_warn(INTERNAL_ERROR "WARNING: Failed to properly detect " |
| "layout and role for LV %s/%s.", |
| lv->vg->name, lv->name); |
| } |
| } |
| |
| if (linear && |
| !str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_LINEAR])) |
| goto_bad; |
| |
| if (striped && |
| !str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_STRIPED])) |
| goto_bad; |
| |
| if (!linear && !striped && |
| !str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_UNKNOWN])) |
| goto_bad; |
| } |
| |
| /* finally, add either 'public' or 'private' role to the LV */ |
| if (public_lv) { |
| if (!str_list_add_h_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_PUBLIC])) |
| goto_bad; |
| } else { |
| if (!str_list_add_h_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_PRIVATE])) |
| goto_bad; |
| } |
| |
| return 1; |
| bad: |
| dm_pool_free(mem, *layout); |
| |
| return 0; |
| } |
| |
| struct dm_list_and_mempool { |
| struct dm_list *list; |
| struct dm_pool *mem; |
| }; |
| |
| static int _get_pv_list_for_lv(struct logical_volume *lv, void *data) |
| { |
| int dup_found; |
| uint32_t s; |
| struct pv_list *pvl; |
| struct lv_segment *seg; |
| struct dm_list *pvs = ((struct dm_list_and_mempool *)data)->list; |
| struct dm_pool *mem = ((struct dm_list_and_mempool *)data)->mem; |
| |
| dm_list_iterate_items(seg, &lv->segments) { |
| for (s = 0; s < seg->area_count; s++) { |
| dup_found = 0; |
| |
| if (seg_type(seg, s) != AREA_PV) |
| continue; |
| |
| /* do not add duplicates */ |
| dm_list_iterate_items(pvl, pvs) |
| if (pvl->pv == seg_pv(seg, s)) |
| dup_found = 1; |
| |
| if (dup_found) |
| continue; |
| |
| if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) { |
| log_error("Failed to allocate memory"); |
| return 0; |
| } |
| |
| pvl->pv = seg_pv(seg, s); |
| log_debug_metadata(" %s/%s uses %s", lv->vg->name, |
| lv->name, pv_dev_name(pvl->pv)); |
| |
| dm_list_add(pvs, &pvl->list); |
| } |
| } |
| |
| return 1; |
| } |
| |
| /* |
| * get_pv_list_for_lv |
| * @mem - mempool to allocate the list from. |
| * @lv |
| * @pvs - The list to add pv_list items to. |
| * |
| * 'pvs' is filled with 'pv_list' items for PVs that compose the LV. |
| * If the 'pvs' list already has items in it, duplicates will not be |
| * added. So, it is safe to repeatedly call this function for different |
| * LVs and build up a list of PVs for them all. |
| * |
| * Memory to create the list is obtained from the mempool provided. |
| * |
| * Returns: 1 on success, 0 on error |
| */ |
| int get_pv_list_for_lv(struct dm_pool *mem, |
| struct logical_volume *lv, struct dm_list *pvs) |
| { |
| struct dm_list_and_mempool context = { pvs, mem }; |
| |
| log_debug_metadata("Generating list of PVs that %s/%s uses:", |
| lv->vg->name, lv->name); |
| |
| if (!_get_pv_list_for_lv(lv, &context)) |
| return_0; |
| |
| return for_each_sub_lv(lv, &_get_pv_list_for_lv, &context); |
| } |
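| |
| /* |
| * Illustrative usage (sketch): accumulating the PVs used by two LVs from |
| * the same VG into a single list: |
| * |
| *     struct dm_list pvs; |
| * |
| *     dm_list_init(&pvs); |
| *     if (!get_pv_list_for_lv(mem, lv1, &pvs) || |
| *         !get_pv_list_for_lv(mem, lv2, &pvs)) |
| *         return_0; |
| */ |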
| |
| /* |
| * get_default_region_size |
| * @cmd |
| * |
| * 'mirror_region_size' and 'raid_region_size' are effectively the same thing. |
| * However, "raid" is more inclusive than "mirror", so the name has been |
| * changed. This function checks for the old setting and reports if it is |
| * being overridden by the new setting (i.e. when both settings are |
| * present). |
| * |
| * Note that the config files give defaults in kiB terms, but we |
| * return the value in terms of sectors. |
| * |
| * Returns: default region_size in sectors |
| */ |
| static int _get_default_region_size(struct cmd_context *cmd) |
| { |
| int mrs, rrs; |
| |
| /* |
| * 'mirror_region_size' is the old setting. It is overridden |
| * by the new setting, 'raid_region_size'. |
| */ |
| mrs = 2 * find_config_tree_int(cmd, activation_mirror_region_size_CFG, NULL); |
| rrs = 2 * find_config_tree_int(cmd, activation_raid_region_size_CFG, NULL); |
| |
| if (!mrs && !rrs) |
| return DEFAULT_RAID_REGION_SIZE * 2; |
| |
| if (!mrs) |
| return rrs; |
| |
| if (!rrs) |
| return mrs; |
| |
| if (mrs != rrs) |
| log_verbose("Overriding default 'mirror_region_size' setting" |
| " with 'raid_region_size' setting of %u kiB", |
| rrs / 2); |
| |
| return rrs; |
| } |
| |
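| /* |
| * Round a positive value down to the nearest power of 2, |
| * e.g. 1000 -> 512, 2048 -> 2048 (0 yields 0). |
| */ |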
| static int _round_down_pow2(int r) |
| { |
| /* Set all bits to the right of the leftmost set bit */ |
| r |= (r >> 1); |
| r |= (r >> 2); |
| r |= (r >> 4); |
| r |= (r >> 8); |
| r |= (r >> 16); |
| |
| /* Pull out the leftmost set bit */ |
| return r & ~(r >> 1); |
| } |
| |
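| /* |
| * Default mirror/raid region size in sectors, reduced to a power of 2 if |
| * the configured value is not one already: e.g. a configured |
| * raid_region_size of 1536 KiB (3072 sectors) is reduced to 2048 sectors |
| * and reported as 1024 kiB. |
| */ |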
| int get_default_region_size(struct cmd_context *cmd) |
| { |
| int region_size = _get_default_region_size(cmd); |
| |
| if (!is_power_of_2(region_size)) { |
| region_size = _round_down_pow2(region_size); |
| log_verbose("Reducing region size to %u kiB (power of 2).", |
| region_size / 2); |
| } |
| |
| return region_size; |
| } |
| |
| int add_seg_to_segs_using_this_lv(struct logical_volume *lv, |
| struct lv_segment *seg) |
| { |
| struct seg_list *sl; |
| |
| dm_list_iterate_items(sl, &lv->segs_using_this_lv) { |
| if (sl->seg == seg) { |
| sl->count++; |
| return 1; |
| } |
| } |
| |
| log_very_verbose("Adding %s:%" PRIu32 " as an user of %s", |
| seg->lv->name, seg->le, lv->name); |
| |
| if (!(sl = dm_pool_zalloc(lv->vg->vgmem, sizeof(*sl)))) { |
| log_error("Failed to allocate segment list"); |
| return 0; |
| } |
| |
| sl->count = 1; |
| sl->seg = seg; |
| dm_list_add(&lv->segs_using_this_lv, &sl->list); |
| |
| return 1; |
| } |
| |
| int remove_seg_from_segs_using_this_lv(struct logical_volume *lv, |
| struct lv_segment *seg) |
| { |
| struct seg_list *sl; |
| |
| dm_list_iterate_items(sl, &lv->segs_using_this_lv) { |
| if (sl->seg != seg) |
| continue; |
| if (sl->count > 1) |
| sl->count--; |
| else { |
| log_very_verbose("%s:%" PRIu32 " is no longer a user " |
| "of %s", seg->lv->name, seg->le, |
| lv->name); |
| dm_list_del(&sl->list); |
| } |
| return 1; |
| } |
| |
| log_error(INTERNAL_ERROR "Segment %s:%u is not a user of %s.", |
| seg->lv->name, seg->le, lv->name); |
| return 0; |
| } |
| |
| /* |
| * This is a function specialized for the common case where there is |
| * only one segment which uses the LV. |
| * e.g. the LV is a layer inserted by insert_layer_for_lv(). |
| * |
| * In general, walk through lv->segs_using_this_lv. |
| */ |
| struct lv_segment *get_only_segment_using_this_lv(const struct logical_volume *lv) |
| { |
| struct seg_list *sl; |
| |
| if (!lv) { |
| log_error(INTERNAL_ERROR "get_only_segment_using_this_lv() called with NULL LV."); |
| return NULL; |
| } |
| |
| dm_list_iterate_items(sl, &lv->segs_using_this_lv) { |
| /* Needs to be the only item in the list */ |
| if (!dm_list_end(&lv->segs_using_this_lv, &sl->list)) |
| break; |
| |
| if (sl->count != 1) { |
| log_error("%s is expected to have only one segment using it, " |
| "while %s:%" PRIu32 " uses it %d times.", |
| display_lvname(lv), sl->seg->lv->name, sl->seg->le, sl->count); |
| return NULL; |
| } |
| |
| return sl->seg; |
| } |
| |
| log_error("%s is expected to have only one segment using it, while it has %d.", |
| display_lvname(lv), dm_list_size(&lv->segs_using_this_lv)); |
| |
| return NULL; |
| } |
| |
| /* |
| * PVs used by a segment of an LV |
| */ |
| struct seg_pvs { |
| struct dm_list list; |
| |
| struct dm_list pvs; /* struct pv_list */ |
| |
| uint32_t le; |
| uint32_t len; |
| }; |
| |
| static struct seg_pvs *_find_seg_pvs_by_le(struct dm_list *list, uint32_t le) |
| { |
| struct seg_pvs *spvs; |
| |
| dm_list_iterate_items(spvs, list) |
| if (le >= spvs->le && le < spvs->le + spvs->len) |
| return spvs; |
| |
| return NULL; |
| } |
| |
| /* |
| * Find first unused LV number. |
| */ |
| uint32_t find_free_lvnum(struct logical_volume *lv) |
| { |
| int lvnum_used[MAX_RESTRICTED_LVS + 1] = { 0 }; |
| uint32_t i = 0; |
| struct lv_list *lvl; |
| int lvnum; |
| |
| dm_list_iterate_items(lvl, &lv->vg->lvs) { |
| lvnum = lvnum_from_lvid(&lvl->lv->lvid); |
| if (lvnum <= MAX_RESTRICTED_LVS) |
| lvnum_used[lvnum] = 1; |
| } |
| |
| while (lvnum_used[i]) |
| i++; |
| |
| /* FIXME What if none are free? */ |
| |
| return i; |
| } |
| |
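| /* |
| * Fraction of the LV that is in sync: e.g. a mirrored segment of area_len |
| * 100 with extents_copied 50 contributes 50/100, while linear/striped |
| * segments always count as fully copied. |
| */ |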
| dm_percent_t copy_percent(const struct logical_volume *lv) |
| { |
| uint32_t numerator = 0u, denominator = 0u; |
| struct lv_segment *seg; |
| |
| dm_list_iterate_items(seg, &lv->segments) { |
| denominator += seg->area_len; |
| |
| /* FIXME Generalise name of 'extents_copied' field */ |
| if (((seg_is_raid(seg) && !seg_is_any_raid0(seg)) || seg_is_mirrored(seg)) && |
| (seg->area_count > 1)) |
| numerator += seg->extents_copied; |
| else |
| numerator += seg->area_len; |
| } |
| |
| return denominator ? dm_make_percent(numerator, denominator) : DM_PERCENT_100; |
| } |
| |
| /* Round up extents to next stripe boundary for number of stripes */ |
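| /* e.g. 10 extents over 3 stripes becomes 12 when extending, 9 when reducing */ |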
| static uint32_t _round_to_stripe_boundary(struct volume_group *vg, uint32_t extents, |
| uint32_t stripes, int extend) |
| { |
| uint32_t size_rest, new_extents = extents; |
| |
| if (!stripes) |
| return extents; |
| |
| /* Round up extents to stripe divisible amount */ |
| if ((size_rest = extents % stripes)) { |
| new_extents += extend ? stripes - size_rest : -size_rest; |
| log_print_unless_silent("Rounding size %s (%d extents) %s to stripe boundary size %s (%d extents).", |
| display_size(vg->cmd, (uint64_t) extents * vg->extent_size), extents, |
| extend ? "up" : "down", |
| display_size(vg->cmd, (uint64_t) new_extents * vg->extent_size), new_extents); |
| } |
| |
| return new_extents; |
| } |
| |
| /* |
| * All lv_segments get created here. |
| */ |
| struct lv_segment *alloc_lv_segment(const struct segment_type *segtype, |
| struct logical_volume *lv, |
| uint32_t le, uint32_t len, |
| uint64_t status, |
| uint32_t stripe_size, |
| struct logical_volume *log_lv, |
| uint32_t area_count, |
| uint32_t area_len, |
| uint32_t chunk_size, |
| uint32_t region_size, |
| uint32_t extents_copied, |
| struct lv_segment *pvmove_source_seg) |
| { |
| struct lv_segment *seg; |
| struct dm_pool *mem = lv->vg->vgmem; |
| uint32_t areas_sz = area_count * sizeof(*seg->areas); |
| |
| if (!segtype) { |
| log_error(INTERNAL_ERROR "alloc_lv_segment: Missing segtype."); |
| return NULL; |
| } |
| |
| if (!(seg = dm_pool_zalloc(mem, sizeof(*seg)))) |
| return_NULL; |
| |
| if (!(seg->areas = dm_pool_zalloc(mem, areas_sz))) { |
| dm_pool_free(mem, seg); |
| return_NULL; |
| } |
| |
| if (segtype_is_raid(segtype) && |
| !segtype_is_raid0(segtype) && |
| !(seg->meta_areas = dm_pool_zalloc(mem, areas_sz))) { |
| dm_pool_free(mem, seg); /* frees everything alloced since seg */ |
| return_NULL; |
| } |
| |
| seg->segtype = segtype; |
| seg->lv = lv; |
| seg->le = le; |
| seg->len = len; |
| seg->status = status; |
| seg->stripe_size = stripe_size; |
| seg->area_count = area_count; |
| seg->area_len = area_len; |
| seg->chunk_size = chunk_size; |
| seg->region_size = region_size; |
| seg->extents_copied = extents_copied; |
| seg->pvmove_source_seg = pvmove_source_seg; |
| dm_list_init(&seg->tags); |
| dm_list_init(&seg->origin_list); |
| dm_list_init(&seg->thin_messages); |
| |
| if (log_lv && !attach_mirror_log(seg, log_lv)) |
| return_NULL; |
| |
| if (segtype_is_mirror(segtype)) |
| lv->status |= MIRROR; |
| |
| if (segtype_is_mirrored(segtype)) |
| lv->status |= MIRRORED; |
| |
| return seg; |
| } |
| |
| static int _release_and_discard_lv_segment_area(struct lv_segment *seg, uint32_t s, |
| uint32_t area_reduction, int with_discard) |
| { |
| struct lv_segment *cache_seg; |
| struct logical_volume *lv = seg_lv(seg, s); |
| |
| if (seg_type(seg, s) == AREA_UNASSIGNED) |
| return 1; |
| |
| if (seg_type(seg, s) == AREA_PV) { |
| if (with_discard && !discard_pv_segment(seg_pvseg(seg, s), area_reduction)) |
| return_0; |
| |
| if (!release_pv_segment(seg_pvseg(seg, s), area_reduction)) |
| return_0; |
| |
| if (seg->area_len == area_reduction) |
| seg_type(seg, s) = AREA_UNASSIGNED; |
| |
| return 1; |
| } |
| |
| if (lv_is_mirror_image(lv) || |
| lv_is_thin_pool_data(lv) || |
| lv_is_cache_pool_data(lv)) { |
| if (!lv_reduce(lv, area_reduction)) |
| return_0; /* FIXME: any upper level reporting */ |
| return 1; |
| } |
| |
| if (seg_is_cache_pool(seg) && |
| !dm_list_empty(&seg->lv->segs_using_this_lv)) { |
| if (!(cache_seg = get_only_segment_using_this_lv(seg->lv))) |
| return_0; |
| |
| if (!lv_cache_remove(cache_seg->lv)) |
| return_0; |
| } |
| |
| if (lv_is_raid_image(lv)) { |
| /* |
| * FIXME: Use lv_reduce not lv_remove |
| * We use lv_remove for now, because I haven't figured out |
| * why lv_reduce won't remove the LV. |
| lv_reduce(lv, area_reduction); |
| */ |
| if (area_reduction != seg->area_len) { |
| log_error("Unable to reduce RAID LV - operation not implemented."); |
| return_0; |
| } else { |
| if (!lv_remove(lv)) { |
| log_error("Failed to remove RAID image %s", |
| lv->name); |
| return 0; |
| } |
| } |
| |
| /* Remove metadata area if image has been removed */ |
| if (seg->meta_areas && seg_metalv(seg, s) && (area_reduction == seg->area_len)) { |
| if (!lv_reduce(seg_metalv(seg, s), |
| seg_metalv(seg, s)->le_count)) { |
| log_error("Failed to remove RAID meta-device %s", |
| seg_metalv(seg, s)->name); |
| return 0; |
| } |
| } |
| return 1; |
| } |
| |
| if (area_reduction == seg->area_len) { |
| log_very_verbose("Remove %s:%" PRIu32 "[%" PRIu32 "] from " |
| "the top of LV %s:%" PRIu32, |
| seg->lv->name, seg->le, s, |
| lv->name, seg_le(seg, s)); |
| |
| if (!remove_seg_from_segs_using_this_lv(lv, seg)) |
| return_0; |
| seg_lv(seg, s) = NULL; |
| seg_le(seg, s) = 0; |
| seg_type(seg, s) = AREA_UNASSIGNED; |
| } |
| |
| return 1; |
| } |
| |
| int release_and_discard_lv_segment_area(struct lv_segment *seg, uint32_t s, uint32_t area_reduction) |
| { |
| return _release_and_discard_lv_segment_area(seg, s, area_reduction, 1); |
| } |
| |
| int release_lv_segment_area(struct lv_segment *seg, uint32_t s, uint32_t area_reduction) |
| { |
| return _release_and_discard_lv_segment_area(seg, s, area_reduction, 0); |
| } |
| |
| /* |
| * Move a segment area from one segment to another |
| */ |
| int move_lv_segment_area(struct lv_segment *seg_to, uint32_t area_to, |
| struct lv_segment *seg_from, uint32_t area_from) |
| { |
| struct physical_volume *pv; |
| struct logical_volume *lv; |
| uint32_t pe, le; |
| |
| switch (seg_type(seg_from, area_from)) { |
| case AREA_PV: |
| pv = seg_pv(seg_from, area_from); |
| pe = seg_pe(seg_from, area_from); |
| |
| if (!release_lv_segment_area(seg_from, area_from, seg_from->area_len)) |
| return_0; |
| |
| if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len)) |
| return_0; |
| |
| if (!set_lv_segment_area_pv(seg_to, area_to, pv, pe)) |
| return_0; |
| |
| break; |
| |
| case AREA_LV: |
| lv = seg_lv(seg_from, area_from); |
| le = seg_le(seg_from, area_from); |
| |
| if (!release_lv_segment_area(seg_from, area_from, seg_from->area_len)) |
| return_0; |
| |
| if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len)) |
| return_0; |
| |
| if (!set_lv_segment_area_lv(seg_to, area_to, lv, le, 0)) |
| return_0; |
| |
| break; |
| |
| case AREA_UNASSIGNED: |
| if (!release_lv_segment_area(seg_to, area_to, seg_to->area_len)) |
| return_0; |
| } |
| |
| return 1; |
| } |
| |
| /* |
| * Link part of a PV to an LV segment. |
| */ |
| int set_lv_segment_area_pv(struct lv_segment *seg, uint32_t area_num, |
| struct physical_volume *pv, uint32_t pe) |
| { |
| seg->areas[area_num].type = AREA_PV; |
| |
| if (!(seg_pvseg(seg, area_num) = |
| assign_peg_to_lvseg(pv, pe, seg->area_len, seg, area_num))) |
| return_0; |
| |
| return 1; |
| } |
| |
| /* |
| * Link one LV segment to another. Assumes sizes already match. |
| */ |
| int set_lv_segment_area_lv(struct lv_segment *seg, uint32_t area_num, |
| struct logical_volume *lv, uint32_t le, |
| uint64_t status) |
| { |
| log_very_verbose("Stack %s:%" PRIu32 "[%" PRIu32 "] on LV %s:%" PRIu32, |
| seg->lv->name, seg->le, area_num, lv->name, le); |
| |
| if (status & RAID_META) { |
| seg->meta_areas[area_num].type = AREA_LV; |
| seg_metalv(seg, area_num) = lv; |
| if (le) { |
| log_error(INTERNAL_ERROR "Meta le != 0"); |
| return 0; |
| } |
| seg_metale(seg, area_num) = 0; |
| } else { |
| seg->areas[area_num].type = AREA_LV; |
| seg_lv(seg, area_num) = lv; |
| seg_le(seg, area_num) = le; |
| } |
| lv->status |= status; |
| |
| if (!add_seg_to_segs_using_this_lv(lv, seg)) |
| return_0; |
| |
| return 1; |
| } |
| |
| /* |
| * Prepare for adding parallel areas to an existing segment. |
| */ |
| static int _lv_segment_add_areas(struct logical_volume *lv, |
| struct lv_segment *seg, |
| uint32_t new_area_count) |
| { |
| struct lv_segment_area *newareas; |
| uint32_t areas_sz = new_area_count * sizeof(*newareas); |
| |
| if (!(newareas = dm_pool_zalloc(lv->vg->cmd->mem, areas_sz))) |
| return_0; |
| |
| memcpy(newareas, seg->areas, seg->area_count * sizeof(*seg->areas)); |
| |
| seg->areas = newareas; |
| seg->area_count = new_area_count; |
| |
| return 1; |
| } |
| |
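| /* |
| * Number of parallel areas that make up one logical extent of the segment: |
| * e.g. 3 for a 3-stripe striped LV, area_count minus parity_devs for |
| * parity RAID (so 3 for a 5-device raid6), half the images for raid10 and |
| * 1 for plain mirrors. |
| */ |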
| static uint32_t _calc_area_multiple(const struct segment_type *segtype, |
| const uint32_t area_count, |
| const uint32_t stripes) |
| { |
| if (!area_count) |
| return 1; |
| |
| /* Striped */ |
| if (segtype_is_striped(segtype)) |
| return area_count; |
| |
| /* Parity RAID (e.g. RAID 4/5/6) */ |
| if (segtype_is_raid(segtype) && segtype->parity_devs) { |
| /* |
| * As articulated in _alloc_init, we can tell by |
| * the area_count whether a replacement drive is |
| * being allocated; and if this is the case, then |
| * there is no area_multiple that should be used. |
| */ |
| if (area_count <= segtype->parity_devs) |
| return 1; |
| |
| return area_count - segtype->parity_devs; |
| } |
| |
| /* |
| * RAID10 - only has 2-way mirror right now. |
| * If we are to move beyond 2-way RAID10, then |
| * the 'stripes' argument will always need to |
| * be given. |
| */ |
| if (!strcmp(segtype->name, _lv_type_names[LV_TYPE_RAID10])) { |
| if (!stripes) |
| return area_count / 2; |
| return stripes; |
| } |
| |
| /* Mirrored stripes */ |
| if (stripes) |
| return stripes; |
| |
| /* Mirrored */ |
| return 1; |
| } |
| |
| /* |
| * Reduce the size of an lv_segment. New size can be zero. |
| */ |
| static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction) |
| { |
| uint32_t area_reduction, s; |
| |
| /* Caller must ensure exact divisibility */ |
| if (seg_is_striped(seg)) { |
| if (reduction % seg->area_count) { |
| log_error("Segment extent reduction %" PRIu32 |
| " not divisible by #stripes %" PRIu32, |
| reduction, seg->area_count); |
| return 0; |
| } |
| area_reduction = (reduction / seg->area_count); |
| } else |
| area_reduction = reduction; |
| |
| for (s = 0; s < seg->area_count; s++) |
| if (!release_and_discard_lv_segment_area(seg, s, area_reduction)) |
| return_0; |
| |
| seg->len -= reduction; |
| seg->area_len -= area_reduction; |
| |
| return 1; |
| } |
| |
| /* |
| * Entry point for all LV reductions in size. |
| */ |
| static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete) |
| { |
| struct lv_segment *seg; |
| uint32_t count = extents; |
| uint32_t reduction; |
| struct logical_volume *pool_lv; |
| |
| if (lv_is_merging_origin(lv)) { |
| log_debug_metadata("Dropping snapshot merge of %s to removed origin %s.", |
| find_snapshot(lv)->lv->name, lv->name); |
| clear_snapshot_merge(lv); |
| } |
| |
| dm_list_iterate_back_items(seg, &lv->segments) { |
| if (!count) |
| break; |
| |
| if (seg->len <= count) { |
| if (seg->merge_lv) { |
| log_debug_metadata("Dropping snapshot merge of removed %s to origin %s.", |
| seg->lv->name, seg->merge_lv->name); |
| clear_snapshot_merge(seg->merge_lv); |
| } |
| |
| /* remove this segment completely */ |
| /* FIXME Check this is safe */ |
| if (seg->log_lv && !lv_remove(seg->log_lv)) |
| return_0; |
| |
| if (seg->metadata_lv && !lv_remove(seg->metadata_lv)) |
| return_0; |
| |
| /* Remove cache origin only when removing (not on lv_empty()) */ |
| if (delete && seg_is_cache(seg)) { |
| if (lv_is_pending_delete(seg->lv)) { |
| /* Just dropping reference on origin when pending delete */ |
| if (!remove_seg_from_segs_using_this_lv(seg_lv(seg, 0), seg)) |
| return_0; |
| seg_lv(seg, 0) = NULL; |
| seg_le(seg, 0) = 0; |
| seg_type(seg, 0) = AREA_UNASSIGNED; |
| if (seg->pool_lv && !detach_pool_lv(seg)) |
| return_0; |
| } else if (!lv_remove(seg_lv(seg, 0))) |
| return_0; |
| } |
| |
| if ((pool_lv = seg->pool_lv)) { |
| if (!detach_pool_lv(seg)) |
| return_0; |
| /* When removing cached LV, remove pool as well */ |
| if (seg_is_cache(seg) && !lv_remove(pool_lv)) |
| return_0; |
| } |
| |
| dm_list_del(&seg->list); |
| reduction = seg->len; |
| } else |
| reduction = count; |
| |
| if (!_lv_segment_reduce(seg, reduction)) |
| return_0; |
| count -= reduction; |
| } |
| |
| lv->le_count -= extents; |
| lv->size = (uint64_t) lv->le_count * lv->vg->extent_size; |
| |
| if (!delete) |
| return 1; |
| |
| if (lv == lv->vg->pool_metadata_spare_lv) { |
| lv->status &= ~POOL_METADATA_SPARE; |
| lv->vg->pool_metadata_spare_lv = NULL; |
| } |
| |
| /* Remove the LV if it is now empty */ |
| if (!lv->le_count && !unlink_lv_from_vg(lv)) |
| return_0; |
| else if (lv->vg->fid->fmt->ops->lv_setup && |
| !lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv)) |
| return_0; |
| |
| return 1; |
| } |
| |
| /* |
| * Empty an LV. |
| */ |
| int lv_empty(struct logical_volume *lv) |
| { |
| return _lv_reduce(lv, lv->le_count, 0); |
| } |
| |
| /* |
| * Empty an LV and add error segment. |
| */ |
| int replace_lv_with_error_segment(struct logical_volume *lv) |
| { |
| uint32_t len = lv->le_count; |
| |
| if (len && !lv_empty(lv)) |
| return_0; |
| |
| /* Minimum size required for a table. */ |
| if (!len) |
| len = 1; |
| |
| /* |
| * Since we are replacing whatever was there with |
| * an error segment, we should also clear any flags |
| * that suggest it is anything other than "error". |
| */ |
| /* FIXME Check for other flags that need removing */ |
| lv->status &= ~(MIRROR|MIRRORED|PVMOVE|LOCKED); |
| |
| /* FIXME Check for any attached LVs that will become orphans e.g. mirror logs */ |
| |
| if (!lv_add_virtual_segment(lv, 0, len, get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_ERROR))) |
| return_0; |
| |
| return 1; |
| } |
| |
| int lv_refresh_suspend_resume(const struct logical_volume *lv) |
| { |
| struct cmd_context *cmd = lv->vg->cmd; |
| int r = 1; |
| |
| if (!cmd->partial_activation && lv_is_partial(lv)) { |
| log_error("Refusing refresh of partial LV %s." |
| " Use '--activationmode partial' to override.", |
| display_lvname(lv)); |
| return 0; |
| } |
| |
| if (!suspend_lv(cmd, lv)) { |
| log_error("Failed to suspend %s.", display_lvname(lv)); |
| r = 0; |
| } |
| |
| if (!resume_lv(cmd, lv)) { |
| log_error("Failed to reactivate %s.", display_lvname(lv)); |
| r = 0; |
| } |
| |
| return r; |
| } |
| |
| /* |
| * Remove given number of extents from LV. |
| */ |
| int lv_reduce(struct logical_volume *lv, uint32_t extents) |
| { |
| return _lv_reduce(lv, extents, 1); |
| } |
| |
| int historical_glv_remove(struct generic_logical_volume *glv) |
| { |
| struct generic_logical_volume *origin_glv; |
| struct glv_list *glvl, *user_glvl; |
| struct historical_logical_volume *hlv; |
| int reconnected; |
| |
| if (!glv || !glv->is_historical) |
| return_0; |
| |
| hlv = glv->historical; |
| |
| if (!(glv = find_historical_glv(hlv->vg, hlv->name, 0, &glvl))) { |
| if (!(find_historical_glv(hlv->vg, hlv->name, 1, NULL))) { |
| log_error(INTERNAL_ERROR "historical_glv_remove: historical LV %s/-%s not found.", |
| hlv->vg->name, hlv->name); |
| return 0; |
| } else { |
| log_verbose("Historical LV %s/-%s already on removed list ", |
| hlv->vg->name, hlv->name); |
| return 1; |
| } |
| } |
| |
| if ((origin_glv = hlv->indirect_origin) && |
| !remove_glv_from_indirect_glvs(origin_glv, glv)) |
| return_0; |
| |
| dm_list_iterate_items(user_glvl, &hlv->indirect_glvs) { |
| reconnected = 0; |
| if ((origin_glv && !origin_glv->is_historical) && !user_glvl->glv->is_historical) |
| log_verbose("Removing historical connection between %s and %s.", |
| origin_glv->live->name, user_glvl->glv->live->name); |
| else if (hlv->vg->cmd->record_historical_lvs) { |
| if (!add_glv_to_indirect_glvs(hlv->vg->vgmem, origin_glv, user_glvl->glv)) |
| return_0; |
| reconnected = 1; |
| } |
| |
| if (!reconnected) { |
| /* |
| * Break the ancestry chain if we're removing a historical LV and tracking |
| * of historical LVs is switched off either via: |
| * - the "metadata/record_lvs_history=0" config setting |
| * - the "--nohistory" command line option |
| * |
| * Also, break the chain if we're unable to store such a connection at all |
| * because we're removing the very last historical LV that sat between |
| * live LVs - pure live LVs can't store any indirect origin relation in |
| * metadata - we need at least one historical LV to do that! |
| */ |
| if (user_glvl->glv->is_historical) |
| user_glvl->glv->historical->indirect_origin = NULL; |
| else |
| first_seg(user_glvl->glv->live)->indirect_origin = NULL; |
| } |
| } |
| |
| dm_list_move(&hlv->vg->removed_historical_lvs, &glvl->list); |
| return 1; |
| } |
| |
| /* |
| * Completely remove an LV. |
| */ |
| int lv_remove(struct logical_volume *lv) |
| { |
| if (lv_is_historical(lv)) |
| return historical_glv_remove(lv->this_glv); |
| |
| if (!lv_reduce(lv, lv->le_count)) |
| return_0; |
| |
| return 1; |
| } |
| |
| /* |
| * A set of contiguous physical extents allocated |
| */ |
| struct alloced_area { |
| struct dm_list list; |
| |
| struct physical_volume *pv; |
| uint32_t pe; |
| uint32_t len; |
| }; |
| |
| /* |
| * Details of an allocation attempt |
| */ |
| struct alloc_handle { |
| struct cmd_context *cmd; |
| struct dm_pool *mem; |
| |
| alloc_policy_t alloc; /* Overall policy */ |
| int approx_alloc; /* get as much as possible up to new_extents */ |
| uint32_t new_extents; /* Number of new extents required */ |
| uint32_t area_count; /* Number of parallel areas */ |
| uint32_t parity_count; /* Adds to area_count, but not area_multiple */ |
| uint32_t area_multiple; /* seg->len = area_len * area_multiple */ |
| uint32_t log_area_count; /* Number of parallel logs */ |
| uint32_t metadata_area_count; /* Number of parallel metadata areas */ |
| uint32_t log_len; /* Length of log/metadata_area */ |
| uint32_t region_size; /* Mirror region size */ |
| uint32_t total_area_len; /* Total number of parallel extents */ |
| |
| unsigned maximise_cling; |
| unsigned mirror_logs_separate; /* Force mirror logs on separate PVs? */ |
| |
| /* |
| * RAID devices require a metadata area that accompanies each |
| * device. During initial creation, it is best to look for space |
| * that is new_extents + log_len and then split that between two |
| * allocated areas when found. 'alloc_and_split_meta' indicates |
| * that this is the desired dynamic. |
| * |
| * This same idea is used by cache LVs to get the metadata device |
| * and data device allocated together. |
| */ |
| unsigned alloc_and_split_meta; |
| unsigned split_metadata_is_allocated; /* Metadata has been allocated */ |
| |
| const struct dm_config_node *cling_tag_list_cn; |
| |
| struct dm_list *parallel_areas; /* PVs to avoid */ |
| |
| /* |
| * Contains area_count lists of areas allocated to data stripes |
| * followed by log_area_count lists of areas allocated to log stripes. |
| */ |
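| /* |
| * The handle itself is expected to be allocated with enough trailing space |
| * for all of these lists (data, parity and log/metadata areas) - see |
| * _alloc_init(). |
| */ |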
| struct dm_list alloced_areas[0]; |
| }; |
| |
| /* |
| * Returns log device size in extents, algorithm from kernel code |
| */ |
| #define BYTE_SHIFT 3 |
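| /* |
| * Illustrative sizing: a 10GiB mirror leg (2560 extents of 4MiB) with |
| * 512KiB regions needs 20480 regions, i.e. a ~2.5KiB bitset; the header |
| * plus bitset fit easily within a single extent, so the log is 1 extent. |
| */ |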
| static uint32_t _mirror_log_extents(uint32_t region_size, uint32_t pe_size, uint32_t area_len) |
| { |
| size_t area_size, bitset_size, log_size, region_count; |
| |
| area_size = (size_t)area_len * pe_size; |
| region_count = dm_div_up(area_size, region_size); |
| |
| /* Work out how many "unsigned long"s we need to hold the bitset. */ |
| bitset_size = dm_round_up(region_count, sizeof(uint32_t) << BYTE_SHIFT); |
| bitset_size >>= BYTE_SHIFT; |
| |
| /* Log device holds both header and bitset. */ |
| log_size = dm_round_up((MIRROR_LOG_OFFSET << SECTOR_SHIFT) + bitset_size, 1 << SECTOR_SHIFT); |
| log_size >>= SECTOR_SHIFT; |
| log_size = dm_div_up(log_size, pe_size); |
| |
| /* |
| * Kernel requires a mirror to be at least 1 region large. So, |
| * if our mirror log is itself a mirror, it must be at least |
| * 1 region large. This restriction may not be necessary for |
| * non-mirrored logs, but we apply the rule anyway. |
| * |
| * (The other option is to make the region size of the log |
| * mirror smaller than the mirror it is acting as a log for, |
| * but that really complicates things. It's much easier to |
| * keep the region_size the same for both.) |
| */ |
| return (log_size > (region_size / pe_size)) ? log_size : |
| (region_size / pe_size); |
| } |
| |
| /* Is there enough total space or should we give up immediately? */ |
| static int _sufficient_pes_free(struct alloc_handle *ah, struct dm_list *pvms, |
| uint32_t allocated, uint32_t extents_still_needed) |
| { |
| uint32_t area_extents_needed = (extents_still_needed - allocated) * ah->area_count / ah->area_multiple; |
| uint32_t parity_extents_needed = (extents_still_needed - allocated) * ah->parity_count / ah->area_multiple; |
| uint32_t metadata_extents_needed = ah->alloc_and_split_meta ? 0 : ah->metadata_area_count * RAID_METADATA_AREA_LEN; /* One each */ |
| uint32_t total_extents_needed = area_extents_needed + parity_extents_needed + metadata_extents_needed; |
| uint32_t free_pes = pv_maps_size(pvms); |
| |
| if (total_extents_needed > free_pes) { |
| log_error("Insufficient free space: %" PRIu32 " extents needed," |
| " but only %" PRIu32 " available", |
| total_extents_needed, free_pes); |
| return 0; |
| } |
| |
| return 1; |
| } |
| |
| /* For striped mirrors, all the areas are counted, through the mirror layer */ |
| static uint32_t _stripes_per_mimage(struct lv_segment *seg) |
| { |
| struct lv_segment *last_lvseg; |
| |
| if (seg_is_mirrored(seg) && seg->area_count && seg_type(seg, 0) == AREA_LV) { |
| last_lvseg = dm_list_item(dm_list_last(&seg_lv(seg, 0)->segments), struct lv_segment); |
| if (seg_is_striped(last_lvseg)) |
| return last_lvseg->area_count; |
| } |
| |
| return 1; |
| } |
| |
| static void _init_alloc_parms(struct alloc_handle *ah, |
| struct alloc_parms *alloc_parms, |
| alloc_policy_t alloc, |
| struct lv_segment *prev_lvseg, unsigned can_split, |
| uint32_t allocated, uint32_t extents_still_needed) |
| { |
| alloc_parms->alloc = alloc; |
| alloc_parms->prev_lvseg = prev_lvseg; |
| alloc_parms->flags = 0; |
| alloc_parms->extents_still_needed = extents_still_needed; |
| |
| /* |
| * Only attempt contiguous/cling allocation to previous segment |
| * areas if the number of areas matches. |
| */ |
| if (alloc_parms->prev_lvseg && |
| ((ah->area_count + ah->parity_count) == prev_lvseg->area_count)) { |
| alloc_parms->flags |= A_AREA_COUNT_MATCHES; |
| |
| /* Are there any preceding segments we must follow on from? */ |
| if (alloc_parms->alloc == ALLOC_CONTIGUOUS) { |
| alloc_parms->flags |= A_CONTIGUOUS_TO_LVSEG; |
| alloc_parms->flags |= A_POSITIONAL_FILL; |
| } else if ((alloc_parms->alloc == ALLOC_CLING) || |
| (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) { |
| alloc_parms->flags |= A_CLING_TO_LVSEG; |
| alloc_parms->flags |= A_POSITIONAL_FILL; |
| } |
| } else |
| /* |
| * A cling allocation that follows a successful contiguous |
| * allocation must use the same PVs (or else fail). |
| */ |
| if ((alloc_parms->alloc == ALLOC_CLING) || |
| (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) { |
| alloc_parms->flags |= A_CLING_TO_ALLOCED; |
| alloc_parms->flags |= A_POSITIONAL_FILL; |
| } |
| |
| if (alloc_parms->alloc == ALLOC_CLING_BY_TAGS) |
| alloc_parms->flags |= A_CLING_BY_TAGS; |
| |
| if (!(alloc_parms->alloc & A_POSITIONAL_FILL) && |
| (alloc_parms->alloc == ALLOC_CONTIGUOUS) && |
| ah->cling_tag_list_cn) |
| alloc_parms->flags |= A_PARTITION_BY_TAGS; |
| |
| /* |
| * For normal allocations, if any extents have already been found |
| * for allocation, prefer to place further extents on the same disks as |
| * have already been used. |
| */ |
| if (ah->maximise_cling && |
| (alloc_parms->alloc == ALLOC_NORMAL) && |
| (allocated != alloc_parms->extents_still_needed)) |
| alloc_parms->flags |= A_CLING_TO_ALLOCED; |
| |
| if (can_split) |
| alloc_parms->flags |= A_CAN_SPLIT; |
| } |
| |
| static int _log_parallel_areas(struct dm_pool *mem, struct dm_list *parallel_areas) |
| { |
| struct seg_pvs *spvs; |
| struct pv_list *pvl; |
| char *pvnames; |
| |
| if (!parallel_areas) |
| return 1; |
| |
| dm_list_iterate_items(spvs, parallel_areas) { |
| if (!dm_pool_begin_object(mem, 256)) { |
| log_error("dm_pool_begin_object failed"); |
| return 0; |
| } |
| |
| dm_list_iterate_items(pvl, &spvs->pvs) { |
| if (!dm_pool_grow_object(mem, pv_dev_name(pvl->pv), strlen(pv_dev_name(pvl->pv)))) { |
| log_error("dm_pool_grow_object failed"); |
| dm_pool_abandon_object(mem); |
| return 0; |
| } |
| if (!dm_pool_grow_object(mem, " ", 1)) { |
| log_error("dm_pool_grow_object failed"); |
| dm_pool_abandon_object(mem); |
| return 0; |
| } |
| } |
| |
| if (!dm_pool_grow_object(mem, "\0", 1)) { |
| log_error("dm_pool_grow_object failed"); |
| dm_pool_abandon_object(mem); |
| return 0; |
| } |
| |
| pvnames = dm_pool_end_object(mem); |
| log_debug_alloc("Parallel PVs at LE %" PRIu32 " length %" PRIu32 ": %s", |
| spvs->le, spvs->len, pvnames); |
| dm_pool_free(mem, pvnames); |
| } |
| |
| return 1; |
| } |
| |
| /* Handles also stacking */ |
| static int _setup_lv_size(struct logical_volume *lv, uint32_t extents) |
| { |
| struct lv_segment *thin_pool_seg; |
| |
| lv->le_count = extents; |
| lv->size = (uint64_t) extents * lv->vg->extent_size; |
| |
| if (lv_is_thin_pool_data(lv)) { |
| if (!(thin_pool_seg = get_only_segment_using_this_lv(lv))) |
| return_0; |
| |
| /* Update thin pool segment from the layered LV */ |
| thin_pool_seg->lv->le_count = |
| thin_pool_seg->len = |
| thin_pool_seg->area_len = lv->le_count; |
| thin_pool_seg->lv->size = lv->size; |
| } |
| |
| return 1; |
| } |
| |
| static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status, |
| uint32_t area_count, |
| uint32_t stripe_size, |
| const struct segment_type *segtype, |
| struct alloced_area *aa, |
| uint32_t region_size) |
| { |
| uint32_t s, extents, area_multiple; |
| struct lv_segment *seg; |
| |
| area_multiple = _calc_area_multiple(segtype, area_count, 0); |
| extents = aa[0].len * area_multiple; |
| |
| if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents, |
| status, stripe_size, NULL, |
| area_count, |
| aa[0].len, 0u, region_size, 0u, NULL))) { |
| log_error("Couldn't allocate new LV segment."); |
| return 0; |
| } |
| |
| for (s = 0; s < area_count; s++) |
| if (!set_lv_segment_area_pv(seg, s, aa[s].pv, aa[s].pe)) |
| return_0; |
| |
| dm_list_add(&lv->segments, &seg->list); |
| |
| if (!_setup_lv_size(lv, lv->le_count + extents)) |
| return_0; |
| |
| return 1; |
| } |
| |
| static int _setup_alloced_segments(struct logical_volume *lv, |
| struct dm_list *alloced_areas, |
| uint32_t area_count, |
| uint64_t status, |
| uint32_t stripe_size, |
| const struct segment_type *segtype, |
| uint32_t region_size) |
| { |
| struct alloced_area *aa; |
| |
| dm_list_iterate_items(aa, &alloced_areas[0]) { |
| if (!_setup_alloced_segment(lv, status, area_count, |
| stripe_size, segtype, aa, |
| region_size)) |
| return_0; |
| } |
| |
| return 1; |
| } |
| |
| /* |
| * This function takes a list of pv_areas and adds them to allocated_areas. |
| * If the complete area is not needed then it gets split. |
| * The part used is removed from the pv_map so it can't be allocated twice. |
| */ |
| static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocate, |
| struct alloc_state *alloc_state, uint32_t ix_log_offset) |
| { |
| uint32_t area_len, len; |
| uint32_t s, smeta; |
| uint32_t ix_log_skip = 0; /* How many areas to skip in middle of array to reach log areas */ |
| uint32_t total_area_count; |
| struct alloced_area *aa; |
| struct pv_area *pva; |
| |
| total_area_count = ah->area_count + ah->parity_count + alloc_state->log_area_count_still_needed; |
| if (!total_area_count) { |
| log_warn(INTERNAL_ERROR "_alloc_parallel_area called without any allocation to do."); |
| return 1; |
| } |
| |
| area_len = max_to_allocate / ah->area_multiple; |
| |
| /* Reduce area_len to the smallest of the areas */ |
| for (s = 0; s < ah->area_count + ah->parity_count; s++) |
| if (area_len > alloc_state->areas[s].used) |
| area_len = alloc_state->areas[s].used; |
| |
| len = (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? total_area_count * 2 : total_area_count; |
| len *= sizeof(*aa); |
| if (!(aa = dm_pool_alloc(ah->mem, len))) { |
| log_error("alloced_area allocation failed"); |
| return 0; |
| } |
| |
| /* |
| * The areas array consists of area_count areas for data stripes, then |
| * ix_log_skip areas to skip, then log_area_count areas to use for the |
| * log, then some areas too small for the log. |
| */ |
| len = area_len; |
| for (s = 0; s < total_area_count; s++) { |
| if (s == (ah->area_count + ah->parity_count)) { |
| ix_log_skip = ix_log_offset - ah->area_count; |
| len = ah->log_len; |
| } |
| |
| pva = alloc_state->areas[s + ix_log_skip].pva; |
| if (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) { |
| /* |
| * The metadata area goes at the front of the allocated |
| * space for now, but could easily go at the end (or |
| * middle!). |
| * |
| * Even though we split these two from the same |
| * allocation, we store the images at the beginning |
| * of the areas array and the metadata at the end. |
| */ |
| smeta = s + ah->area_count + ah->parity_count; |
| aa[smeta].pv = pva->map->pv; |
| aa[smeta].pe = pva->start; |
| aa[smeta].len = ah->log_len; |
| |
| log_debug_alloc("Allocating parallel metadata area %" PRIu32 |
| " on %s start PE %" PRIu32 |
| " length %" PRIu32 ".", |
| (smeta - (ah->area_count + ah->parity_count)), |
| pv_dev_name(aa[smeta].pv), aa[smeta].pe, |
| ah->log_len); |
| |
| consume_pv_area(pva, ah->log_len); |
| dm_list_add(&ah->alloced_areas[smeta], &aa[smeta].list); |
| } |
| aa[s].len = (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? len - ah->log_len : len; |
| /* Skip empty allocations */ |
| if (!aa[s].len) |
| continue; |
| |
| aa[s].pv = pva->map->pv; |
| aa[s].pe = pva->start; |
| |
| log_debug_alloc("Allocating parallel area %" PRIu32 |
| " on %s start PE %" PRIu32 " length %" PRIu32 ".", |
| s, pv_dev_name(aa[s].pv), aa[s].pe, aa[s].len); |
| |
| consume_pv_area(pva, aa[s].len); |
| |
| dm_list_add(&ah->alloced_areas[s], &aa[s].list); |
| } |
| |
| /* Only need to alloc metadata from the first batch */ |
| if (ah->alloc_and_split_meta) |
| ah->split_metadata_is_allocated = 1; |
| |
| ah->total_area_len += area_len; |
| |
| alloc_state->allocated += area_len * ah->area_multiple; |
| |
| return 1; |
| } |
| |
| /* |
| * Call fn for each AREA_PV used by the LV segment at lv:le, covering up to len extents. |
| * If any constituent area contains more than one segment, *max_seg_len is |
| * reduced to cover only the first. |
| * fn should return 0 on error, 1 to continue scanning or >1 to terminate without error. |
| * In the last case, this function passes on the return code. |
| * FIXME I think some callers are expecting this to check all PV segments used by an LV. |
| */ |
| static int _for_each_pv(struct cmd_context *cmd, struct logical_volume *lv, |
| uint32_t le, uint32_t len, struct lv_segment *seg, |
| uint32_t *max_seg_len, |
| uint32_t first_area, uint32_t max_areas, |
| int top_level_area_index, |
| int only_single_area_segments, |
| int (*fn)(struct cmd_context *cmd, |
| struct pv_segment *peg, uint32_t s, |
| void *data), |
| void *data) |
| { |
| uint32_t s; |
| uint32_t remaining_seg_len, area_len, area_multiple; |
| uint32_t stripes_per_mimage = 1; |
| int r = 1; |
| |
| if (!seg && !(seg = find_seg_by_le(lv, le))) { |
| log_error("Failed to find segment for %s extent %" PRIu32, |
| lv->name, le); |
| return 0; |
| } |
| |
| /* Remaining logical length of segment */ |
| remaining_seg_len = seg->len - (le - seg->le); |
| |
| if (remaining_seg_len > len) |
| remaining_seg_len = len; |
| |
| if (max_seg_len && *max_seg_len > remaining_seg_len) |
| *max_seg_len = remaining_seg_len; |
| |
| area_multiple = _calc_area_multiple(seg->segtype, seg->area_count, 0); |
| area_len = (remaining_seg_len / area_multiple) ? : 1; |
| |
| /* For striped mirrors, all the areas are counted, through the mirror layer */ |
| if (top_level_area_index == -1) |
| stripes_per_mimage = _stripes_per_mimage(seg); |
| |
| for (s = first_area; |
| s < seg->area_count && (!max_areas || s <= max_areas); |
| s++) { |
| if (seg_type(seg, s) == AREA_LV) { |
| if (!(r = _for_each_pv(cmd, seg_lv(seg, s), |
| seg_le(seg, s) + |
| (le - seg->le) / area_multiple, |
| area_len, NULL, max_seg_len, 0, |
| (stripes_per_mimage == 1) && only_single_area_segments ? 1U : 0U, |
| (top_level_area_index != -1) ? top_level_area_index : (int) (s * stripes_per_mimage), |
| only_single_area_segments, fn, |
| data))) |
| stack; |
| } else if (seg_type(seg, s) == AREA_PV) |
| if (!(r = fn(cmd, seg_pvseg(seg, s), top_level_area_index != -1 ? (uint32_t) top_level_area_index + s : s, data))) |
| stack; |
| if (r != 1) |
| return r; |
| } |
| |
| /* FIXME only_single_area_segments used as workaround to skip log LV - needs new param? */ |
| if (!only_single_area_segments && seg_is_mirrored(seg) && seg->log_lv) { |
| if (!(r = _for_each_pv(cmd, seg->log_lv, 0, seg->log_lv->le_count, NULL, |
| NULL, 0, 0, 0, only_single_area_segments, |
| fn, data))) |
| stack; |
| if (r != 1) |
| return r; |
| } |
| |
| /* FIXME Add snapshot cow, thin meta etc. */ |
| |
| /* |
| if (!only_single_area_segments && !max_areas && seg_is_raid(seg)) { |
| for (s = first_area; s < seg->area_count; s++) { |
| if (seg_metalv(seg, s)) |
| if (!(r = _for_each_pv(cmd, seg_metalv(seg, s), 0, seg_metalv(seg, s)->le_count, NULL, |
| NULL, 0, 0, 0, 0, fn, data))) |
| stack; |
| if (r != 1) |
| return r; |
| } |
| } |
| */ |
| |
| return 1; |
| } |
| |
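| /* |
| * qsort() comparison function: order pv_area_used entries by the number |
| * of extents provisionally reserved ('used'), largest first. |
| */ |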
| static int _comp_area(const void *l, const void *r) |
| { |
| const struct pv_area_used *lhs = (const struct pv_area_used *) l; |
| const struct pv_area_used *rhs = (const struct pv_area_used *) r; |
| |
| if (lhs->used < rhs->used) |
| return 1; |
| |
| else if (lhs->used > rhs->used) |
| return -1; |
| |
| return 0; |
| } |
| |
| /* |
| * Search for pvseg that matches condition |
| */ |
| struct pv_match { |
| int (*condition)(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva); |
| |
| struct alloc_handle *ah; |
| struct alloc_state *alloc_state; |
| struct pv_area *pva; |
| const struct dm_config_node *cling_tag_list_cn; |
| int s; /* Area index of match */ |
| }; |
| |
| /* |
| * Is PV area on the same PV? |
| */ |
| static int _is_same_pv(struct pv_match *pvmatch __attribute__((unused)), struct pv_segment *pvseg, struct pv_area *pva) |
| { |
| if (pvseg->pv != pva->map->pv) |
| return 0; |
| |
| return 1; |
| } |
| |
| /* |
| * Does PV area have a tag listed in allocation/cling_tag_list that |
| * matches EITHER a tag of the PV of the existing segment OR a tag in pv_tags? |
| * If tags_list_str is set, then instead we generate a list of matching tags for printing. |
| */ |
| static int _match_pv_tags(const struct dm_config_node *cling_tag_list_cn, |
| struct physical_volume *pv1, uint32_t pv1_start_pe, uint32_t area_num, |
| struct physical_volume *pv2, struct dm_list *pv_tags, unsigned validate_only, |
| struct dm_pool *mem, const char **tags_list_str) |
| { |
| const struct dm_config_value *cv; |
| const char *str; |
| const char *tag_matched; |
| struct dm_list *tags_to_match = tags_list_str ? NULL : pv_tags ? : &pv2->tags; |
| struct dm_str_list *sl; |
| unsigned first_tag = 1; |
| |
| if (tags_list_str && !dm_pool_begin_object(mem, 256)) { |
| log_error("PV tags string allocation failed"); |
| return 0; |
| } |
| |
| for (cv = cling_tag_list_cn->v; cv; cv = cv->next) { |
| if (cv->type != DM_CFG_STRING) { |
| if (validate_only) |
| log_warn("WARNING: Ignoring invalid string in config file entry " |
| "allocation/cling_tag_list"); |
| continue; |
| } |
| str = cv->v.str; |
| if (!*str) { |
| if (validate_only) |
| log_warn("WARNING: Ignoring empty string in config file entry " |
| "allocation/cling_tag_list"); |
| continue; |
| } |
| |
| if (*str != '@') { |
| if (validate_only) |
| log_warn("WARNING: Ignoring string not starting with @ in config file entry " |
| "allocation/cling_tag_list: %s", str); |
| continue; |
| } |
| |
| str++; |
| |
| if (!*str) { |
| if (validate_only) |
| log_warn("WARNING: Ignoring empty tag in config file entry " |
| "allocation/cling_tag_list"); |
| continue; |
| } |
| |
| if (validate_only) |
| continue; |
| |
| /* Wildcard matches any tag against any tag. */ |
| if (!strcmp(str, "*")) { |
| if (tags_list_str) { |
| dm_list_iterate_items(sl, &pv1->tags) { |
| if (!first_tag && !dm_pool_grow_object(mem, ",", 0)) { |
| dm_pool_abandon_object(mem); |
| log_error("PV tags string extension failed."); |
| return 0; |
| } |
| first_tag = 0; |
| if (!dm_pool_grow_object(mem, sl->str, 0)) { |
| dm_pool_abandon_object(mem); |
| log_error("PV tags string extension failed."); |
| return 0; |
| } |
| } |
| continue; |
| } |
| if (!str_list_match_list(&pv1->tags, tags_to_match, &tag_matched)) |
| continue; |
| else { |
| if (!pv_tags) |
| log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.", |
| tag_matched, pv_dev_name(pv1), pv2 ? pv_dev_name(pv2) : "-"); |
| else |
| log_debug_alloc("Eliminating allocation area %" PRIu32 " at PV %s start PE %" PRIu32 |
| " from consideration: PV tag %s already used.", |
| area_num, pv_dev_name(pv1), pv1_start_pe, tag_matched); |
| return 1; |
| } |
| } |
| |
| if (!str_list_match_item(&pv1->tags, str) || |
| (tags_to_match && !str_list_match_item(tags_to_match, str))) |
| continue; |
| else { |
| if (tags_list_str) { |
| if (!first_tag && !dm_pool_grow_object(mem, ",", 0)) { |
| dm_pool_abandon_object(mem); |
| log_error("PV tags string extension failed."); |
| return 0; |
| } |
| first_tag = 0; |
| if (!dm_pool_grow_object(mem, str, 0)) { |
| dm_pool_abandon_object(mem); |
| log_error("PV tags string extension failed."); |
| return 0; |
| } |
| continue; |
| } |
| if (!pv_tags) |
| log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.", |
| str, pv_dev_name(pv1), pv2 ? pv_dev_name(pv2) : "-"); |
| else |
| log_debug_alloc("Eliminating allocation area %" PRIu32 " at PV %s start PE %" PRIu32 |
| " from consideration: PV tag %s already used.", |
| area_num, pv_dev_name(pv1), pv1_start_pe, str); |
| return 1; |
| } |
| } |
| |
| if (tags_list_str) { |
| if (!dm_pool_grow_object(mem, "\0", 1)) { |
| dm_pool_abandon_object(mem); |
| log_error("PV tags string extension failed."); |
| return 0; |
| } |
| *tags_list_str = dm_pool_end_object(mem); |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
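| /* Check every entry in allocation/cling_tag_list, warning about any that are malformed. */ |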
| static int _validate_tag_list(const struct dm_config_node *cling_tag_list_cn) |
| { |
| return _match_pv_tags(cling_tag_list_cn, NULL, 0, 0, NULL, NULL, 1, NULL, NULL); |
| } |
| |
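| /* |
| * Build a comma-separated string of the tags on pv1 that are listed in |
| * allocation/cling_tag_list, for inclusion in allocation debug messages. |
| */ |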
| static const char *_tags_list_str(struct alloc_handle *ah, struct physical_volume *pv1) |
| { |
| const char *tags_list_str; |
| |
| if (!_match_pv_tags(ah->cling_tag_list_cn, pv1, 0, 0, NULL, NULL, 0, ah->mem, &tags_list_str)) |
| return_NULL; |
| |
| return tags_list_str; |
| } |
| |
| /* |
| * Does PV area have a tag listed in allocation/cling_tag_list that |
| * matches a tag in the pv_tags list? |
| */ |
| static int _pv_has_matching_tag(const struct dm_config_node *cling_tag_list_cn, |
| struct physical_volume *pv1, uint32_t pv1_start_pe, uint32_t area_num, |
| struct dm_list *pv_tags) |
| { |
| return _match_pv_tags(cling_tag_list_cn, pv1, pv1_start_pe, area_num, NULL, pv_tags, 0, NULL, NULL); |
| } |
| |
| /* |
| * Does PV area have a tag listed in allocation/cling_tag_list that |
| * matches a tag of the PV of the existing segment? |
| */ |
| static int _pvs_have_matching_tag(const struct dm_config_node *cling_tag_list_cn, |
| struct physical_volume *pv1, struct physical_volume *pv2) |
| { |
| return _match_pv_tags(cling_tag_list_cn, pv1, 0, 0, pv2, NULL, 0, NULL, NULL); |
| } |
| |
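| /* Condition for _check_cling(): does the PV of the existing segment share a cling tag with the candidate PV? */ |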
| static int _has_matching_pv_tag(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva) |
| { |
| return _pvs_have_matching_tag(pvmatch->cling_tag_list_cn, pvseg->pv, pva->map->pv); |
| } |
| |
| /* |
| * Is PV area contiguous to PV segment? |
| */ |
| static int _is_contiguous(struct pv_match *pvmatch __attribute__((unused)), struct pv_segment *pvseg, struct pv_area *pva) |
| { |
| if (pvseg->pv != pva->map->pv) |
| return 0; |
| |
| if (pvseg->pe + pvseg->len != pva->start) |
| return 0; |
| |
| return 1; |
| } |
| |
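| /* |
| * Record pva as the (provisional) choice for parallel slot ix_pva, |
| * with 'required' extents reserved from it. |
| */ |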
| static void _reserve_area(struct alloc_handle *ah, struct alloc_state *alloc_state, struct pv_area *pva, |
| uint32_t required, uint32_t ix_pva, uint32_t unreserved) |
| { |
| struct pv_area_used *area_used = &alloc_state->areas[ix_pva]; |
| const char *pv_tag_list = NULL; |
| |
| if (ah->cling_tag_list_cn) |
| pv_tag_list = _tags_list_str(ah, pva->map->pv); |
| |
| log_debug_alloc("%s allocation area %" PRIu32 " %s %s start PE %" PRIu32 |
| " length %" PRIu32 " leaving %" PRIu32 "%s%s.", |
| area_used->pva ? "Changing " : "Considering", |
| ix_pva, area_used->pva ? "to" : "as", |
| dev_name(pva->map->pv->dev), pva->start, required, unreserved, |
| pv_tag_list ? " with PV tags: " : "", |
| pv_tag_list ? : ""); |
| |
| if (pv_tag_list) |
| dm_pool_free(ah->mem, (void *)pv_tag_list); |
| |
| area_used->pva = pva; |
| area_used->used = required; |
| } |
| |
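| /* |
| * As _reserve_area(), but first grow the areas array if ix_pva falls |
| * beyond its current size (possible after an area has been split). |
| */ |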
| static int _reserve_required_area(struct alloc_handle *ah, struct alloc_state *alloc_state, struct pv_area *pva, |
| uint32_t required, uint32_t ix_pva, uint32_t unreserved) |
| { |
| uint32_t s; |
| |
| /* Expand areas array if needed after an area was split. */ |
| if (ix_pva >= alloc_state->areas_size) { |
| alloc_state->areas_size *= 2; |
| if (!(alloc_state->areas = dm_realloc(alloc_state->areas, sizeof(*alloc_state->areas) * (alloc_state->areas_size)))) { |
| log_error("Memory reallocation for parallel areas failed."); |
| return 0; |
| } |
| for (s = alloc_state->areas_size / 2; s < alloc_state->areas_size; s++) |
| alloc_state->areas[s].pva = NULL; |
| } |
| |
| _reserve_area(ah, alloc_state, pva, required, ix_pva, unreserved); |
| |
| return 1; |
| } |
| |
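| /* |
| * _for_each_pv() callback: apply pvmatch->condition to one PV segment. |
| * Returns 1 to continue scanning or 2 once a match is found (reserving |
| * the matched area in slot s when filling positionally). |
| */ |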
| static int _is_condition(struct cmd_context *cmd __attribute__((unused)), |
| struct pv_segment *pvseg, uint32_t s, |
| void *data) |
| { |
| struct pv_match *pvmatch = data; |
| int positional = pvmatch->alloc_state->alloc_parms->flags & A_POSITIONAL_FILL; |
| |
| if (positional && pvmatch->alloc_state->areas[s].pva) |
| return 1; /* Area already assigned */ |
| |
| if (!pvmatch->condition(pvmatch, pvseg, pvmatch->pva)) |
| return 1; /* Continue */ |
| |
| if (positional && (s >= pvmatch->alloc_state->num_positional_areas)) |
| return 1; |
| |
| /* FIXME The previous test should make this one redundant. */ |
| if (positional && (s >= pvmatch->alloc_state->areas_size)) |
| return 1; |
| |
| /* |
| * Only used for the cling and contiguous policies (which make at most one |
| * allocation per PV), so it's safe to reserve the whole area here. |
| */ |
| if (positional) |
| _reserve_required_area(pvmatch->ah, pvmatch->alloc_state, pvmatch->pva, pvmatch->pva->count, s, 0); |
| |
| return 2; /* Finished */ |
| } |
| |
| /* |
| * Is pva on same PV as any existing areas? |
| */ |
| static int _check_cling(struct alloc_handle *ah, |
| const struct dm_config_node *cling_tag_list_cn, |
| struct lv_segment *prev_lvseg, struct pv_area *pva, |
| struct alloc_state *alloc_state) |
| { |
| struct pv_match pvmatch; |
| int r; |
| uint32_t le, len; |
| |
| pvmatch.ah = ah; |
| pvmatch.condition = cling_tag_list_cn ? _has_matching_pv_tag : _is_same_pv; |
| pvmatch.alloc_state = alloc_state; |
| pvmatch.pva = pva; |
| pvmatch.cling_tag_list_cn = cling_tag_list_cn; |
| |
| if (ah->maximise_cling) { |
| /* Check entire LV */ |
| le = 0; |
| len = prev_lvseg->le + prev_lvseg->len; |
| } else { |
| /* Only check 1 LE at end of previous LV segment */ |
| le = prev_lvseg->le + prev_lvseg->len - 1; |
| len = 1; |
| } |
| |
| /* FIXME Cope with stacks by flattening */ |
| if (!(r = _for_each_pv(ah->cmd, prev_lvseg->lv, le, len, NULL, NULL, |
| 0, 0, -1, 1, |
| _is_condition, &pvmatch))) |
| stack; |
| |
| if (r != 2) |
| return 0; |
| |
| return 1; |
| } |
| |
| /* |
| * Is pva contiguous to any existing areas or on the same PV? |
| */ |
| static int _check_contiguous(struct alloc_handle *ah, |
| struct lv_segment *prev_lvseg, struct pv_area *pva, |
| struct alloc_state *alloc_state) |
| { |
| struct pv_match pvmatch; |
| int r; |
| |
| pvmatch.ah = ah; |
| pvmatch.condition = _is_contiguous; |
| pvmatch.alloc_state = alloc_state; |
| pvmatch.pva = pva; |
| pvmatch.cling_tag_list_cn = NULL; |
| |
| /* FIXME Cope with stacks by flattening */ |
| if (!(r = _for_each_pv(ah->cmd, prev_lvseg->lv, |
| prev_lvseg->le + prev_lvseg->len - 1, 1, NULL, NULL, |
| 0, 0, -1, 1, |
| _is_condition, &pvmatch))) |
| stack; |
| |
| if (r != 2) |
| return 0; |
| |
| return 1; |
| } |
| |
| /* |
| * Is pva on same PV as any areas already used in this allocation attempt? |
| */ |
| static int _check_cling_to_alloced(struct alloc_handle *ah, const struct dm_config_node *cling_tag_list_cn, |
| struct pv_area *pva, struct alloc_state *alloc_state) |
| { |
| unsigned s; |
| struct alloced_area *aa; |
| int positional = alloc_state->alloc_parms->flags & A_POSITIONAL_FILL; |
| |
| /* |
| * Ignore log areas. They are always allocated whole as part of the |
| * first allocation, so if any are still needed nothing has been |
| * allocated yet and there is nothing to cling to. |
| */ |
| if (alloc_state->log_area_count_still_needed) |
| return 0; |
| |
| for (s = 0; s < ah->area_count; s++) { |
| if (positional && alloc_state->areas[s].pva) |
| continue; /* Area already assigned */ |
| dm_list_iterate_items(aa, &ah->alloced_areas[s]) { |
| if ((!cling_tag_list_cn && (pva->map->pv == aa[0].pv)) || |
| (cling_tag_list_cn && _pvs_have_matching_tag(cling_tag_list_cn, pva->map->pv, aa[0].pv))) { |
| if (positional) |
| _reserve_required_area(ah, alloc_state, pva, pva->count, s, 0); |
| return 1; |
| } |
| } |
| } |
| |
| return 0; |
| } |
| |
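| /* Is pv one of the PVs already used by an existing parallel area (listed in parallel_pvs)? */ |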
| static int _pv_is_parallel(struct physical_volume *pv, struct dm_list *parallel_pvs) |
| { |
| struct pv_list *pvl; |
| |
| dm_list_iterate_items(pvl, parallel_pvs) |
| if (pv == pvl->pv) |
| return 1; |
| |
| return 0; |
| } |
| |
| /* |
| * Decide whether or not to try allocation from supplied area pva. |
| * alloc_state->areas may get modified. |
| */ |
| static area_use_t _check_pva(struct alloc_handle *ah, struct pv_area *pva, uint32_t still_needed, |
| struct alloc_state *alloc_state, |
| unsigned already_found_one, unsigned iteration_count, unsigned log_iteration_count) |
| { |
| const struct alloc_parms *alloc_parms = alloc_state->alloc_parms; |
| unsigned s; |
| |
| /* Skip fully-reserved areas (which are not currently removed from the list). */ |
| if (!pva->unreserved) |
| return NEXT_AREA; |
| |
| /* FIXME Should this test be removed? */ |
| if (iteration_count) |
| /* |
| * Don't use an area twice. |
| */ |
| for (s = 0; s < alloc_state->areas_size; s++) |
| if (alloc_state->areas[s].pva == pva) |
| return NEXT_AREA; |
| |
| /* If maximise_cling is set, perform several checks, otherwise perform exactly one. */ |
| if (!iteration_count && !log_iteration_count && alloc_parms->flags & (A_CONTIGUOUS_TO_LVSEG | A_CLING_TO_LVSEG | A_CLING_TO_ALLOCED)) { |
| /* Contiguous? */ |
| if (((alloc_parms->flags & A_CONTIGUOUS_TO_LVSEG) || |
| (ah->maximise_cling && (alloc_parms->flags & A_AREA_COUNT_MATCHES))) && |
| _check_contiguous(ah, alloc_parms->prev_lvseg, pva, alloc_state)) |
| goto found; |
| |
| /* Try next area on same PV if looking for contiguous space */ |
| if (alloc_parms->flags & A_CONTIGUOUS_TO_LVSEG) |
| return NEXT_AREA; |
| |
| /* Cling to prev_lvseg? */ |
| if (((alloc_parms->flags & A_CLING_TO_LVSEG) || |
| (ah->maximise_cling && (alloc_parms->flags & A_AREA_COUNT_MATCHES))) && |
| _check_cling(ah, NULL, alloc_parms->prev_lvseg, pva, alloc_state)) |
| /* If this PV is suitable, use this first area */ |
| goto found; |
| |
| /* Cling_to_alloced? */ |
| if ((alloc_parms->flags & A_CLING_TO_ALLOCED) && |
| _check_cling_to_alloced(ah, NULL, pva, alloc_state)) |
| goto found; |
| |
| /* Cling_by_tags? */ |
| if (!(alloc_parms->flags & A_CLING_BY_TAGS) || !ah->cling_tag_list_cn) |
| return NEXT_PV; |
| |
| if ((alloc_parms->flags & A_AREA_COUNT_MATCHES)) { |
| if (_check_cling(ah, ah->cling_tag_list_cn, alloc_parms->prev_lvseg, pva, alloc_state)) |
| goto found; |
| } else if (_check_cling_to_alloced(ah, ah->cling_tag_list_cn, pva, alloc_state)) |
| goto found; |
| |
| /* All areas on this PV give the same result, so there is no point checking any more of them */ |
| return NEXT_PV; |
| } |
| |
| /* Normal/Anywhere */ |
| |
| /* Is it big enough on its own? */ |
| if (pva->unreserved * ah->area_multiple < still_needed && |
| ((!(alloc_parms->flags & A_CAN_SPLIT) && !ah->log_area_count) || |
| (already_found_one && alloc_parms->alloc != ALLOC_ANYWHERE))) |
| return NEXT_PV; |
| |
| found: |
| if (alloc_parms->flags & A_POSITIONAL_FILL) |
| return PREFERRED; |
| |
| return USE_AREA; |
| } |
| |
| /* |
| * Decide how many extents we're trying to obtain from a given area. |
| * Removes the extents from further consideration. |
| */ |
| static uint32_t _calc_required_extents(struct alloc_handle *ah, struct pv_area *pva, unsigned ix_pva, uint32_t max_to_allocate, alloc_policy_t alloc) |
| { |
| uint32_t required = max_to_allocate / ah->area_multiple; |
| |
| /* |
| * Update amount unreserved - effectively splitting an area |
| * into two or more parts. If the whole stripe doesn't fit, |
| * reduce amount we're looking for. |
| */ |
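| /* |
| * Illustrative example (hypothetical numbers, ignoring the log-length |
| * adjustments below): asking for 100 extents across 2 stripes means 50 |
| * extents per area; if this area only has 30 unreserved extents, all 30 |
| * are taken and the area is exhausted, otherwise 50 are deducted and |
| * the area is re-sorted into its list. |
| */ |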
| if (alloc == ALLOC_ANYWHERE) { |
| if (ix_pva >= ah->area_count + ah->parity_count) |
| required = ah->log_len; |
| } else if (required < ah->log_len) |
| required = ah->log_len; |
| |
| if (required >= pva->unreserved) { |
| required = pva->unreserved; |
| pva->unreserved = 0; |
| } else { |
| pva->unreserved -= required; |
| reinsert_changed_pv_area(pva); |
| } |
| |
| return required; |
| } |
| |
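| /* Reset every area slot and the positional-slot count before a fresh search. */ |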
| static void _clear_areas(struct alloc_state *alloc_state) |
| { |
| uint32_t s; |
| |
| alloc_state->num_positional_areas = 0; |
| |
| for (s = 0; s < alloc_state->areas_size; s++) |
| alloc_state->areas[s].pva = NULL; |
| } |
| |
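| /* |
| * Return any provisionally reserved extents so each pv_area is again |
| * considered in full. |
| */ |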
| static void _reset_unreserved(struct dm_list *pvms) |
| { |
| struct pv_map *pvm; |
| struct pv_area *pva; |
| |
| dm_list_iterate_items(pvm, pvms) |
| dm_list_iterate_items(pva, &pvm->areas) |
| if (pva->unreserved != pva->count) { |
| pva->unreserved = pva->count; |
| reinsert_changed_pv_area(pva); |
| } |
| } |
| |
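| /* Log a debug summary of how much space this allocation pass still needs. */ |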
| static void _report_needed_allocation_space(struct alloc_handle *ah, |
| struct alloc_state *alloc_state, |
| struct dm_list *pvms) |
| { |
| const char *metadata_type; |
| uint32_t parallel_areas_count, parallel_area_size; |
| uint32_t metadata_count, metadata_size; |
| |
| parallel_area_size = ah->new_extents - alloc_state->allocated; |
| parallel_area_size /= ah->area_multiple; |
| parallel_area_size -= (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? ah->log_len : 0; |
| |
| parallel_areas_count = ah->area_count + ah->parity_count; |
| |
| metadata_size = ah->log_len; |
| if (ah->alloc_and_split_meta) { |
| metadata_type = "metadata area"; |
| metadata_count = parallel_areas_count; |
| if (ah->split_metadata_is_allocated) |
| metadata_size = 0; |
| } else { |
| metadata_type = "mirror log"; |
| metadata_count = alloc_state->log_area_count_still_needed; |
| } |
| |
| log_debug_alloc("Still need %s%" PRIu32 " total extents from %" PRIu32 " remaining (%" PRIu32 " positional slots):", |
| ah->approx_alloc ? "up to " : "", |
| parallel_area_size * parallel_areas_count + metadata_size * metadata_count, pv_maps_size(pvms), |
| alloc_state->num_positional_areas); |
| log_debug_alloc(" %" PRIu32 " (%" PRIu32 " data/%" PRIu32 |
| " parity) parallel areas of %" PRIu32 " extents each", |
| parallel_areas_count, ah->area_count, ah->parity_count, parallel_area_size); |
| log_debug_alloc(" %" PRIu32 " %s%s of %" PRIu32 " extents each", |
| metadata_count, metadata_type, |
| (metadata_count == 1) ? "" : "s", |
| metadata_size); |
| } |
| |
| /* Work through the array, removing any entries with tags already used by previous areas. */ |
| static int _limit_to_one_area_per_tag(struct alloc_handle *ah, struct alloc_state *alloc_state, |
| uint32_t ix_log_offset, unsigned *ix) |
| { |
| uint32_t s = 0, u = 0; |
| DM_LIST_INIT(pv_tags); |
| |
| while (s < alloc_state->areas_size && alloc_state->areas[s].pva) { |
| /* Start again with an empty tag list when we reach the log devices */ |
| if (u == ix_log_offset) |
| dm_list_init(&pv_tags); |
| if (!_pv_has_matching_tag(ah->cling_tag_list_cn, alloc_state->areas[s].pva->map->pv, alloc_state->areas[s].pva->start, s, &pv_tags)) { |
| /* The comparison fn will ignore any non-cling tags so just add everything */ |
| if (!str_list_add_list(ah->mem, &pv_tags, &alloc_state->areas[s].pva->map->pv->tags)) |
| return_0; |
| |
| if (s != u) |
| alloc_state->areas[u] = alloc_state->areas[s]; |
| |
| u++; |
| } else |
| (*ix)--; /* One area removed */ |
| |
| s++; |
| } |
| |
| alloc_state->areas[u].pva = NULL; |
| |
| return 1; |
| } |
| |
| /* |
| * Returns 1 regardless of whether any space was found, except on error. |
| */ |
| static int _find_some_parallel_space(struct alloc_handle *ah, |
| struct dm_list *pvms, struct alloc_state *alloc_state, |
| struct dm_list *parallel_pvs, uint32_t max_to_allocate) |
| { |
| const struct alloc_parms *alloc_parms = alloc_state->alloc_parms; |
| unsigned ix = 0; |
| unsigned last_ix; |
| struct pv_map *pvm; |
| struct pv_area *pva; |
| unsigned preferred_count = 0; |
| unsigned already_found_one; |
| unsigned ix_log_offset; /* Offset to start of areas to use for log */ |
| unsigned too_small_for_log_count; /* How many too small for log? */ |
| unsigned iteration_count = 0; /* cling_to_alloced may need 2 iterations */ |
| unsigned log_iteration_count = 0; /* extra iteration for logs on data devices */ |
| struct alloced_area *aa; |
| uint32_t s; |
| uint32_t devices_needed = ah->area_count + ah->parity_count; |
| uint32_t required; |
| |
| _clear_areas(alloc_state); |
| _reset_unreserved(pvms); |
| |
| /* num_positional_areas holds the number of parallel allocations that must be contiguous/cling */ |
| /* These appear first in the array, so it is also the offset to the non-preferred allocations */ |
| /* At most one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG or A_CLING_TO_ALLOCED may be set */ |
| if (!(alloc_parms->flags & A_POSITIONAL_FILL)) |
| alloc_state->num_positional_areas = 0; |
| else if (alloc_parms->flags & (A_CONTIGUOUS_TO_LVSEG | A_CLING_TO_LVSEG)) |
| alloc_state->num_positional_areas = _stripes_per_mimage(alloc_parms->prev_lvseg) * alloc_parms->prev_lvseg->area_count; |
| else if (alloc_parms->flags & A_CLING_TO_ALLOCED) |
| alloc_state->num_positional_areas = ah->area_count; |
| |
| if (alloc_parms->alloc == ALLOC_NORMAL || (alloc_parms->flags & A_CLING_TO_ALLOCED)) |
| log_debug_alloc("Cling_to_allocated is %sset", |
| alloc_parms->flags & A_CLING_TO_ALLOCED ? "" : "not "); |
| |
| if (alloc_parms->flags & A_POSITIONAL_FILL) |
| log_debug_alloc("%u preferred area(s) to be filled positionally.", alloc_state->num_positional_areas); |
| else |
| log_debug_alloc("Areas to be sorted and filled sequentially."); |
| |
| _report_needed_allocation_space(ah, alloc_state, pvms); |
| |
| /* ix holds the number of areas found on other PVs */ |
| do { |
| if (log_iteration_count) { |
| log_debug_alloc("Found %u areas for %" PRIu32 " parallel areas and %" PRIu32 " log areas so far.", ix, devices_needed, alloc_state->log_area_count_still_needed); |
| } else if (iteration_count) |
| log_debug_alloc("Filled %u out of %u preferred areas so far.", preferred_count, alloc_state->num_positional_areas); |
| |
| /* |
| * Provide for escape from the loop if no progress is made. |
| * This should not happen: ALLOC_ANYWHERE should be able to use |
| * all available space. (If there aren't enough extents, the code |
| * should not reach this point.) |
| */ |
| last_ix = ix; |
| |
| /* |
| * Put the smallest area of each PV that is at least the |
| * size we need into areas array. If there isn't one |
| * that fits completely and we're allowed more than one |
| * LV segment, then take the largest remaining instead. |
| */ |
| dm_list_iterate_items(pvm, pvms) { |
| /* PV-level checks */ |
| if (dm_list_empty(&pvm->areas)) |
| continue; /* Next PV */ |
| |
| if (alloc_parms->alloc != ALLOC_ANYWHERE) { |
| /* Don't allocate onto the log PVs */ |
| if (ah->log_area_count) |
| dm_list_iterate_items(aa, &ah->alloced_areas[ah->area_count]) |
| for (s = 0; s < ah->log_area_count; s++) |
| if (!aa[s].pv) |
| goto next_pv; |
| |
| /* FIXME Split into log and non-log parallel_pvs and only check the log ones if log_iteration? */ |
| /* (I've temporarily disabled the check.) */ |
| /* Avoid PVs used by existing parallel areas */ |
| if (!log_iteration_count && parallel_pvs && _pv_is_parallel(pvm->pv, parallel_pvs)) |
| goto next_pv; |
| |
| /* |
| * Avoid PVs already set aside for log. |
| * We only reach here if there were enough PVs for the main areas but |
| * not enough for the logs. |
| */ |
| if (log_iteration_count) { |
| for (s = devices_needed; s < ix + alloc_state->num_positional_areas; s++) |
| if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv) |
| goto next_pv; |
| /* On a second pass, avoid PVs already used in an uncommitted area */ |
| } else if (iteration_count) |
| for (s = 0; s < devices_needed; s++) |
| if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv) |
| goto next_pv; |
| } |
| |
| already_found_one = 0; |
| /* First area in each list is the largest */ |
| dm_list_iterate_items(pva, &pvm->areas) { |
| /* |
| * There are two types of allocations, which can't be mixed at present: |
| * |
| * PREFERRED are stored immediately in a specific parallel slot. |
| * This is only used if the A_POSITIONAL_FILL flag is set. |
| * This requires the number of slots to match, so if comparing with |
| * prev_lvseg then A_AREA_COUNT_MATCHES must be set. |
| * |
| * USE_AREA are stored for later, then sorted and chosen from. |
| */ |
| switch (_check_pva(ah, pva, max_to_allocate, |
| alloc_state, already_found_one, iteration_count, log_iteration_count)) { |
| |
| case PREFERRED: |
| preferred_count++; |
| /* Fall through */ |
| |
| case NEXT_PV: |
| goto next_pv; |
| |
| case NEXT_AREA: |
| continue; |
| |
| case USE_AREA: |
| /* |
| * Except with ALLOC_ANYWHERE, replace first area with this |
| * one which is smaller but still big enough. |
| */ |
| if (!already_found_one || |
| alloc_parms->alloc == ALLOC_ANYWHERE) { |
| ix++; |
| already_found_one = 1; |
| } |
| |
| /* Reserve required amount of pva */ |
| required = _calc_required_extents(ah, pva, ix + alloc_state->num_positional_areas - 1, max_to_allocate, alloc_parms->alloc); |
| if (!_reserve_required_area(ah, alloc_state, pva, required, ix + alloc_state->num_positional_areas - 1, pva->unreserved)) |
| return_0; |
| } |
| |
| } |
| |
| next_pv: |
| /* With ALLOC_ANYWHERE we ignore further PVs once we have at least enough areas */ |
| /* With cling and contiguous we stop if we found a match for *all* the areas */ |
| /* FIXME Rename these variables! */ |
| if ((alloc_parms->alloc == ALLOC_ANYWHERE && |
| ix + alloc_state->num_positional_areas >= devices_needed + alloc_state->log_area_count_still_needed) || |
| (preferred_count == alloc_state->num_positional_areas && |
| (alloc_state->num_positional_areas == devices_needed + alloc_state->log_area_count_still_needed))) |
| break; |
| } |
| } while ((alloc_parms->alloc == ALLOC_ANYWHERE && last_ix != ix && ix < devices_needed + alloc_state->log_area_count_still_needed) || |
| /* With cling_to_alloced and normal, if there were gaps in the preferred areas, have a second iteration */ |
| (alloc_parms->alloc == ALLOC_NORMAL && preferred_count && |
| (preferred_count < alloc_state->num_positional_areas || alloc_state->log_area_count_still_needed) && |
| (alloc_parms->flags & A_CLING_TO_ALLOCED) && !iteration_count++) || |
| /* Extra iteration needed to fill log areas on PVs already used? */ |
| (alloc_parms->alloc == ALLOC_NORMAL && preferred_count == alloc_state->num_positional_areas && !ah->mirror_logs_separate && |
| (ix + preferred_count >= devices_needed) && |
| (ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed) && !log_iteration_count++)); |
| |
| /* Non-zero ix means at least one USE_AREA was returned */ |
| if (preferred_count < alloc_state->num_positional_areas && !(alloc_parms->flags & A_CLING_TO_ALLOCED) && !ix) |
| return 1; |
| |
| if (ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed) |
| return 1; |
| |
| /* Sort the areas so we allocate from the biggest */ |
| if (log_iteration_count) { |
| if (ix > devices_needed + 1) { |
| log_debug_alloc("Sorting %u log areas", ix - devices_needed); |
| qsort(alloc_state->areas + devices_needed, ix - devices_needed, sizeof(*alloc_state->areas), |
| _comp_area); |
| } |
| } else if (ix > 1) { |
| log_debug_alloc("Sorting %u areas", ix); |
| qsort(alloc_state->areas + alloc_state->num_positional_areas, ix, sizeof(*alloc_state->areas), |
| _comp_area); |
| } |
| |
| /* If there are gaps in our preferred areas, fill them from the sorted part of the array */ |
| if (preferred_count && preferred_count != alloc_state->num_positional_areas) { |
| for (s = 0; s < devices_needed; s++) |
| if (!alloc_state->areas[s].pva) { |
| alloc_state->areas[s].pva = alloc_state->areas[alloc_state->num_positional_areas].pva; |
| alloc_state->areas[s].used = alloc_state->areas[alloc_state->num_positional_areas].used; |
| alloc_state->areas[alloc_state->num_positional_areas++].pva = NULL; |
| } |
| } |
| |
| /* |
| * First time around, if there's a log, allocate it on the |
| * smallest device that has space for it. |
| */ |
| too_small_for_log_count = 0; |
| ix_log_offset = 0; |
| |
| /* FIXME This logic is due to its heritage and can be simplified! */ |
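| /* |
| * Illustration (hypothetical numbers): with 2 positional areas, ix = 5 |
| * further areas found, 1 of them too small for the log and |
| * log_area_count = 1, ix_log_offset = 2 + 5 - 1 - 1 = 5, i.e. the log |
| * comes from the smallest area that is still big enough. |
| */ |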
| if (alloc_state->log_area_count_still_needed) { |
| /* How many areas are too small for the log? */ |
| while (too_small_for_log_count < alloc_state->num_positional_areas + ix && |
| alloc_state->areas[alloc_state->num_positional_areas + ix - 1 - too_small_for_log_count].used < ah->log_len) |
| too_small_for_log_count++; |
| ix_log_offset = alloc_state->num_positional_areas + ix - too_small_for_log_count - ah->log_area_count; |
| } |
| |
| if (ix + alloc_state->num_positional_areas < devices_needed + |
| (alloc_state->log_area_count_still_needed ? alloc_state->log_area_count_still_needed + |
| too_small_for_log_count : 0)) |
| return 1; |
| |
| /* |
| * FIXME We should change the code to do separate calls for the log allocation |
| * and the data allocation so that _limit_to_one_area_per_tag doesn't have to guess |
| * where the split is going to occur. |
| */ |
| |
| /* |
| * This code covers the initial allocation - after that there is something to 'cling' to |
| * and we shouldn't get this far. |
| * alloc_state->num_positional_areas is assumed to be 0 with A_PARTITION_BY_TAGS. |
| * |
| * FIXME Consider a second attempt with A_PARTITION_BY_TAGS if, for example, the largest area |
| * had all the tags set, but other areas don't. |
| */ |
| if ((alloc_parms->flags & A_PARTITION_BY_TAGS) && !alloc_state->num_positional_areas) { |
| if (!_limit_to_one_area_per_tag(ah, alloc_state, ix_log_offset, &ix)) |
| return_0; |
| |
| /* Recalculate log position because we might have removed some areas from consideration */ |
| if (alloc_state->log_area_count_still_needed) { |
| /* How many areas are too small for the log? */ |
| too_small_for_log_count = 0; |
| while (too_small_for_log_count < ix && |
| alloc_state->areas[ix - 1 - too_small_for_log_count].pva && |
| alloc_state->areas[ix - 1 - too_small_for_log_count].used < ah->log_len) |
| too_small_for_log_count++; |
| if (ix < too_small_for_log_count + ah->log_area_count) |
| return 1; |
| ix_log_offset = ix - too_small_for_log_count - ah->log_area_count; |
| } |
| |
| if (ix < devices_needed + |
| (alloc_state->log_area_count_still_needed ? alloc_state->log_area_count_still_needed + |
| too_small_for_log_count : 0)) |
| return 1; |
| } |
| |
| /* |
| * Finally add the space identified to the list of areas to be used. |
| */ |
| if (!_alloc_parallel_area(ah, max_to_allocate, alloc_state, ix_log_offset)) |
| return_0; |
| |
| /* |
| * Log is always allocated first time. |
| */ |
| alloc_state->log_area_count_still_needed = 0; |
| |
| return 1; |
| } |
| |
| /* |
| * Choose sets of parallel areas to use, respecting any constraints |
| * supplied in alloc_parms. |
| */ |
| static int _find_max_parallel_space_for_one_policy(struct alloc_handle *ah, struct alloc_parms *alloc_parms, |
| struct dm_list *pvms, struct alloc_state *alloc_state) |
| { |
| uint32_t max_tmp; |
| uint32_t max_to_allocate; /* Maximum extents to allocate this time */ |
| uint32_t old_allocated; |
| uint32_t next_le; |
| struct seg_pvs *spvs; |
| struct dm_list *parallel_pvs; |
| |
| alloc_state->alloc_parms = alloc_parms; |
| |
| /* FIXME This algorithm needs a lot of cleaning up! */ |
| /* FIXME anywhere doesn't find all space yet */ |
| do { |
| parallel_pvs = NULL; |
| max_to_allocate = alloc_parms->extents_still_needed - alloc_state->allocated; |
| |
| /* |
| * If there are existing parallel PVs, avoid them and reduce |
| * the maximum we can allocate in one go accordingly. |
| */ |
| if (ah->parallel_areas) { |
| next_le = (alloc_parms->prev_lvseg ? alloc_parms->prev_lvseg->le + alloc_parms->prev_lvseg->len : 0) + alloc_state->allocated / ah->area_multiple; |
| dm_list_iterate_items(spvs, ah->parallel_areas) { |
| if (next_le >= spvs->le + spvs->len) |
| continue; |
| |
| max_tmp = max_to_allocate + |
| alloc_state->allocated; |
| |
| /* |
| * Because a request that groups metadata and |
| * data together will be split, we must adjust |
| * the comparison accordingly. |
| */ |
| if (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) |
| max_tmp -= ah->log_len; |
| if (max_tmp > (spvs->le + spvs->len) * ah->area_multiple) { |
| max_to_allocate = (spvs->le + spvs->len) * ah->area_multiple - alloc_state->allocated; |
| max_to_allocate += (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? ah->log_len : 0; |
| } |
| parallel_pvs = &spvs->pvs; |
| break; |
| } |
| } |
| |
| old_allocated = alloc_state->allocated; |
| |
| if (!_find_some_parallel_space(ah, pvms, alloc_state, parallel_pvs, max_to_allocate)) |
| return_0; |
| |
| /* |
| * For ALLOC_CLING, if the number of areas matches and maximise_cling is |
| * set we allow two passes, first with A_POSITIONAL_FILL then without. |
| * |
| * If we didn't allocate anything this time with ALLOC_NORMAL and had |
| * A_CLING_TO_ALLOCED set, try again without it. |
| * |
| * For ALLOC_NORMAL, if we did allocate something without the |
| * flag set, set it and continue so that further allocations |
| * remain on the same disks where possible. |
| */ |
| if (old_allocated == alloc_state->allocated) { |
| if (ah->maximise_cling && ((alloc_parms->alloc == ALLOC_CLING) || (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) && |
| (alloc_parms->flags & A_CLING_TO_LVSEG) && (alloc_parms->flags & A_POSITIONAL_FILL)) |
| alloc_parms->flags &= ~A_POSITIONAL_FILL; |
| else if ((alloc_parms->alloc == ALLOC_NORMAL) && (alloc_parms->flags & A_CLING_TO_ALLOCED)) |
| alloc_parms->flags &= ~A_CLING_TO_ALLOCED; |
| else |
| break; /* Give up */ |
| } else if (ah->maximise_cling && alloc_parms->alloc == ALLOC_NORMAL && |
|