| /* |
| * Copyright (C) 2007-2009 Red Hat, Inc. All rights reserved. |
| * |
| * This file is part of LVM2. |
| * |
| * This copyrighted material is made available to anyone wishing to use, |
| * modify, copy, or redistribute it subject to the terms and conditions |
| * of the GNU Lesser General Public License v.2.1. |
| * |
| * You should have received a copy of the GNU Lesser General Public License |
| * along with this program; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| /* |
| * This provides the interface between clvmd and OpenAIS as the cluster |
| * and lock manager. |
| */ |
| |
| #include "clvmd-common.h" |
| |
| #include <pthread.h> |
| #include <fcntl.h> |
| #include <syslog.h> |
| |
| #include <openais/saAis.h> |
| #include <openais/saLck.h> |
| |
| #include <corosync/corotypes.h> |
| #include <corosync/cpg.h> |
| |
| #include "locking.h" |
| #include "clvm.h" |
| #include "clvmd-comms.h" |
| #include "lvm-functions.h" |
| #include "clvmd.h" |
| |
/*
 * Timeout handed to OpenAIS calls that take one (in this file it is passed
 * to saLckResourceOpen()).
 * NOTE(review): units are whatever the callee's SaTimeT expects - confirm.
 */
#define TIMEOUT 10
| |
| static void openais_cpg_deliver_callback (cpg_handle_t handle, |
| const struct cpg_name *groupName, |
| uint32_t nodeid, |
| uint32_t pid, |
| void *msg, |
| size_t msg_len); |
| static void openais_cpg_confchg_callback(cpg_handle_t handle, |
| const struct cpg_name *groupName, |
| const struct cpg_address *member_list, size_t member_list_entries, |
| const struct cpg_address *left_list, size_t left_list_entries, |
| const struct cpg_address *joined_list, size_t joined_list_entries); |
| |
| static void _cluster_closedown(void); |
| |
| /* Hash list of nodes in the cluster */ |
| static struct dm_hash_table *node_hash; |
| |
| /* For associating lock IDs & resource handles */ |
| static struct dm_hash_table *lock_hash; |
| |
| /* Number of active nodes */ |
| static int num_nodes; |
| static unsigned int our_nodeid; |
| |
| static struct local_client *cluster_client; |
| |
| /* OpenAIS handles */ |
| static cpg_handle_t cpg_handle; |
| static SaLckHandleT lck_handle; |
| |
| static struct cpg_name cpg_group_name; |
| |
| /* Openais callback structs */ |
| cpg_callbacks_t openais_cpg_callbacks = { |
| .cpg_deliver_fn = openais_cpg_deliver_callback, |
| .cpg_confchg_fn = openais_cpg_confchg_callback, |
| }; |
| |
/* One record per node seen in the CPG group; stored in node_hash */
struct node_info {
	/* Last known state of the node */
	enum {
		NODE_UNKNOWN,
		NODE_DOWN,
		NODE_UP,
		NODE_CLVMD
	} state;

	/* Node ID; its bytes also serve as the hash key */
	int nodeid;
};
| |
| struct lock_info |
| { |
| SaLckResourceHandleT res_handle; |
| SaLckLockIdT lock_id; |
| SaNameT lock_name; |
| }; |
| |
| /* Set errno to something approximating the right value and return 0 or -1 */ |
| static int ais_to_errno(SaAisErrorT err) |
| { |
| switch(err) |
| { |
| case SA_AIS_OK: |
| return 0; |
| case SA_AIS_ERR_LIBRARY: |
| errno = EINVAL; |
| break; |
| case SA_AIS_ERR_VERSION: |
| errno = EINVAL; |
| break; |
| case SA_AIS_ERR_INIT: |
| errno = EINVAL; |
| break; |
| case SA_AIS_ERR_TIMEOUT: |
| errno = ETIME; |
| break; |
| case SA_AIS_ERR_TRY_AGAIN: |
| errno = EAGAIN; |
| break; |
| case SA_AIS_ERR_INVALID_PARAM: |
| errno = EINVAL; |
| break; |
| case SA_AIS_ERR_NO_MEMORY: |
| errno = ENOMEM; |
| break; |
| case SA_AIS_ERR_BAD_HANDLE: |
| errno = EINVAL; |
| break; |
| case SA_AIS_ERR_BUSY: |
| errno = EBUSY; |
| break; |
| case SA_AIS_ERR_ACCESS: |
| errno = EPERM; |
| break; |
| case SA_AIS_ERR_NOT_EXIST: |
| errno = ENOENT; |
| break; |
| case SA_AIS_ERR_NAME_TOO_LONG: |
| errno = ENAMETOOLONG; |
| break; |
| case SA_AIS_ERR_EXIST: |
| errno = EEXIST; |
| break; |
| case SA_AIS_ERR_NO_SPACE: |
| errno = ENOSPC; |
| break; |
| case SA_AIS_ERR_INTERRUPT: |
| errno = EINTR; |
| break; |
| case SA_AIS_ERR_NAME_NOT_FOUND: |
| errno = ENOENT; |
| break; |
| case SA_AIS_ERR_NO_RESOURCES: |
| errno = ENOMEM; |
| break; |
| case SA_AIS_ERR_NOT_SUPPORTED: |
| errno = EOPNOTSUPP; |
| break; |
| case SA_AIS_ERR_BAD_OPERATION: |
| errno = EINVAL; |
| break; |
| case SA_AIS_ERR_FAILED_OPERATION: |
| errno = EIO; |
| break; |
| case SA_AIS_ERR_MESSAGE_ERROR: |
| errno = EIO; |
| break; |
| case SA_AIS_ERR_QUEUE_FULL: |
| errno = EXFULL; |
| break; |
| case SA_AIS_ERR_QUEUE_NOT_AVAILABLE: |
| errno = EINVAL; |
| break; |
| case SA_AIS_ERR_BAD_FLAGS: |
| errno = EINVAL; |
| break; |
| case SA_AIS_ERR_TOO_BIG: |
| errno = E2BIG; |
| break; |
| case SA_AIS_ERR_NO_SECTIONS: |
| errno = ENOMEM; |
| break; |
| default: |
| errno = EINVAL; |
| break; |
| } |
| return -1; |
| } |
| |
/*
 * Render a csid as a decimal node ID for log messages.
 *
 * The first sizeof(int) bytes of csid are read as an int.  The result lives
 * in a static buffer, so it is overwritten by the next call.
 */
static char *print_openais_csid(const char *csid)
{
	static char text[128];
	int nodeid;

	memcpy(&nodeid, csid, sizeof(nodeid));
	sprintf(text, "%d", nodeid);

	return text;
}
| |
| static int add_internal_client(int fd, fd_callback_t callback) |
| { |
| struct local_client *client; |
| |
| DEBUGLOG("Add_internal_client, fd = %d\n", fd); |
| |
| if (!(client = dm_zalloc(sizeof(*client)))) { |
| DEBUGLOG("malloc failed\n"); |
| return -1; |
| } |
| |
| client->fd = fd; |
| client->type = CLUSTER_INTERNAL; |
| client->callback = callback; |
| add_client(client); |
| |
| /* Set Close-on-exec */ |
| fcntl(fd, F_SETFD, 1); |
| |
| return 0; |
| } |
| |
| static void openais_cpg_deliver_callback (cpg_handle_t handle, |
| const struct cpg_name *groupName, |
| uint32_t nodeid, |
| uint32_t pid, |
| void *msg, |
| size_t msg_len) |
| { |
| int target_nodeid; |
| |
| memcpy(&target_nodeid, msg, OPENAIS_CSID_LEN); |
| |
| DEBUGLOG("%u got message from nodeid %d for %d. len %" PRIsize_t "\n", |
| our_nodeid, nodeid, target_nodeid, msg_len-4); |
| |
| if (nodeid != our_nodeid) |
| if (target_nodeid == our_nodeid || target_nodeid == 0) |
| process_message(cluster_client, (char *)msg+OPENAIS_CSID_LEN, |
| msg_len-OPENAIS_CSID_LEN, (char*)&nodeid); |
| } |
| |
| static void openais_cpg_confchg_callback(cpg_handle_t handle, |
| const struct cpg_name *groupName, |
| const struct cpg_address *member_list, size_t member_list_entries, |
| const struct cpg_address *left_list, size_t left_list_entries, |
| const struct cpg_address *joined_list, size_t joined_list_entries) |
| { |
| int i; |
| struct node_info *ninfo; |
| |
| DEBUGLOG("confchg callback. %" PRIsize_t " joined, " |
| FMTsize_t " left, %" PRIsize_t " members\n", |
| joined_list_entries, left_list_entries, member_list_entries); |
| |
| for (i=0; i<joined_list_entries; i++) { |
| ninfo = dm_hash_lookup_binary(node_hash, |
| (char *)&joined_list[i].nodeid, |
| OPENAIS_CSID_LEN); |
| if (!ninfo) { |
| ninfo = malloc(sizeof(struct node_info)); |
| if (!ninfo) { |
| break; |
| } |
| else { |
| ninfo->nodeid = joined_list[i].nodeid; |
| dm_hash_insert_binary(node_hash, |
| (char *)&ninfo->nodeid, |
| OPENAIS_CSID_LEN, ninfo); |
| } |
| } |
| ninfo->state = NODE_CLVMD; |
| } |
| |
| for (i=0; i<left_list_entries; i++) { |
| ninfo = dm_hash_lookup_binary(node_hash, |
| (char *)&left_list[i].nodeid, |
| OPENAIS_CSID_LEN); |
| if (ninfo) |
| ninfo->state = NODE_DOWN; |
| } |
| |
| for (i=0; i<member_list_entries; i++) { |
| if (member_list[i].nodeid == 0) continue; |
| ninfo = dm_hash_lookup_binary(node_hash, |
| (char *)&member_list[i].nodeid, |
| OPENAIS_CSID_LEN); |
| if (!ninfo) { |
| ninfo = malloc(sizeof(struct node_info)); |
| if (!ninfo) { |
| break; |
| } |
| else { |
| ninfo->nodeid = member_list[i].nodeid; |
| dm_hash_insert_binary(node_hash, |
| (char *)&ninfo->nodeid, |
| OPENAIS_CSID_LEN, ninfo); |
| } |
| } |
| ninfo->state = NODE_CLVMD; |
| } |
| |
| num_nodes = member_list_entries; |
| } |
| |
| static int lck_dispatch(struct local_client *client, char *buf, int len, |
| const char *csid, struct local_client **new_client) |
| { |
| *new_client = NULL; |
| saLckDispatch(lck_handle, SA_DISPATCH_ONE); |
| return 1; |
| } |
| |
| static int _init_cluster(void) |
| { |
| SaAisErrorT err; |
| SaVersionT ver = { 'B', 1, 1 }; |
| int select_fd; |
| |
| node_hash = dm_hash_create(100); |
| lock_hash = dm_hash_create(10); |
| |
| err = cpg_initialize(&cpg_handle, |
| &openais_cpg_callbacks); |
| if (err != SA_AIS_OK) { |
| syslog(LOG_ERR, "Cannot initialise OpenAIS CPG service: %d", |
| err); |
| DEBUGLOG("Cannot initialise OpenAIS CPG service: %d", err); |
| return ais_to_errno(err); |
| } |
| |
| err = saLckInitialize(&lck_handle, |
| NULL, |
| &ver); |
| if (err != SA_AIS_OK) { |
| cpg_initialize(&cpg_handle, &openais_cpg_callbacks); |
| syslog(LOG_ERR, "Cannot initialise OpenAIS lock service: %d", |
| err); |
| DEBUGLOG("Cannot initialise OpenAIS lock service: %d\n\n", err); |
| return ais_to_errno(err); |
| } |
| |
| /* Connect to the clvmd group */ |
| strcpy((char *)cpg_group_name.value, "clvmd"); |
| cpg_group_name.length = strlen((char *)cpg_group_name.value); |
| err = cpg_join(cpg_handle, &cpg_group_name); |
| if (err != SA_AIS_OK) { |
| cpg_finalize(cpg_handle); |
| saLckFinalize(lck_handle); |
| syslog(LOG_ERR, "Cannot join clvmd process group"); |
| DEBUGLOG("Cannot join clvmd process group: %d\n", err); |
| return ais_to_errno(err); |
| } |
| |
| err = cpg_local_get(cpg_handle, |
| &our_nodeid); |
| if (err != SA_AIS_OK) { |
| cpg_finalize(cpg_handle); |
| saLckFinalize(lck_handle); |
| syslog(LOG_ERR, "Cannot get local node id\n"); |
| return ais_to_errno(err); |
| } |
| DEBUGLOG("Our local node id is %d\n", our_nodeid); |
| |
| saLckSelectionObjectGet(lck_handle, (SaSelectionObjectT *)&select_fd); |
| add_internal_client(select_fd, lck_dispatch); |
| |
| DEBUGLOG("Connected to OpenAIS\n"); |
| |
| return 0; |
| } |
| |
| static void _cluster_closedown(void) |
| { |
| saLckFinalize(lck_handle); |
| cpg_finalize(cpg_handle); |
| } |
| |
| static void _get_our_csid(char *csid) |
| { |
| memcpy(csid, &our_nodeid, sizeof(int)); |
| } |
| |
| /* OpenAIS doesn't really have nmode names so we |
| just use the node ID in hex instead */ |
| static int _csid_from_name(char *csid, const char *name) |
| { |
| int nodeid; |
| struct node_info *ninfo; |
| |
| if (sscanf(name, "%x", &nodeid) == 1) { |
| ninfo = dm_hash_lookup_binary(node_hash, csid, OPENAIS_CSID_LEN); |
| if (ninfo) |
| return nodeid; |
| } |
| return -1; |
| } |
| |
| static int _name_from_csid(const char *csid, char *name) |
| { |
| struct node_info *ninfo; |
| |
| ninfo = dm_hash_lookup_binary(node_hash, csid, OPENAIS_CSID_LEN); |
| if (!ninfo) |
| { |
| sprintf(name, "UNKNOWN %s", print_openais_csid(csid)); |
| return -1; |
| } |
| |
| sprintf(name, "%x", ninfo->nodeid); |
| return 0; |
| } |
| |
| static int _get_num_nodes() |
| { |
| DEBUGLOG("num_nodes = %d\n", num_nodes); |
| return num_nodes; |
| } |
| |
| /* Node is now known to be running a clvmd */ |
| static void _add_up_node(const char *csid) |
| { |
| struct node_info *ninfo; |
| |
| ninfo = dm_hash_lookup_binary(node_hash, csid, OPENAIS_CSID_LEN); |
| if (!ninfo) { |
| DEBUGLOG("openais_add_up_node no node_hash entry for csid %s\n", |
| print_openais_csid(csid)); |
| return; |
| } |
| |
| DEBUGLOG("openais_add_up_node %d\n", ninfo->nodeid); |
| |
| ninfo->state = NODE_CLVMD; |
| |
| return; |
| } |
| |
| /* Call a callback for each node, so the caller knows whether it's up or down */ |
| static int _cluster_do_node_callback(struct local_client *master_client, |
| void (*callback)(struct local_client *, |
| const char *csid, int node_up)) |
| { |
| struct dm_hash_node *hn; |
| struct node_info *ninfo; |
| int somedown = 0; |
| |
| dm_hash_iterate(hn, node_hash) |
| { |
| char csid[OPENAIS_CSID_LEN]; |
| |
| ninfo = dm_hash_get_data(node_hash, hn); |
| memcpy(csid, dm_hash_get_key(node_hash, hn), OPENAIS_CSID_LEN); |
| |
| DEBUGLOG("down_callback. node %d, state = %d\n", ninfo->nodeid, |
| ninfo->state); |
| |
| if (ninfo->state != NODE_DOWN) |
| callback(master_client, csid, ninfo->state == NODE_CLVMD); |
| if (ninfo->state != NODE_CLVMD) |
| somedown = -1; |
| } |
| return somedown; |
| } |
| |
| /* Real locking */ |
| static int _lock_resource(char *resource, int mode, int flags, int *lockid) |
| { |
| struct lock_info *linfo; |
| SaLckResourceHandleT res_handle; |
| SaAisErrorT err; |
| SaLckLockIdT lock_id; |
| SaLckLockStatusT lockStatus; |
| |
| /* This needs to be converted from DLM/LVM2 value for OpenAIS LCK */ |
| if (flags & LCK_NONBLOCK) flags = SA_LCK_LOCK_NO_QUEUE; |
| |
| linfo = malloc(sizeof(struct lock_info)); |
| if (!linfo) |
| return -1; |
| |
| DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode); |
| |
| linfo->lock_name.length = strlen(resource)+1; |
| strcpy((char *)linfo->lock_name.value, resource); |
| |
| err = saLckResourceOpen(lck_handle, &linfo->lock_name, |
| SA_LCK_RESOURCE_CREATE, TIMEOUT, &res_handle); |
| if (err != SA_AIS_OK) |
| { |
| DEBUGLOG("ResourceOpen returned %d\n", err); |
| free(linfo); |
| return ais_to_errno(err); |
| } |
| |
| err = saLckResourceLock( |
| res_handle, |
| &lock_id, |
| mode, |
| flags, |
| 0, |
| SA_TIME_END, |
| &lockStatus); |
| if (err != SA_AIS_OK && lockStatus != SA_LCK_LOCK_GRANTED) |
| { |
| free(linfo); |
| saLckResourceClose(res_handle); |
| return ais_to_errno(err); |
| } |
| |
| /* Wait for it to complete */ |
| |
| DEBUGLOG("lock_resource returning %d, lock_id=%" PRIx64 "\n", |
| err, lock_id); |
| |
| linfo->lock_id = lock_id; |
| linfo->res_handle = res_handle; |
| |
| dm_hash_insert(lock_hash, resource, linfo); |
| |
| return ais_to_errno(err); |
| } |
| |
| |
| static int _unlock_resource(char *resource, int lockid) |
| { |
| SaAisErrorT err; |
| struct lock_info *linfo; |
| |
| DEBUGLOG("unlock_resource %s\n", resource); |
| linfo = dm_hash_lookup(lock_hash, resource); |
| if (!linfo) |
| return 0; |
| |
| DEBUGLOG("unlock_resource: lockid: %" PRIx64 "\n", linfo->lock_id); |
| err = saLckResourceUnlock(linfo->lock_id, SA_TIME_END); |
| if (err != SA_AIS_OK) |
| { |
| DEBUGLOG("Unlock returned %d\n", err); |
| return ais_to_errno(err); |
| } |
| |
| /* Release the resource */ |
| dm_hash_remove(lock_hash, resource); |
| saLckResourceClose(linfo->res_handle); |
| free(linfo); |
| |
| return ais_to_errno(err); |
| } |
| |
| static int _sync_lock(const char *resource, int mode, int flags, int *lockid) |
| { |
| int status; |
| char lock1[strlen(resource)+3]; |
| char lock2[strlen(resource)+3]; |
| |
| snprintf(lock1, sizeof(lock1), "%s-1", resource); |
| snprintf(lock2, sizeof(lock2), "%s-2", resource); |
| |
| switch (mode) |
| { |
| case LCK_EXCL: |
| status = _lock_resource(lock1, SA_LCK_EX_LOCK_MODE, flags, lockid); |
| if (status) |
| goto out; |
| |
| /* If we can't get this lock too then bail out */ |
| status = _lock_resource(lock2, SA_LCK_EX_LOCK_MODE, LCK_NONBLOCK, |
| lockid); |
| if (status == SA_LCK_LOCK_NOT_QUEUED) |
| { |
| _unlock_resource(lock1, *lockid); |
| status = -1; |
| errno = EAGAIN; |
| } |
| break; |
| |
| case LCK_PREAD: |
| case LCK_READ: |
| status = _lock_resource(lock1, SA_LCK_PR_LOCK_MODE, flags, lockid); |
| if (status) |
| goto out; |
| _unlock_resource(lock2, *lockid); |
| break; |
| |
| case LCK_WRITE: |
| status = _lock_resource(lock2, SA_LCK_EX_LOCK_MODE, flags, lockid); |
| if (status) |
| goto out; |
| _unlock_resource(lock1, *lockid); |
| break; |
| |
| default: |
| status = -1; |
| errno = EINVAL; |
| break; |
| } |
| out: |
| *lockid = mode; |
| return status; |
| } |
| |
/*
 * Release both underlying locks ("<resource>-1" and "<resource>-2") of a
 * cluster lock.  Results of the individual unlocks are not reported; the
 * function always returns 0.
 */
static int _sync_unlock(const char *resource, int lockid)
{
	const size_t len = strlen(resource) + 3;
	char first[len];
	char second[len];

	snprintf(first, sizeof(first), "%s-1", resource);
	snprintf(second, sizeof(second), "%s-2", resource);

	_unlock_resource(first, lockid);
	_unlock_resource(second, lockid);

	return 0;
}
| |
/* Quorum is not tracked here: always report the cluster as quorate */
static int _is_quorate()
{
	const int quorate = 1;

	return quorate;
}
| |
| static int _get_main_cluster_fd(void) |
| { |
| int select_fd; |
| |
| cpg_fd_get(cpg_handle, &select_fd); |
| return select_fd; |
| } |
| |
| static int _cluster_fd_callback(struct local_client *fd, char *buf, int len, |
| const char *csid, |
| struct local_client **new_client) |
| { |
| cluster_client = fd; |
| *new_client = NULL; |
| cpg_dispatch(cpg_handle, SA_DISPATCH_ONE); |
| return 1; |
| } |
| |
| static int _cluster_send_message(const void *buf, int msglen, const char *csid, |
| const char *errtext) |
| { |
| struct iovec iov[2]; |
| SaAisErrorT err; |
| int target_node; |
| |
| if (csid) |
| memcpy(&target_node, csid, OPENAIS_CSID_LEN); |
| else |
| target_node = 0; |
| |
| iov[0].iov_base = &target_node; |
| iov[0].iov_len = sizeof(int); |
| iov[1].iov_base = (char *)buf; |
| iov[1].iov_len = msglen; |
| |
| err = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, iov, 2); |
| return ais_to_errno(err); |
| } |
| |
/*
 * We don't have a cluster name to report here, so the fixed string
 * "OpenAIS" is returned instead.
 *
 * buf     destination buffer.
 * buflen  size of buf in bytes.
 *
 * The result is always NUL-terminated and is truncated if buf is too
 * small.  Returns 0 on success, -1 if buf is NULL or buflen is not
 * positive.
 */
static int _get_cluster_name(char *buf, int buflen)
{
	if (!buf || buflen <= 0)
		return -1;

	strncpy(buf, "OpenAIS", buflen);
	/* strncpy() does not terminate the string when it truncates */
	buf[buflen - 1] = '\0';

	return 0;
}
| |
| static struct cluster_ops _cluster_openais_ops = { |
| .name = "openais", |
| .cluster_init_completed = NULL, |
| .cluster_send_message = _cluster_send_message, |
| .name_from_csid = _name_from_csid, |
| .csid_from_name = _csid_from_name, |
| .get_num_nodes = _get_num_nodes, |
| .cluster_fd_callback = _cluster_fd_callback, |
| .get_main_cluster_fd = _get_main_cluster_fd, |
| .cluster_do_node_callback = _cluster_do_node_callback, |
| .is_quorate = _is_quorate, |
| .get_our_csid = _get_our_csid, |
| .add_up_node = _add_up_node, |
| .reread_config = NULL, |
| .cluster_closedown = _cluster_closedown, |
| .get_cluster_name = _get_cluster_name, |
| .sync_lock = _sync_lock, |
| .sync_unlock = _sync_unlock, |
| }; |
| |
| struct cluster_ops *init_openais_cluster(void) |
| { |
| if (!_init_cluster()) |
| return &_cluster_openais_ops; |
| |
| return NULL; |
| } |