blob: 75d3fe80b05515e33d2b791cbe5cf54eb6be401f [file] [log] [blame]
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* Copyright (C) 2004, 2005, 2012 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/configfs.h>
#include "heartbeat.h"
#include "tcp.h"
#include "nodemanager.h"
#include "masklog.h"
/*
* The first heartbeat pass had one global thread that would serialize all hb
* callback calls. This global serializing sem should only be removed once
* we've made sure that all callees can deal with being called concurrently
* from multiple hb region threads.
*/
static DECLARE_RWSEM(r2hb_callback_sem);
/*
* multiple hb threads are watching multiple regions. A node is live
* whenever any of the threads sees activity from the node in its region.
*/
static DEFINE_SPINLOCK(r2hb_live_lock);
static unsigned long r2hb_live_node_bitmap[BITS_TO_LONGS(R2NM_MAX_NODES)];
static struct r2hb_callback {
struct list_head list;
} r2hb_callbacks[R2HB_NUM_CB];
enum r2hb_heartbeat_modes {
R2HB_HEARTBEAT_LOCAL = 0,
R2HB_HEARTBEAT_GLOBAL,
R2HB_HEARTBEAT_NUM_MODES,
};
char *r2hb_heartbeat_mode_desc[R2HB_HEARTBEAT_NUM_MODES] = {
"local", /* R2HB_HEARTBEAT_LOCAL */
"global", /* R2HB_HEARTBEAT_GLOBAL */
};
unsigned int r2hb_dead_threshold = R2HB_DEFAULT_DEAD_THRESHOLD;
unsigned int r2hb_heartbeat_mode = R2HB_HEARTBEAT_LOCAL;
/* Only sets a new threshold if there are no active regions.
*
* No locking or otherwise interesting code is required for reading
* r2hb_dead_threshold as it can't change once regions are active and
* it's not interesting to anyone until then anyway. */
static void r2hb_dead_threshold_set(unsigned int threshold)
{
if (threshold > R2HB_MIN_DEAD_THRESHOLD) {
spin_lock(&r2hb_live_lock);
r2hb_dead_threshold = threshold;
spin_unlock(&r2hb_live_lock);
}
}
static int r2hb_global_hearbeat_mode_set(unsigned int hb_mode)
{
int ret = -1;
if (hb_mode < R2HB_HEARTBEAT_NUM_MODES) {
spin_lock(&r2hb_live_lock);
r2hb_heartbeat_mode = hb_mode;
ret = 0;
spin_unlock(&r2hb_live_lock);
}
return ret;
}
void r2hb_exit(void)
{
}
int r2hb_init(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(r2hb_callbacks); i++)
INIT_LIST_HEAD(&r2hb_callbacks[i].list);
memset(r2hb_live_node_bitmap, 0, sizeof(r2hb_live_node_bitmap));
return 0;
}
/* if we're already in a callback then we're already serialized by the sem */
static void r2hb_fill_node_map_from_callback(unsigned long *map,
unsigned bytes)
{
BUG_ON(bytes < (BITS_TO_LONGS(R2NM_MAX_NODES) * sizeof(unsigned long)));
memcpy(map, &r2hb_live_node_bitmap, bytes);
}
/*
* get a map of all nodes that are heartbeating in any regions
*/
void r2hb_fill_node_map(unsigned long *map, unsigned bytes)
{
/* callers want to serialize this map and callbacks so that they
* can trust that they don't miss nodes coming to the party */
down_read(&r2hb_callback_sem);
spin_lock(&r2hb_live_lock);
r2hb_fill_node_map_from_callback(map, bytes);
spin_unlock(&r2hb_live_lock);
up_read(&r2hb_callback_sem);
}
EXPORT_SYMBOL_GPL(r2hb_fill_node_map);
/*
* heartbeat configfs bits. The heartbeat set is a default set under
* the cluster set in nodemanager.c.
*/
/* heartbeat set */
struct r2hb_hb_group {
struct config_group hs_group;
/* some stuff? */
};
static struct r2hb_hb_group *to_r2hb_hb_group(struct config_group *group)
{
return group ?
container_of(group, struct r2hb_hb_group, hs_group)
: NULL;
}
static struct config_item r2hb_config_item;
static struct config_item *r2hb_hb_group_make_item(struct config_group *group,
const char *name)
{
int ret;
if (strlen(name) > R2HB_MAX_REGION_NAME_LEN) {
ret = -ENAMETOOLONG;
goto free;
}
config_item_put(&r2hb_config_item);
return &r2hb_config_item;
free:
return ERR_PTR(ret);
}
static void r2hb_hb_group_drop_item(struct config_group *group,
struct config_item *item)
{
if (r2hb_global_heartbeat_active()) {
pr_notice("ramster: Heartbeat %s on region %s (%s)\n",
"stopped/aborted", config_item_name(item),
"no region");
}
config_item_put(item);
}
struct r2hb_hb_group_attribute {
struct configfs_attribute attr;
ssize_t (*show)(struct r2hb_hb_group *, char *);
ssize_t (*store)(struct r2hb_hb_group *, const char *, size_t);
};
static ssize_t r2hb_hb_group_show(struct config_item *item,
struct configfs_attribute *attr,
char *page)
{
struct r2hb_hb_group *reg = to_r2hb_hb_group(to_config_group(item));
struct r2hb_hb_group_attribute *r2hb_hb_group_attr =
container_of(attr, struct r2hb_hb_group_attribute, attr);
ssize_t ret = 0;
if (r2hb_hb_group_attr->show)
ret = r2hb_hb_group_attr->show(reg, page);
return ret;
}
static ssize_t r2hb_hb_group_store(struct config_item *item,
struct configfs_attribute *attr,
const char *page, size_t count)
{
struct r2hb_hb_group *reg = to_r2hb_hb_group(to_config_group(item));
struct r2hb_hb_group_attribute *r2hb_hb_group_attr =
container_of(attr, struct r2hb_hb_group_attribute, attr);
ssize_t ret = -EINVAL;
if (r2hb_hb_group_attr->store)
ret = r2hb_hb_group_attr->store(reg, page, count);
return ret;
}
static ssize_t r2hb_hb_group_threshold_show(struct r2hb_hb_group *group,
char *page)
{
return sprintf(page, "%u\n", r2hb_dead_threshold);
}
static ssize_t r2hb_hb_group_threshold_store(struct r2hb_hb_group *group,
const char *page,
size_t count)
{
unsigned long tmp;
char *p = (char *)page;
int err;
err = kstrtoul(p, 10, &tmp);
if (err)
return err;
/* this will validate ranges for us. */
r2hb_dead_threshold_set((unsigned int) tmp);
return count;
}
static
ssize_t r2hb_hb_group_mode_show(struct r2hb_hb_group *group,
char *page)
{
return sprintf(page, "%s\n",
r2hb_heartbeat_mode_desc[r2hb_heartbeat_mode]);
}
static
ssize_t r2hb_hb_group_mode_store(struct r2hb_hb_group *group,
const char *page, size_t count)
{
unsigned int i;
int ret;
size_t len;
len = (page[count - 1] == '\n') ? count - 1 : count;
if (!len)
return -EINVAL;
for (i = 0; i < R2HB_HEARTBEAT_NUM_MODES; ++i) {
if (strnicmp(page, r2hb_heartbeat_mode_desc[i], len))
continue;
ret = r2hb_global_hearbeat_mode_set(i);
if (!ret)
pr_notice("ramster: Heartbeat mode set to %s\n",
r2hb_heartbeat_mode_desc[i]);
return count;
}
return -EINVAL;
}
static struct r2hb_hb_group_attribute r2hb_hb_group_attr_threshold = {
.attr = { .ca_owner = THIS_MODULE,
.ca_name = "dead_threshold",
.ca_mode = S_IRUGO | S_IWUSR },
.show = r2hb_hb_group_threshold_show,
.store = r2hb_hb_group_threshold_store,
};
static struct r2hb_hb_group_attribute r2hb_hb_group_attr_mode = {
.attr = { .ca_owner = THIS_MODULE,
.ca_name = "mode",
.ca_mode = S_IRUGO | S_IWUSR },
.show = r2hb_hb_group_mode_show,
.store = r2hb_hb_group_mode_store,
};
static struct configfs_attribute *r2hb_hb_group_attrs[] = {
&r2hb_hb_group_attr_threshold.attr,
&r2hb_hb_group_attr_mode.attr,
NULL,
};
static struct configfs_item_operations r2hb_hearbeat_group_item_ops = {
.show_attribute = r2hb_hb_group_show,
.store_attribute = r2hb_hb_group_store,
};
static struct configfs_group_operations r2hb_hb_group_group_ops = {
.make_item = r2hb_hb_group_make_item,
.drop_item = r2hb_hb_group_drop_item,
};
static struct config_item_type r2hb_hb_group_type = {
.ct_group_ops = &r2hb_hb_group_group_ops,
.ct_item_ops = &r2hb_hearbeat_group_item_ops,
.ct_attrs = r2hb_hb_group_attrs,
.ct_owner = THIS_MODULE,
};
/* this is just here to avoid touching group in heartbeat.h which the
* entire damn world #includes */
struct config_group *r2hb_alloc_hb_set(void)
{
struct r2hb_hb_group *hs = NULL;
struct config_group *ret = NULL;
hs = kzalloc(sizeof(struct r2hb_hb_group), GFP_KERNEL);
if (hs == NULL)
goto out;
config_group_init_type_name(&hs->hs_group, "heartbeat",
&r2hb_hb_group_type);
ret = &hs->hs_group;
out:
if (ret == NULL)
kfree(hs);
return ret;
}
void r2hb_free_hb_set(struct config_group *group)
{
struct r2hb_hb_group *hs = to_r2hb_hb_group(group);
kfree(hs);
}
/* hb callback registration and issuing */
static struct r2hb_callback *hbcall_from_type(enum r2hb_callback_type type)
{
if (type == R2HB_NUM_CB)
return ERR_PTR(-EINVAL);
return &r2hb_callbacks[type];
}
void r2hb_setup_callback(struct r2hb_callback_func *hc,
enum r2hb_callback_type type,
r2hb_cb_func *func,
void *data,
int priority)
{
INIT_LIST_HEAD(&hc->hc_item);
hc->hc_func = func;
hc->hc_data = data;
hc->hc_priority = priority;
hc->hc_type = type;
hc->hc_magic = R2HB_CB_MAGIC;
}
EXPORT_SYMBOL_GPL(r2hb_setup_callback);
int r2hb_register_callback(const char *region_uuid,
struct r2hb_callback_func *hc)
{
struct r2hb_callback_func *tmp;
struct list_head *iter;
struct r2hb_callback *hbcall;
int ret;
BUG_ON(hc->hc_magic != R2HB_CB_MAGIC);
BUG_ON(!list_empty(&hc->hc_item));
hbcall = hbcall_from_type(hc->hc_type);
if (IS_ERR(hbcall)) {
ret = PTR_ERR(hbcall);
goto out;
}
down_write(&r2hb_callback_sem);
list_for_each(iter, &hbcall->list) {
tmp = list_entry(iter, struct r2hb_callback_func, hc_item);
if (hc->hc_priority < tmp->hc_priority) {
list_add_tail(&hc->hc_item, iter);
break;
}
}
if (list_empty(&hc->hc_item))
list_add_tail(&hc->hc_item, &hbcall->list);
up_write(&r2hb_callback_sem);
ret = 0;
out:
mlog(ML_CLUSTER, "returning %d on behalf of %p for funcs %p\n",
ret, __builtin_return_address(0), hc);
return ret;
}
EXPORT_SYMBOL_GPL(r2hb_register_callback);
void r2hb_unregister_callback(const char *region_uuid,
struct r2hb_callback_func *hc)
{
BUG_ON(hc->hc_magic != R2HB_CB_MAGIC);
mlog(ML_CLUSTER, "on behalf of %p for funcs %p\n",
__builtin_return_address(0), hc);
/* XXX Can this happen _with_ a region reference? */
if (list_empty(&hc->hc_item))
return;
down_write(&r2hb_callback_sem);
list_del_init(&hc->hc_item);
up_write(&r2hb_callback_sem);
}
EXPORT_SYMBOL_GPL(r2hb_unregister_callback);
int r2hb_check_node_heartbeating_from_callback(u8 node_num)
{
unsigned long testing_map[BITS_TO_LONGS(R2NM_MAX_NODES)];
r2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map));
if (!test_bit(node_num, testing_map)) {
mlog(ML_HEARTBEAT,
"node (%u) does not have heartbeating enabled.\n",
node_num);
return 0;
}
return 1;
}
EXPORT_SYMBOL_GPL(r2hb_check_node_heartbeating_from_callback);
void r2hb_stop_all_regions(void)
{
}
EXPORT_SYMBOL_GPL(r2hb_stop_all_regions);
/*
* this is just a hack until we get the plumbing which flips file systems
* read only and drops the hb ref instead of killing the node dead.
*/
int r2hb_global_heartbeat_active(void)
{
return (r2hb_heartbeat_mode == R2HB_HEARTBEAT_GLOBAL);
}
EXPORT_SYMBOL(r2hb_global_heartbeat_active);
/* added for RAMster */
void r2hb_manual_set_node_heartbeating(int node_num)
{
if (node_num < R2NM_MAX_NODES)
set_bit(node_num, r2hb_live_node_bitmap);
}
EXPORT_SYMBOL(r2hb_manual_set_node_heartbeating);