/* blob: 9ad1da27198188b8a037f55ca082c56474e2b37d [file] [log] [blame] */
/**
* @file odb.h
* This file contains various definitions and the interface for managing
* an in-memory, growable hash table, backed by a mmapped file, that stores
* sample files.
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author Philippe Elie
*/
#ifndef ODB_HASH_H
#define ODB_HASH_H
#include <stddef.h>
#include <stdint.h>
#include "op_list.h"
/** the type of a key. 64-bit because CG needs a pair of 32-bit values {from,to} */
typedef uint64_t odb_key_t;
/** the type of a value stored in the database */
typedef unsigned int odb_value_t;
/** the type of an index (node number); lists are implemented through indices */
typedef unsigned int odb_index_t;
/** the type used to store a node number */
typedef odb_index_t odb_node_nr_t;
/** stores the hash mask; hash table sizes are always a power of two */
typedef odb_index_t odb_hash_mask_t;
/* There are (BUCKET_FACTOR * nr node) entries in the hash table. This can
* seem excessive, but hashing eip values doesn't give a good distribution and
* our goal is to get O(1) amortized insert time. BUCKET_FACTOR must be a
* power of two. FIXME: see the big comment in odb_hash_add_node(); you must
* re-enable zeroing of the hash table if BUCKET_FACTOR > 2 (roughly exact, you
* want to read the comment in odb_hash_add_node() if you tune this define)
*/
#define BUCKET_FACTOR 1
/** a db hash node: one key/value pair plus the link to the next node of
* the same bucket
*/
typedef struct {
odb_key_t key; /**< eip */
odb_value_t value; /**< samples count */
odb_index_t next; /**< index of the next entry for this bucket */
} odb_node_t;
/** the minimal information which must be stored in the file to properly
* reload the database. Following this header is the node array, then
* the hash table (when growing we avoid copying the node array)
*/
typedef struct {
odb_node_nr_t size; /**< in node nr (power of two) */
odb_node_nr_t current_size; /**< nr used node + 1, node 0 unused */
int padding[6]; /**< for padding and future use */
} odb_descr_t;
/** a "database". This is an in-memory-only description.
*
* We allow managing a database inside a mapped file with a "header" of
* unknown size, so odb_open() takes a parameter specifying the size of this
* header. A typical use is:
*
* struct header { int etc; ... };
* odb_open(&hash, filename, ODB_RDWR, sizeof(struct header));
* so this library has no dependency on the header type.
*
* the internal memory layout from base_memory is:
* the unknown header (sizeof_header)
* odb_descr_t
* the node array: descr->size entries of type odb_node_t
* the hash table: array of odb_index_t indexing the node array
* (descr->size * BUCKET_FACTOR) entries
*/
typedef struct odb_data {
odb_node_t * node_base; /**< base memory area of the page */
odb_index_t * hash_base; /**< base memory of hash table */
odb_descr_t * descr; /**< the current state of database */
odb_hash_mask_t hash_mask; /**< == descr->size - 1 */
unsigned int sizeof_header; /**< from base_memory to odb header */
unsigned int offset_node; /**< from base_memory to node array */
void * base_memory; /**< base memory of the mmapped memory */
int fd; /**< mmapped memory file descriptor */
char * filename; /**< full path name of sample file */
int ref_count; /**< reference count */
struct list_head list; /**< hash bucket list */
} odb_data_t;
/** a database handle: holds only a pointer to the odb_data_t description */
typedef struct {
odb_data_t * data; /**< the in-memory description of the database */
} odb_t;
#ifdef __cplusplus
extern "C" {
#endif
/* db_manage.c */
/** how to open the DB file; passed as the rw argument of odb_open() */
enum odb_rw {
ODB_RDONLY = 0, /**< open for reading only */
ODB_RDWR = 1 /**< open for reading and/or writing */
};
/**
* odb_init - initialize a DB file
* @param odb the DB object to initialize
*/
void odb_init(odb_t * odb);
/**
* odb_open - open a DB file
* @param odb the database object to set up
* @param filename the name of the file backing the mapped memory
* @param rw ODB_RDWR if opening for writing, else ODB_RDONLY
* @param sizeof_header size of the file header if any
*
* The sizeof_header parameter allows the data file to have a header
* at the start of the file which is skipped.
* odb_open() always preallocates a small number of pages.
* returns 0 on success, errno on failure
*/
int odb_open(odb_t * odb, char const * filename,
enum odb_rw rw, size_t sizeof_header);
/** close the given ODB file */
void odb_close(odb_t * odb);
/** return the number of times this sample file is open */
int odb_open_count(odb_t const * odb);
/** return the start of the mapped data */
void * odb_get_data(odb_t * odb);
/** issue an msync on the used size of the mmapped file */
void odb_sync(odb_t const * odb);
/**
* Grow the hash table in such a way that current_size is the index of the
* first free node. Take care: all node pointers can be invalidated by this
* call.
*
* Node allocation is done in two steps: 1st) ensure a free node exists,
* which the caller can then set up; 2nd) commit the node allocation with
* odb_commit_reservation().
* It is done this way to ensure the node setup becomes visible to another
* process, like the pp tools, in an atomic way.
*
* Returns 0 on success, non-zero on failure. On failure this function does
* nothing and errno is set by the first failing libc call, allowing the
* caller to retry after cleaning up some program resources.
*/
int odb_grow_hashtable(odb_data_t * data);
/**
* Commit a previously successful node reservation by advancing the
* descriptor's current_size by one. This can't fail.
*/
static __inline void odb_commit_reservation(odb_data_t * data)
{
	data->descr->current_size += 1;
}
/** "impossible" node number used to indicate an error from odb_hash_add_node() */
#define ODB_NODE_NR_INVALID ((odb_node_nr_t)-1)
/* db_debug.c */
/** check that the hash is well built
*
* NOTE(review): the meaning of the returned int is not documented here;
* confirm against db_debug.c.
*/
int odb_check_hash(odb_t const * odb);
/* db_stat.c */
/** opaque statistics type; only forward-declared in this header */
typedef struct odb_hash_stat_t odb_hash_stat_t;
/** NOTE(review): presumably gathers statistics about the hash table of odb
* and returns them in an object to be released with odb_hash_free_stat();
* confirm against db_stat.c.
*/
odb_hash_stat_t * odb_hash_stat(odb_t const * odb);
/** NOTE(review): presumably prints the given statistics; confirm against
* db_stat.c.
*/
void odb_hash_display_stat(odb_hash_stat_t const * stats);
/** NOTE(review): presumably releases a statistics object obtained from
* odb_hash_stat(); confirm against db_stat.c.
*/
void odb_hash_free_stat(odb_hash_stat_t * stats);
/* db_insert.c */
/** update the info at key by incrementing its associated value by one.
* If the key does not exist, a new node is created and its associated
* value is set to one.
*
* returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
*/
int odb_update_node(odb_t * odb, odb_key_t key);
/**
* odb_update_node_with_offset
* @param odb the database object to update
* @param key the hash key
* @param offset the offset to be added
*
* Update the info at key by adding the specified offset to its associated
* value. If the key does not exist, a new node is created and its associated
* value is set to offset.
*
* returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
*/
int odb_update_node_with_offset(odb_t * odb,
odb_key_t key,
unsigned long int offset);
/** Add a new node regardless of whether a node with the same key already
* exists
*
* returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
*/
int odb_add_node(odb_t * odb, odb_key_t key, odb_value_t value);
/* db_travel.c */
/**
* Return a base pointer to the node array and the number of nodes in this
* array. The caller then iterates through it:
*
* odb_node_nr_t node_nr, pos;
* odb_node_t * node = odb_get_iterator(odb, &node_nr);
* for ( pos = 0 ; pos < node_nr ; ++pos)
* // do something
*
* Note that the caller does not need to filter out nil keys, as nil is a
* valid key. The whole returned range is valid (i.e. it should never contain
* a zero value).
*/
odb_node_t * odb_get_iterator(odb_t const * odb, odb_node_nr_t * nr);
/**
* Compute the hash table slot for a key: fold the upper 32 bits of the key
* onto the lower 32 bits, mix in the result shifted right by 8, then mask
* with data->hash_mask.
*/
static __inline unsigned int
odb_do_hash(odb_data_t const * data, odb_key_t value)
{
	/* FIXME: a better hash for eip values is wanted; that needs
	 * instrumenting the code and a lot of testing ... */
	/* Combining the high-order bits is effectively a no-op: inside a
	 * binary image the high-order bits don't vary much. The hash table
	 * starts with a 7-bit mask, so this hash uses bits 0-7 and 8-15.
	 * The hash table is stored in the sample files (so it need not be
	 * rebuilt when profiling restarts), therefore changing this function
	 * changes the file format!
	 */
	uint32_t const folded = (uint32_t)(value ^ (value >> 32));
	uint32_t const mixed = folded ^ (folded >> 8);

	return mixed & data->hash_mask;
}
#ifdef __cplusplus
}
#endif
#endif /* !ODB_HASH_H */