| /* Copyright (C) 2002, 2005 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. |
| |
| The GNU C Library is free software; you can redistribute it and/or |
| modify it under the terms of the GNU Lesser General Public |
| License as published by the Free Software Foundation; either |
| version 2.1 of the License, or (at your option) any later version. |
| |
| The GNU C Library is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Lesser General Public License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public |
| License along with the GNU C Library; if not, write to the Free |
| Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 02111-1307 USA. */ |
| |
| #define _GNU_SOURCE 1 |
| #include <argp.h> |
| #include <error.h> |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <inttypes.h> |
| #include <limits.h> |
| #include <pthread.h> |
| #include <signal.h> |
| #include <stdbool.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <time.h> |
| #include <unistd.h> |
| #include <sys/param.h> |
| #include <sys/types.h> |
| |
/* Largest thread count the option parser accepts.  May be overridden
   on the compiler command line.  */
#if !defined MAX_THREADS
# define MAX_THREADS 100000
#endif

/* Initial value for the number of concurrently running threads.  May
   be overridden on the compiler command line.  */
#if !defined DEFAULT_THREADS
# define DEFAULT_THREADS 50
#endif


/* Keys of the options which have no single-character short form.  */
enum
{
  OPT_TO_THREAD = 300,
  OPT_TO_PROCESS = 301,
  OPT_SYNC_SIGNAL = 302,
  OPT_SYNC_JOIN = 303,
  OPT_TOPLEVEL = 304
};
| |
| |
| static const struct argp_option options[] = |
| { |
| { NULL, 0, NULL, 0, "\ |
| This is a test for threads so we allow ther user to selection the number of \ |
| threads which are used at any one time. Independently the total number of \ |
| rounds can be selected. This is the total number of threads which will have \ |
| run when the process terminates:" }, |
| { "threads", 't', "NUMBER", 0, "Number of threads used at once" }, |
| { "starts", 's', "NUMBER", 0, "Total number of working threads" }, |
| { "toplevel", OPT_TOPLEVEL, "NUMBER", 0, |
| "Number of toplevel threads which start the other threads; this \ |
| implies --sync-join" }, |
| |
| { NULL, 0, NULL, 0, "\ |
| Each thread can do one of two things: sleep or do work. The latter is 100% \ |
| CPU bound. The work load is the probability a thread does work. All values \ |
| from zero to 100 (inclusive) are valid. How often each thread repeats this \ |
| can be determined by the number of rounds. The work cost determines how long \ |
| each work session (not sleeping) takes. If it is zero a thread would \ |
| effectively nothing. By setting the number of rounds to zero the thread \ |
| does no work at all and pure thread creation times can be measured." }, |
| { "workload", 'w', "PERCENT", 0, "Percentage of time spent working" }, |
| { "workcost", 'c', "NUMBER", 0, |
| "Factor in the cost of each round of working" }, |
| { "rounds", 'r', "NUMBER", 0, "Number of rounds each thread runs" }, |
| |
| { NULL, 0, NULL, 0, "\ |
| There are a number of different methods how thread creation can be \ |
| synchronized. Synchronization is necessary since the number of concurrently \ |
| running threads is limited." }, |
| { "sync-signal", OPT_SYNC_SIGNAL, NULL, 0, |
| "Synchronize using a signal (default)" }, |
| { "sync-join", OPT_SYNC_JOIN, NULL, 0, "Synchronize using pthread_join" }, |
| |
| { NULL, 0, NULL, 0, "\ |
| One parameter for each threads execution is the size of the stack. If this \ |
| parameter is not used the system's default stack size is used. If many \ |
| threads are used the stack size should be chosen quite small." }, |
| { "stacksize", 'S', "BYTES", 0, "Size of threads stack" }, |
| { "guardsize", 'g', "BYTES", 0, |
| "Size of stack guard area; must fit into the stack" }, |
| |
| { NULL, 0, NULL, 0, "Signal options:" }, |
| { "to-thread", OPT_TO_THREAD, NULL, 0, "Send signal to main thread" }, |
| { "to-process", OPT_TO_PROCESS, NULL, 0, |
| "Send signal to process (default)" }, |
| |
| { NULL, 0, NULL, 0, "Administrative options:" }, |
| { "progress", 'p', NULL, 0, "Show signs of progress" }, |
| { "timing", 'T', NULL, 0, |
| "Measure time from startup to the last thread finishing" }, |
| { NULL, 0, NULL, 0, NULL } |
| }; |
| |
| /* Prototype for option handler. */ |
| static error_t parse_opt (int key, char *arg, struct argp_state *state); |
| |
| /* Data structure to communicate with argp functions. */ |
| static struct argp argp = |
| { |
| options, parse_opt |
| }; |
| |
| |
| static unsigned long int threads = DEFAULT_THREADS; |
| static unsigned long int workload = 75; |
| static unsigned long int workcost = 20; |
| static unsigned long int rounds = 10; |
| static long int starts = 5000; |
| static unsigned long int stacksize; |
| static long int guardsize = -1; |
| static bool progress; |
| static bool timing; |
| static bool to_thread; |
| static unsigned long int toplevel = 1; |
| |
| |
| static long int running; |
| static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER; |
| |
| static pid_t pid; |
| static pthread_t tmain; |
| |
| static clockid_t cl; |
| static struct timespec start_time; |
| |
| |
| static pthread_mutex_t sum_mutex = PTHREAD_MUTEX_INITIALIZER; |
| unsigned int sum; |
| |
| static enum |
| { |
| sync_signal, |
| sync_join |
| } |
| sync_method; |
| |
| |
| /* We use 64bit values for the times. */ |
| typedef unsigned long long int hp_timing_t; |
| |
| |
| /* Attributes for all created threads. */ |
| static pthread_attr_t attr; |
| |
| |
| static void * |
| work (void *arg) |
| { |
| unsigned long int i; |
| unsigned int state = (unsigned long int) arg; |
| |
| for (i = 0; i < rounds; ++i) |
| { |
| /* Determine what to do. */ |
| unsigned int rnum; |
| |
| /* Uniform distribution. */ |
| do |
| rnum = rand_r (&state); |
| while (rnum >= UINT_MAX - (UINT_MAX % 100)); |
| |
| rnum %= 100; |
| |
| if (rnum < workload) |
| { |
| int j; |
| int a[4] = { i, rnum, i + rnum, rnum - i }; |
| |
| if (progress) |
| write (STDERR_FILENO, "c", 1); |
| |
| for (j = 0; j < workcost; ++j) |
| { |
| a[0] += a[3] >> 12; |
| a[1] += a[2] >> 20; |
| a[2] += a[1] ^ 0x3423423; |
| a[3] += a[0] - a[1]; |
| } |
| |
| pthread_mutex_lock (&sum_mutex); |
| sum += a[0] + a[1] + a[2] + a[3]; |
| pthread_mutex_unlock (&sum_mutex); |
| } |
| else |
| { |
| /* Just sleep. */ |
| struct timespec tv; |
| |
| tv.tv_sec = 0; |
| tv.tv_nsec = 10000000; |
| |
| if (progress) |
| write (STDERR_FILENO, "w", 1); |
| |
| nanosleep (&tv, NULL); |
| } |
| } |
| |
| return NULL; |
| } |
| |
| |
| static void * |
| thread_function (void *arg) |
| { |
| work (arg); |
| |
| pthread_mutex_lock (&running_mutex); |
| if (--running <= 0 && starts <= 0) |
| { |
| /* We are done. */ |
| if (progress) |
| write (STDERR_FILENO, "\n", 1); |
| |
| if (timing) |
| { |
| struct timespec end_time; |
| |
| if (clock_gettime (cl, &end_time) == 0) |
| { |
| end_time.tv_sec -= start_time.tv_sec; |
| end_time.tv_nsec -= start_time.tv_nsec; |
| if (end_time.tv_nsec < 0) |
| { |
| end_time.tv_nsec += 1000000000; |
| --end_time.tv_sec; |
| } |
| |
| printf ("\nRuntime: %lu.%09lu seconds\n", |
| (unsigned long int) end_time.tv_sec, |
| (unsigned long int) end_time.tv_nsec); |
| } |
| } |
| |
| printf ("Result: %08x\n", sum); |
| |
| exit (0); |
| } |
| pthread_mutex_unlock (&running_mutex); |
| |
| if (sync_method == sync_signal) |
| { |
| if (to_thread) |
| /* This code sends a signal to the main thread. */ |
| pthread_kill (tmain, SIGUSR1); |
| else |
| /* Use this code to test sending a signal to the process. */ |
| kill (pid, SIGUSR1); |
| } |
| |
| if (progress) |
| write (STDERR_FILENO, "f", 1); |
| |
| return NULL; |
| } |
| |
| |
| struct start_info |
| { |
| unsigned int starts; |
| unsigned int threads; |
| }; |
| |
| |
| static void * |
| start_threads (void *arg) |
| { |
| struct start_info *si = arg; |
| unsigned int starts = si->starts; |
| pthread_t ths[si->threads]; |
| unsigned int state = starts; |
| unsigned int n; |
| unsigned int i = 0; |
| int err; |
| |
| if (progress) |
| write (STDERR_FILENO, "T", 1); |
| |
| memset (ths, '\0', sizeof (pthread_t) * si->threads); |
| |
| while (starts-- > 0) |
| { |
| if (ths[i] != 0) |
| { |
| /* Wait for the threads in the order they were created. */ |
| err = pthread_join (ths[i], NULL); |
| if (err != 0) |
| error (EXIT_FAILURE, err, "cannot join thread"); |
| |
| if (progress) |
| write (STDERR_FILENO, "f", 1); |
| } |
| |
| err = pthread_create (&ths[i], &attr, work, |
| (void *) (long) (rand_r (&state) + starts + i)); |
| |
| if (err != 0) |
| error (EXIT_FAILURE, err, "cannot start thread"); |
| |
| if (progress) |
| write (STDERR_FILENO, "t", 1); |
| |
| if (++i == si->threads) |
| i = 0; |
| } |
| |
| n = i; |
| do |
| { |
| if (ths[i] != 0) |
| { |
| err = pthread_join (ths[i], NULL); |
| if (err != 0) |
| error (EXIT_FAILURE, err, "cannot join thread"); |
| |
| if (progress) |
| write (STDERR_FILENO, "f", 1); |
| } |
| |
| if (++i == si->threads) |
| i = 0; |
| } |
| while (i != n); |
| |
| if (progress) |
| write (STDERR_FILENO, "F", 1); |
| |
| return NULL; |
| } |
| |
| |
| int |
| main (int argc, char *argv[]) |
| { |
| int remaining; |
| sigset_t ss; |
| pthread_t th; |
| pthread_t *ths = NULL; |
| int empty = 0; |
| int last; |
| bool cont = true; |
| |
| /* Parse and process arguments. */ |
| argp_parse (&argp, argc, argv, 0, &remaining, NULL); |
| |
| if (sync_method == sync_join) |
| { |
| ths = (pthread_t *) calloc (threads, sizeof (pthread_t)); |
| if (ths == NULL) |
| error (EXIT_FAILURE, errno, |
| "cannot allocate memory for thread descriptor array"); |
| |
| last = threads; |
| } |
| else |
| { |
| ths = &th; |
| last = 1; |
| } |
| |
| if (toplevel > threads) |
| { |
| printf ("resetting number of toplevel threads to %lu to not surpass number to concurrent threads\n", |
| threads); |
| toplevel = threads; |
| } |
| |
| if (timing) |
| { |
| if (clock_getcpuclockid (0, &cl) != 0 |
| || clock_gettime (cl, &start_time) != 0) |
| timing = false; |
| } |
| |
| /* We need this later. */ |
| pid = getpid (); |
| tmain = pthread_self (); |
| |
| /* We use signal SIGUSR1 for communication between the threads and |
| the main thread. We only want sychronous notification. */ |
| if (sync_method == sync_signal) |
| { |
| sigemptyset (&ss); |
| sigaddset (&ss, SIGUSR1); |
| if (sigprocmask (SIG_BLOCK, &ss, NULL) != 0) |
| error (EXIT_FAILURE, errno, "cannot set signal mask"); |
| } |
| |
| /* Create the thread attributes. */ |
| pthread_attr_init (&attr); |
| |
| /* If the user provided a stack size use it. */ |
| if (stacksize != 0 |
| && pthread_attr_setstacksize (&attr, stacksize) != 0) |
| puts ("could not set stack size; will use default"); |
| /* And stack guard size. */ |
| if (guardsize != -1 |
| && pthread_attr_setguardsize (&attr, guardsize) != 0) |
| puts ("invalid stack guard size; will use default"); |
| |
| /* All threads are created detached if we are not using pthread_join |
| to synchronize. */ |
| if (sync_method != sync_join) |
| pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); |
| |
| if (sync_method == sync_signal) |
| { |
| while (1) |
| { |
| int err; |
| bool do_wait = false; |
| |
| pthread_mutex_lock (&running_mutex); |
| if (starts-- < 0) |
| cont = false; |
| else |
| do_wait = ++running >= threads && starts > 0; |
| |
| pthread_mutex_unlock (&running_mutex); |
| |
| if (! cont) |
| break; |
| |
| if (progress) |
| write (STDERR_FILENO, "t", 1); |
| |
| err = pthread_create (&ths[empty], &attr, thread_function, |
| (void *) starts); |
| if (err != 0) |
| error (EXIT_FAILURE, err, "cannot start thread %lu", starts); |
| |
| if (++empty == last) |
| empty = 0; |
| |
| if (do_wait) |
| sigwaitinfo (&ss, NULL); |
| } |
| |
| /* Do nothing anymore. On of the threads will terminate the program. */ |
| sigfillset (&ss); |
| sigdelset (&ss, SIGINT); |
| while (1) |
| sigsuspend (&ss); |
| } |
| else |
| { |
| pthread_t ths[toplevel]; |
| struct start_info si[toplevel]; |
| unsigned int i; |
| |
| for (i = 0; i < toplevel; ++i) |
| { |
| unsigned int child_starts = starts / (toplevel - i); |
| unsigned int child_threads = threads / (toplevel - i); |
| int err; |
| |
| si[i].starts = child_starts; |
| si[i].threads = child_threads; |
| |
| err = pthread_create (&ths[i], &attr, start_threads, &si[i]); |
| if (err != 0) |
| error (EXIT_FAILURE, err, "cannot start thread"); |
| |
| starts -= child_starts; |
| threads -= child_threads; |
| } |
| |
| for (i = 0; i < toplevel; ++i) |
| { |
| int err = pthread_join (ths[i], NULL); |
| |
| if (err != 0) |
| error (EXIT_FAILURE, err, "cannot join thread"); |
| } |
| |
| /* We are done. */ |
| if (progress) |
| write (STDERR_FILENO, "\n", 1); |
| |
| if (timing) |
| { |
| struct timespec end_time; |
| |
| if (clock_gettime (cl, &end_time) == 0) |
| { |
| end_time.tv_sec -= start_time.tv_sec; |
| end_time.tv_nsec -= start_time.tv_nsec; |
| if (end_time.tv_nsec < 0) |
| { |
| end_time.tv_nsec += 1000000000; |
| --end_time.tv_sec; |
| } |
| |
| printf ("\nRuntime: %lu.%09lu seconds\n", |
| (unsigned long int) end_time.tv_sec, |
| (unsigned long int) end_time.tv_nsec); |
| } |
| } |
| |
| printf ("Result: %08x\n", sum); |
| |
| exit (0); |
| } |
| |
| /* NOTREACHED */ |
| return 0; |
| } |
| |
| |
| /* Handle program arguments. */ |
| static error_t |
| parse_opt (int key, char *arg, struct argp_state *state) |
| { |
| unsigned long int num; |
| long int snum; |
| |
| switch (key) |
| { |
| case 't': |
| num = strtoul (arg, NULL, 0); |
| if (num <= MAX_THREADS) |
| threads = num; |
| else |
| printf ("\ |
| number of threads limited to %u; recompile with a higher limit if necessary", |
| MAX_THREADS); |
| break; |
| |
| case 'w': |
| num = strtoul (arg, NULL, 0); |
| if (num <= 100) |
| workload = num; |
| else |
| puts ("workload must be between 0 and 100 percent"); |
| break; |
| |
| case 'c': |
| workcost = strtoul (arg, NULL, 0); |
| break; |
| |
| case 'r': |
| rounds = strtoul (arg, NULL, 0); |
| break; |
| |
| case 's': |
| starts = strtoul (arg, NULL, 0); |
| break; |
| |
| case 'S': |
| num = strtoul (arg, NULL, 0); |
| if (num >= PTHREAD_STACK_MIN) |
| stacksize = num; |
| else |
| printf ("minimum stack size is %d\n", PTHREAD_STACK_MIN); |
| break; |
| |
| case 'g': |
| snum = strtol (arg, NULL, 0); |
| if (snum < 0) |
| printf ("invalid guard size %s\n", arg); |
| else |
| guardsize = snum; |
| break; |
| |
| case 'p': |
| progress = true; |
| break; |
| |
| case 'T': |
| timing = true; |
| break; |
| |
| case OPT_TO_THREAD: |
| to_thread = true; |
| break; |
| |
| case OPT_TO_PROCESS: |
| to_thread = false; |
| break; |
| |
| case OPT_SYNC_SIGNAL: |
| sync_method = sync_signal; |
| break; |
| |
| case OPT_SYNC_JOIN: |
| sync_method = sync_join; |
| break; |
| |
| case OPT_TOPLEVEL: |
| num = strtoul (arg, NULL, 0); |
| if (num < MAX_THREADS) |
| toplevel = num; |
| else |
| printf ("\ |
| number of threads limited to %u; recompile with a higher limit if necessary", |
| MAX_THREADS); |
| sync_method = sync_join; |
| break; |
| |
| default: |
| return ARGP_ERR_UNKNOWN; |
| } |
| |
| return 0; |
| } |
| |
| |
| static hp_timing_t |
| get_clockfreq (void) |
| { |
| /* We read the information from the /proc filesystem. It contains at |
| least one line like |
| cpu MHz : 497.840237 |
| or also |
| cpu MHz : 497.841 |
| We search for this line and convert the number in an integer. */ |
| static hp_timing_t result; |
| int fd; |
| |
| /* If this function was called before, we know the result. */ |
| if (result != 0) |
| return result; |
| |
| fd = open ("/proc/cpuinfo", O_RDONLY); |
| if (__builtin_expect (fd != -1, 1)) |
| { |
| /* XXX AFAIK the /proc filesystem can generate "files" only up |
| to a size of 4096 bytes. */ |
| char buf[4096]; |
| ssize_t n; |
| |
| n = read (fd, buf, sizeof buf); |
| if (__builtin_expect (n, 1) > 0) |
| { |
| char *mhz = memmem (buf, n, "cpu MHz", 7); |
| |
| if (__builtin_expect (mhz != NULL, 1)) |
| { |
| char *endp = buf + n; |
| int seen_decpoint = 0; |
| int ndigits = 0; |
| |
| /* Search for the beginning of the string. */ |
| while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n') |
| ++mhz; |
| |
| while (mhz < endp && *mhz != '\n') |
| { |
| if (*mhz >= '0' && *mhz <= '9') |
| { |
| result *= 10; |
| result += *mhz - '0'; |
| if (seen_decpoint) |
| ++ndigits; |
| } |
| else if (*mhz == '.') |
| seen_decpoint = 1; |
| |
| ++mhz; |
| } |
| |
| /* Compensate for missing digits at the end. */ |
| while (ndigits++ < 6) |
| result *= 10; |
| } |
| } |
| |
| close (fd); |
| } |
| |
| return result; |
| } |
| |
| |
/* Store in *CLOCK_ID the ID of the CPU-time clock of process PID.
   Only the calling process is supported; it can be named by its own
   process ID or by zero.  Returns 0 on success, EPERM for any other
   process, and ENOENT if no process CPU-time clock is available.  */
int
clock_getcpuclockid (pid_t pid, clockid_t *clock_id)
{
  const int is_self = pid == 0 || pid == getpid ();

  /* Other processes are off limits.  */
  if (! is_self)
    return EPERM;

#if defined CLOCK_PROCESS_CPUTIME_ID
  /* Hand out the clock ID.  */
  *clock_id = CLOCK_PROCESS_CPUTIME_ID;

  return 0;
#else
  /* There is no such clock on this system.  */
  return ENOENT;
#endif
}
| |
| |
/* HP_TIMING_NOW (Var) stores the current value of the CPU's
   high-resolution counter in the lvalue Var.

   The architecture is recognized by the reserved-namespace macros
   (the plain `i386' is not predefined in strict ISO C modes), and all
   variants use the __asm__ spelling since `asm' is not a keyword in
   those modes either.  */
#if defined __i386__ || defined i386
# define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var))
#elif defined __x86_64__
/* rdtsc delivers the counter in two 32-bit halves.  */
# define HP_TIMING_NOW(Var) \
  ({ unsigned int _hi, _lo; \
     __asm__ __volatile__ ("rdtsc" : "=a" (_lo), "=d" (_hi)); \
     (Var) = ((unsigned long long int) _hi << 32) | _lo; })
#elif defined __ia64__
# define HP_TIMING_NOW(Var) \
  __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (Var) : : "memory")
#else
# error "HP_TIMING_NOW missing"
#endif
| |
| /* Get current value of CLOCK and store it in TP. */ |
| int |
| clock_gettime (clockid_t clock_id, struct timespec *tp) |
| { |
| int retval = -1; |
| |
| switch (clock_id) |
| { |
| case CLOCK_PROCESS_CPUTIME_ID: |
| { |
| |
| static hp_timing_t freq; |
| hp_timing_t tsc; |
| |
| /* Get the current counter. */ |
| HP_TIMING_NOW (tsc); |
| |
| if (freq == 0) |
| { |
| freq = get_clockfreq (); |
| if (freq == 0) |
| return EINVAL; |
| } |
| |
| /* Compute the seconds. */ |
| tp->tv_sec = tsc / freq; |
| |
| /* And the nanoseconds. This computation should be stable until |
| we get machines with about 16GHz frequency. */ |
| tp->tv_nsec = ((tsc % freq) * UINT64_C (1000000000)) / freq; |
| |
| retval = 0; |
| } |
| break; |
| |
| default: |
| errno = EINVAL; |
| break; |
| } |
| |
| return retval; |
| } |