| /*============================================================================ |
| bandwidth 0.24, a benchmark to estimate memory transfer bandwidth. |
| Copyright (C) 2005-2010 by Zack T Smith. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| |
| The author may be reached at fbui@comcast.net. |
| *===========================================================================*/ |
| |
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <wchar.h>
#include <math.h>
#include <stdatomic.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
| |
| #include <netdb.h> // gethostbyname |
| #include <sys/socket.h> |
| #include <netinet/in.h> |
| #include <arpa/inet.h> |
| |
| #include "defs.h" |
| #include "BMP.h" |
| #include "config.h" |
| |
| #ifdef __WIN32__ |
| #include <windows.h> |
| #endif |
| |
| #ifdef __linux__ |
| #include <linux/fb.h> |
| #include <sys/mman.h> |
| #endif |
| |
| #ifdef CONFIG_ARCH_S2L |
| #if defined(CONFIG_BSP_BOARD_S2LM_KIWI) || defined(CONFIG_BSP_BOARD_STRAWBERRY) |
| #define DRAM_SIZE_SMALL |
| #endif |
| #endif |
| |
| //---------------------------------------- |
| // Graphing data. |
| // |
| static char graph_title [500]; |
| #define TITLE "Results from bandwidth " VERSION " by Zack Smith, http://caladan.tk" |
| static BMP *graph; // Graph of results. |
| static int graph_width = 1280; |
| static int graph_height = 720; |
| static int graph_left_margin = 100; |
| static int graph_margin = 50; // top/bottom/right |
| static int graph_x_span = 1; |
| static int graph_y_span = 1; |
| static int graph_last_x = -1; |
| static int graph_last_y = -1; |
| static unsigned long graph_fg = RGB_BLACK; |
| static int legend_y; |
| #define MAX_GRAPH_DATA 5000 |
| static long graph_data [MAX_GRAPH_DATA]; |
| static int graph_data_index = 0; |
| enum { |
| DATUM_SIZE=0, |
| DATUM_AMOUNT=1, |
| DATUM_COLOR=2, |
| }; |
| static int max_bandwidth = 0; // Always 10 times the # of megabyte/sec. |
| |
// Instruction-set switches (register_test() consults use_sse4).
static bool use_sse2 = true;
static bool use_sse4 = true;

// Run flag: do_write()/do_read() set it to 1 before starting the workers
// and clear it after the measurement interval; the workers poll it between
// batches. It is atomic because it is written by one thread and read
// concurrently by the others.
static atomic_int goon_flag = 1;

// Number of worker threads used by do_write()/do_read().
static int thread_num = 4;

// Index in chunk_sizes at which graph_plot() begins its lookup.
static int chunk_index = 0;

// Divisor used to choose a worker's CPU (id % cpu_num). It is not assigned
// in this part of the file; presumably set during start-up -- verify.
static int cpu_num = 0;
| |
// Per-worker arguments handed to do_thread_write()/do_thread_read().
struct thread_params {
	// Worker index; also used to choose the CPU (id % cpu_num).
	int id;

	// Number of bytes this worker operates on.
	unsigned long size;

	// true: visit 256-byte blocks through chunk_ptrs;
	// false: walk chunk sequentially.
	bool random;

	// Shuffled block pointers (used when random is true).
	unsigned long **chunk_ptrs;

	// Start of this worker's region (used when random is false).
	unsigned char *chunk;

	// In: loops per batch. Out: total loops the worker performed.
	unsigned long loops;
};
| |
//----------------------------------------
// Parameters for the tests.
//
static long usec_per_test = 5 * 1000 * 1000;	// 5 seconds per test.

// Buffer sizes to test, in bytes, ending with a 0 sentinel.
// Kept parallel to chunk_sizes_log2: entry N here pairs with entry N there.
static int chunk_sizes[] = {
	256,
	512,
	768,
	1 * 1024,
	2 * 1024,
	3 * 1024,
	4 * 1024,
	6 * 1024,
	8 * 1024,	// Some processors' L1 data caches are only 8kB.
	12 * 1024,
	16 * 1024,
	20 * 1024,
	24 * 1024,
	28 * 1024,
	32 * 1024,	// Common L1 data cache size.
	40 * 1024,
	48 * 1024,
	64 * 1024,
	128 * 1024,	// Old L2 cache size.
	192 * 1024,
	256 * 1024,	// Old L2 cache size.
	384 * 1024,
	512 * 1024,	// Old L2 cache size.
	768 * 1024,
	1024 * 1024,	// 1 MB = common L2 cache size.
	1280 * 1024,	// 1.25 MB
	1536 * 1024,	// 1.5 MB
	1792 * 1024,	// 1.75 MB
	2048 * 1024,	// 2 MB = common L2 cache size.
	2304 * 1024,	// 2.25 MB
	2560 * 1024,	// 2.5 MB
	2816 * 1024,	// 2.75 MB
	3072 * 1024,	// 3 MB = common L2 cache size.
	4096 * 1024,	// 4 MB
	5120 * 1024,	// 5 MB
	6144 * 1024,	// 6 MB (std L2 cache size)
	16 * 1024 * 1024,
	64 * 1024 * 1024,
	0
};
| |
//----------------------------------------
// Base-2 logarithms of the entries of chunk_sizes, in the same order and
// also ending with 0. They are hard-coded because, under CeGCC, the math.h
// log2() function turned out to be very inaccurate (e.g. log2(8)=1.44).
//
static double chunk_sizes_log2[] =
{
	8,		// 256 B
	9,		// 512 B
	9.585,		// 768 B
	10,		// 1 kB
	11,		// 2 kB
	11.585,		// 3 kB
	12,		// 4 kB
	12.585,		// 6 kB
	13,		// 8 kB
	13.585,		// 12 kB
	14,		// 16 kB
	14.3219,	// 20 kB
	14.585,		// 24 kB
	14.8074,	// 28 kB
	15,		// 32 kB
	15.3219,	// 40 kB
	15.585,		// 48 kB
	16,		// 64 kB
	17,		// 128 kB
	17.585,		// 192 kB
	18,		// 256 kB
	18.585,		// 384 kB
	19,		// 512 kB
	19.585,		// 768 kB
	20,		// 1 MB
	20.3219,	// 1.25 MB
	20.585,		// 1.5 MB
	20.8074,	// 1.75 MB
	21,		// 2 MB
	21.1699,	// 2.25 MB
	21.3219,	// 2.5 MB
	21.4594,	// 2.75 MB
	21.585,		// 3 MB
	22,		// 4 MB
	22.3219,	// 5 MB
	22.585,		// 6 MB
	24,		// 16 MB
	26,		// 64 MB
	0
};

// Smallest and largest whole exponent found in chunk_sizes_log2.
// These are determined in graph_draw_labels().
static int min_chunk_size = 1;
static int max_chunk_size = 1;
| |
//----------------------------------------------------------------------------
// Name:	error
// Purpose:	Complain and exit with a failure status. Never returns.
// Params:	s - NUL-terminated message text.
// Notes:	On Windows the text is widened into a fixed 200-element
//		buffer for MessageBoxW; longer messages are truncated.
//----------------------------------------------------------------------------
void error (char *s)
{
#ifndef __WIN32__
	fprintf (stderr, "Error: %s\n", s);
	exit (1);
#else
	wchar_t tmp [200];
	const size_t capacity = sizeof tmp / sizeof tmp[0];
	size_t i;

	// Stop one short of the buffer size so the terminator always fits.
	for (i = 0; s[i] && i < capacity - 1; i++)
		tmp[i] = (unsigned char) s[i];
	tmp[i] = 0;

	MessageBoxW (0, tmp, L"Error", 0);
	ExitProcess (1);
#endif
}
| |
// Prints 'value' to stdout as 16 lowercase hex digits, most significant
// digit first, without any prefix or newline.
void
dump_hex64 (unsigned long long value)
{
	int shift;

	for (shift = 60; shift >= 0; shift -= 4) {
		unsigned int nibble = (unsigned int) ((value >> shift) & 0xf);
		printf ("%1x", nibble);
	}
}
| |
| //============================================================================ |
| // Graphing logic. |
| //============================================================================ |
| |
| //---------------------------------------------------------------------------- |
| // Name: graph_draw_labels |
| // Purpose: Draw the labels and ticks. |
| //---------------------------------------------------------------------------- |
| void |
| graph_draw_labels () |
| { |
| int i; |
| |
| //---------------------------------------- |
| // Horizontal |
| // |
| //-------------------- |
| // Establish min & max. |
| // |
| min_chunk_size = 1000; |
| max_chunk_size = 0; |
| i = 0; |
| int j; |
| while ((j = chunk_sizes_log2 [i])) { |
| if (j < min_chunk_size) |
| min_chunk_size = j; |
| if (j > max_chunk_size) |
| max_chunk_size = j; |
| i++; |
| } |
| |
| for (i = min_chunk_size; i <= max_chunk_size; i++) { |
| char str[20]; |
| int x = graph_left_margin + |
| ((i-min_chunk_size) * graph_x_span) / |
| (max_chunk_size - min_chunk_size); |
| int y = graph_height - graph_margin + 10; |
| |
| unsigned long amt = 1 << i; |
| if (amt < 1024) |
| sprintf (str, "%ld B", amt); |
| else if (amt < (1<<20)) { |
| sprintf (str, "%ld kB", amt >> 10); |
| } |
| else { |
| j = amt >> 20; |
| switch ((amt >> 18) & 3) { |
| case 0: sprintf (str, "%d MB", j); break; |
| case 1: sprintf (str, "%d.25 MB", j); break; |
| case 2: sprintf (str, "%d.5 MB", j); break; |
| case 3: sprintf (str, "%d.75 MB", j); break; |
| } |
| } |
| |
| BMP_vline (graph, x, y, y-10, RGB_BLACK); |
| BMP_draw_mini_string (graph, str, x - 10, y+8, RGB_BLACK); |
| } |
| |
| //---------------------------------------- |
| // Vertical |
| // |
| for (i = 0; i <= (max_bandwidth/10000); i++) { |
| char str[20]; |
| int x = graph_left_margin - 10; |
| int y = graph_height - graph_margin - |
| (i * graph_y_span) / (max_bandwidth/10000); |
| |
| BMP_hline (graph, x, x+10, y, RGB_BLACK); |
| |
| sprintf (str, "%d GB/s", i); |
| BMP_draw_mini_string (graph, str, |
| x - 40, y - MINIFONT_HEIGHT/2, RGB_BLACK); |
| } |
| } |
| |
| void |
| graph_init () |
| { |
| if (!graph) |
| return; |
| |
| BMP_clear (graph, RGB_WHITE); |
| |
| BMP_hline (graph, graph_left_margin, graph_width - graph_margin, |
| graph_height - graph_margin, RGB_BLACK); |
| BMP_vline (graph, graph_left_margin, graph_margin, |
| graph_height - graph_margin, RGB_BLACK); |
| |
| graph_x_span = graph_width - (graph_margin + graph_left_margin); |
| graph_y_span = graph_height - 2 * graph_margin; |
| |
| BMP_draw_mini_string (graph, graph_title, |
| graph_left_margin, graph_margin/2, RGB_BLACK); |
| |
| legend_y = graph_margin; |
| } |
| |
| void |
| graph_new_line (char *str, unsigned long color) |
| { |
| BMP_draw_mini_string (graph, str, |
| graph_width - graph_margin - 200, legend_y, color); |
| |
| legend_y += 10; |
| |
| graph_fg = color; |
| graph_last_x = graph_last_y = -1; |
| |
| if (graph_data_index >= MAX_GRAPH_DATA-2) |
| error ("Too many graph data."); |
| |
| graph_data [graph_data_index++] = DATUM_COLOR; |
| graph_data [graph_data_index++] = (long) color; |
| } |
| |
| //---------------------------------------------------------------------------- |
| // Name: graph_add_point |
| // Purpose: Adds a point to this list to be drawn. |
| //---------------------------------------------------------------------------- |
| void |
| graph_add_point (int size, int amount) |
| { |
| if (graph_data_index >= MAX_GRAPH_DATA-4) |
| error ("Too many graph data."); |
| |
| graph_data [graph_data_index++] = DATUM_SIZE; |
| graph_data [graph_data_index++] = size; |
| graph_data [graph_data_index++] = DATUM_AMOUNT; |
| graph_data [graph_data_index++] = amount; |
| } |
| |
| //---------------------------------------------------------------------------- |
| // Name: graph_plot |
| // Purpose: Plots a point on the current graph. |
| //---------------------------------------------------------------------------- |
| void |
| graph_plot (int size, int amount) |
| { |
| //---------------------------------------- |
| // Get the log2 of the chunk size. |
| // We cannot rely on the libm math.h log2 |
| // function, because under CeGCC, |
| // log2(8) = 1.44. |
| // |
| int i = chunk_index; |
| while (chunk_sizes [i] && chunk_sizes [i] != size) |
| i++; |
| if (!chunk_sizes [i]) |
| error ("Lookup of chunk size failed."); |
| double tmp = chunk_sizes_log2 [i]; |
| |
| //---------------------------------------- |
| // Plot the point. The x axis is |
| // logarithmic, base 2. |
| // |
| tmp -= (double) min_chunk_size; |
| tmp *= (double) graph_x_span; |
| tmp /= (double) (max_chunk_size - min_chunk_size); |
| |
| int x = graph_left_margin + (int) tmp; |
| int y = graph_height - graph_margin - |
| (amount * graph_y_span) / max_bandwidth; |
| |
| // Really I ought to save all data points, take max of everything, then plot. |
| |
| if (graph_last_x != -1 && graph_last_y != -1) { |
| BMP_line (graph, graph_last_x, graph_last_y, x, y, graph_fg); |
| } |
| |
| graph_last_x = x; |
| graph_last_y = y; |
| } |
| |
| //---------------------------------------------------------------------------- |
| // Name: graph_make |
| // Purpose: Plots all lines. |
| //---------------------------------------------------------------------------- |
| void |
| graph_make () |
| { |
| int i; |
| |
| //---------------------------------------- |
| // Get the maximum bandwidth in order to |
| // properly scale the graph vertically. |
| // |
| max_bandwidth = 0; |
| for (i = 0; i < graph_data_index; i += 2) { |
| if (graph_data[i] == DATUM_AMOUNT) { |
| int amt = graph_data[i+1]; |
| if (amt > max_bandwidth) |
| max_bandwidth = amt; |
| } |
| } |
| max_bandwidth /= 10000; |
| max_bandwidth *= 10000; |
| max_bandwidth += 10000; |
| |
| graph_draw_labels (); |
| |
| //---------------------------------------- |
| // OK, now draw the lines. |
| // |
| int size = -1, amt = -1; |
| for (i = 0; i < graph_data_index; i += 2) |
| { |
| int type = graph_data[i]; |
| long value = graph_data[i+1]; |
| |
| switch (type) { |
| case DATUM_AMOUNT: amt = value; break; |
| case DATUM_SIZE: size = value; break; |
| case DATUM_COLOR: |
| graph_fg = (unsigned long) value; |
| graph_last_x = -1; |
| graph_last_y = -1; |
| break; |
| } |
| |
| if (amt != -1 && size != -1) { |
| graph_plot (size, amt); |
| amt = size = -1; |
| } |
| } |
| } |
| |
| //============================================================================ |
| // Output buffer logic. |
| //============================================================================ |
| |
// Pending output is accumulated in 'msg' as a wide string and written out
// by dump(). MSGLEN is the capacity of msg in wide characters, including
// the terminator. Every append below is bounded: text that does not fit is
// dropped instead of being written past the end of the buffer.
#define MSGLEN 10000
static wchar_t msg [MSGLEN];

// Appends the wide string s to the pending output.
void print (wchar_t *s)
{
	size_t used = wcslen (msg);

	if (used < MSGLEN - 1)
		wcsncat (msg, s, MSGLEN - 1 - used);
}

// Appends a line break to the pending output.
void newline ()
{
	print (L"\n");
}

// Appends s followed by a line break.
void println (wchar_t *s)
{
	print (s);
	newline ();
}

// Appends the decimal representation of d.
void print_int (int d)
{
	size_t used = wcslen (msg);

#if defined(__WIN32__) && (defined(__arm__) || defined(__aarch64__))
	// This swprintf variant takes no size argument.
	swprintf (msg + used, L"%d", d);
#else
	if (used >= MSGLEN - 1)
		return;

	// The size passed is the room left after the existing text.
	// If formatting fails, cut the buffer back to what it held.
	if (swprintf (msg + used, MSGLEN - used, L"%d", d) < 0)
		msg [used] = 0;
#endif
}

// Appends the decimal representation of d followed by a line break.
void println_int (int d)
{
	print_int (d);
	newline ();
}

// Appends result with one decimal place followed by " MB/s".
void print_result (long double result)
{
	size_t used = wcslen (msg);

#if defined(__WIN32__) && (defined(__arm__) || defined(__aarch64__))
	// This swprintf variant takes no size argument.
	swprintf (msg + used, L"%.1Lf MB/s", result);
#else
	if (used >= MSGLEN - 1)
		return;

	if (swprintf (msg + used, MSGLEN - used, L"%.1Lf MB/s", result) < 0)
		msg [used] = 0;
#endif
}
| |
| void dump (FILE *f) |
| { |
| if (!f) |
| f = stdout; |
| |
| int i = 0; |
| while (msg[i]) { |
| char ch = (char) msg[i]; |
| fputc (ch, f); |
| i++; |
| } |
| |
| msg [0] = 0; |
| } |
| |
// Sends the pending output to stdout and flushes the stream. On Windows
// ARM builds it only sounds the default message beep.
void flush ()
{
#if !(defined(__WIN32__) && (defined(__arm__) || defined(__aarch64__)))
	dump (NULL);
	fflush (stdout);
#else
	MessageBeep (MB_OK);
#endif
}
| |
// Appends a human-readable size to the pending output: bytes below 1 kB,
// whole kilobytes below 1 MB, otherwise megabytes with a quarter-MB
// fraction (.25, .5 or .75) when one applies.
void print_size (unsigned long size)
{
	// Suffix for each value of the two bits below the MB boundary.
	static wchar_t *const quarter [4] = { NULL, L".25", L".5", L".75" };

	if (size >= (1<<20)) {
		wchar_t *fraction = quarter [(size >> 18) & 3];

		print_int (size >> 20);
		if (fraction)
			print (fraction);
		print (L" MB");
		return;
	}

	if (size >= 1024) {
		print_int (size >> 10);
		print (L" kB");
		return;
	}

	print_int (size);
	print (L" B");
}
| |
| //============================================================================ |
| // Timing logic. |
| //============================================================================ |
| |
//----------------------------------------------------------------------------
// Name:	mytime
// Purpose:	Reports time in microseconds.
// Returns:	A microsecond counter. The value may wrap around where
//		unsigned long is 32 bits wide; the arithmetic is unsigned, so
//		the difference between two nearby readings stays correct.
//----------------------------------------------------------------------------
unsigned long mytime ()
{
#ifndef __WIN32__
	struct timeval tv;

	// The timezone argument is obsolete; NULL is the portable choice.
	gettimeofday (&tv, NULL);

	// Convert before multiplying so the product cannot overflow a
	// signed type.
	return (unsigned long) tv.tv_sec * 1000000UL + (unsigned long) tv.tv_usec;
#else
	return 1000 * GetTickCount (); // accurate enough.
#endif
}
| |
//----------------------------------------------------------------------------
// Name:	calculate_result
// Purpose:	Calculates and prints a result.
// Params:	chunk_size  - bytes processed per loop.
//		total_count - number of loops performed.
//		diff        - elapsed time in microseconds; zero is fatal.
// Returns:	10 times the number of megabytes per second.
//----------------------------------------------------------------------------
int
calculate_result (unsigned long chunk_size, long long total_count, long diff)
{
	if (diff == 0)
		error ("Zero time difference.");

	// bytes * (microseconds per second) / (bytes per MB) / elapsed usec
	long double mb_per_sec = (long double) chunk_size;
	mb_per_sec *= (long double) total_count;
	mb_per_sec *= 1000000.;
	mb_per_sec /= 1048576.;
	mb_per_sec /= (long double) diff;

	print_result (mb_per_sec);

	return (long) (10.0 * mb_per_sec);
}
| |
| //============================================================================ |
| // Tests. |
| //============================================================================ |
| |
| //---------------------------------------------------------------------------- |
| // Name: do_write |
| // Purpose: Performs write on chunk of memory of specified size. |
| //---------------------------------------------------------------------------- |
// Transfer modes accepted by do_write() and do_copy().
enum {
	NO_SSE2 = 0,		// Plain main-register transfers.
	SSE2 = 1,		// 128-bit transfers.
	SSE2_BYPASS = 2,	// 128-bit transfers bypassing the cache.
};
| |
| static void *do_thread_write(void *arg) |
| { |
| struct thread_params *params = (struct thread_params *)arg; |
| unsigned long total_count = 0; |
| #if defined(__x86_64__) || defined(__aarch64__) |
| unsigned long value = 0x1234567689abcdef; |
| #else |
| unsigned long value = 0x12345678; |
| #endif |
| cpu_set_t mask; |
| |
| CPU_ZERO(&mask); |
| CPU_SET(params->id % cpu_num, &mask); |
| |
| if (pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) < 0) |
| fprintf(stderr, "set thread %d affinity failed\n", params->id); |
| |
| while(goon_flag) { |
| total_count += params->loops; |
| |
| if (params->random) |
| RandomWriter (params->chunk_ptrs, params->size/256, params->loops, value); |
| else |
| Writer (params->chunk, params->size, params->loops, value); |
| } |
| |
| params->loops = total_count; |
| |
| pthread_exit(NULL); |
| |
| return NULL; |
| } |
| |
| |
| int |
| do_write (unsigned long size, int mode, bool random) |
| { |
| unsigned char *chunk; |
| unsigned char *chunk0; |
| unsigned long loops; |
| unsigned long long total_count=0; |
| unsigned long diff=0, t0; |
| unsigned long tmp; |
| unsigned long **chunk_ptrs = NULL; |
| struct thread_params *params; |
| pthread_t *tid; |
| int i, rval; |
| |
| if (size & 255) |
| error ("do_write(): chunk size is not multiple of 256."); |
| |
| params = malloc(sizeof(struct thread_params) * thread_num); |
| if (!params) |
| error ("Out of memory"); |
| |
| tid = malloc(sizeof(pthread_t) * thread_num); |
| if (!tid) |
| error ("Out of memory"); |
| |
| //------------------------------------------------- |
| chunk0 = malloc (size+32); |
| chunk = chunk0; |
| if (!chunk) |
| error ("Out of memory"); |
| |
| tmp = (unsigned long) chunk; |
| if (tmp & 15) { |
| tmp -= (tmp & 15); |
| tmp += 16; |
| chunk = (unsigned char*) tmp; |
| } |
| |
| //---------------------------------------- |
| // Set up random pointers to chunks. |
| // |
| if (random) { |
| tmp = size/256; |
| chunk_ptrs = (unsigned long**) malloc (sizeof (unsigned long*) * tmp); |
| if (!chunk_ptrs) |
| error ("Out of memory."); |
| |
| //---------------------------------------- |
| // Store pointers to all chunks into array. |
| // |
| int i; |
| for (i = 0; i < tmp; i++) { |
| chunk_ptrs [i] = (unsigned long*) (chunk + 256 * i); |
| } |
| |
| //---------------------------------------- |
| // Randomize the array of chunk pointers. |
| // |
| int k = 100; |
| while (k--) { |
| for (i = 0; i < tmp; i++) { |
| int j = rand() % tmp; |
| if (i != j) { |
| unsigned long *ptr = chunk_ptrs [i]; |
| chunk_ptrs [i] = chunk_ptrs [j]; |
| chunk_ptrs [j] = ptr; |
| } |
| } |
| } |
| } |
| |
| //------------------------------------------------- |
| if (random) |
| print (L"Random write "); |
| else |
| print (L"Sequential write "); |
| |
| if (mode == SSE2) { |
| print (L"(128-bit), size = "); |
| } |
| else |
| if (mode == SSE2_BYPASS) { |
| print (L"bypassing cache (128-bit), size = "); |
| } else { |
| #if defined(__x86_64__) || defined(__aarch64__) |
| print (L"(64-bit), size = "); |
| #else |
| print (L"(32-bit), size = "); |
| #endif |
| } |
| |
| print_size (size); |
| print (L", "); |
| |
| loops = (1 << 26) / size;// XX need to adjust for CPU MHz |
| |
| tmp = size / thread_num; |
| |
| for (i = 0; i < thread_num; i++) { |
| params[i].id = i; |
| params[i].random = random; |
| params[i].size = tmp < 1024 ? size : tmp; |
| if (random) |
| params[i].chunk_ptrs = tmp < 1024 ? chunk_ptrs : chunk_ptrs + i * (tmp / 256); |
| else |
| params[i].chunk = tmp < 1024 ? chunk : chunk + i * tmp; |
| params[i].loops = loops; |
| } |
| |
| t0 = mytime (); |
| |
| goon_flag = 1; |
| |
| for (i = 0; i < thread_num; i++) { |
| rval = pthread_create(&tid[i] ,NULL, do_thread_write, ¶ms[i]); |
| if (rval < 0) { |
| perror("can't create pthread\n"); |
| return rval; |
| } |
| } |
| |
| usleep(usec_per_test); |
| |
| goon_flag = 0; |
| |
| for (i = 0; i < thread_num; i++) { |
| pthread_join(tid[i], NULL); |
| total_count += params[i].loops; |
| } |
| |
| diff = mytime () - t0; |
| |
| total_count /= thread_num; |
| |
| print (L"loops = "); |
| print_int (total_count); |
| print (L", "); |
| |
| flush (); |
| |
| int result = calculate_result (size, total_count, diff); |
| newline (); |
| |
| flush (); |
| |
| free ((void*)chunk0); |
| |
| if (chunk_ptrs) |
| free (chunk_ptrs); |
| |
| return result; |
| } |
| |
| static void *do_thread_read(void *arg) |
| { |
| struct thread_params *params = (struct thread_params *)arg; |
| unsigned long total_count = 0; |
| cpu_set_t mask; |
| |
| CPU_ZERO(&mask); |
| CPU_SET(params->id % cpu_num, &mask); |
| |
| if (pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) < 0) |
| fprintf(stderr, "set thread %d affinity failed\n", params->id); |
| |
| while(goon_flag) { |
| total_count += params->loops; |
| |
| if (params->random) |
| RandomReader (params->chunk_ptrs, params->size/256, params->loops); |
| else |
| Reader (params->chunk, params->size, params->loops); |
| } |
| |
| params->loops = total_count; |
| |
| pthread_exit(NULL); |
| |
| return NULL; |
| } |
| |
| //---------------------------------------------------------------------------- |
| // Name: do_read |
| // Purpose: Performs sequential read on chunk of memory of specified size. |
| //---------------------------------------------------------------------------- |
| int |
| do_read (unsigned long size, bool use_sse2, bool random) |
| { |
| unsigned long diff=0; |
| unsigned long long total_count = 0; |
| unsigned char *chunk; |
| unsigned char *chunk0; |
| unsigned long tmp; |
| unsigned long **chunk_ptrs = NULL; |
| unsigned long t0, loops = (1 << 26) / size; // XX need to adjust for CPU MHz |
| struct thread_params *params; |
| pthread_t *tid; |
| int i, rval; |
| |
| if (size & 255) |
| error ("do_read(): chunk size is not multiple of 256."); |
| |
| params = malloc(sizeof(struct thread_params) * thread_num); |
| if (!params) |
| error ("Out of memory"); |
| |
| tid = malloc(sizeof(pthread_t) * thread_num); |
| if (!tid) |
| error ("Out of memory"); |
| |
| //------------------------------------------------- |
| if (random) |
| print (L"Random read "); |
| else |
| print (L"Sequential read "); |
| |
| if (use_sse2) { |
| print (L"(128-bit), size = "); |
| } else { |
| #if defined(__x86_64__) || defined(__aarch64__) |
| print (L"(64-bit), size = "); |
| #else |
| print (L"(32-bit), size = "); |
| #endif |
| } |
| |
| print_size (size); |
| print (L", "); |
| |
| flush (); |
| |
| //------------------------------------------------- |
| chunk0 = chunk = malloc (size+32); |
| if (!chunk) |
| error ("Out of memory"); |
| |
| memset (chunk, 0, size); |
| |
| tmp = (unsigned long) chunk; |
| if (tmp & 15) { |
| tmp -= (tmp & 15); |
| tmp += 16; |
| chunk = (unsigned char*) tmp; |
| } |
| |
| //---------------------------------------- |
| // Set up random pointers to chunks. |
| // |
| if (random) { |
| int tmp = size/256; |
| chunk_ptrs = (unsigned long**) malloc (sizeof (unsigned long*) * tmp); |
| if (!chunk_ptrs) |
| error ("Out of memory."); |
| |
| //---------------------------------------- |
| // Store pointers to all chunks into array. |
| // |
| int i; |
| for (i = 0; i < tmp; i++) { |
| chunk_ptrs [i] = (unsigned long*) (chunk + 256 * i); |
| } |
| |
| //---------------------------------------- |
| // Randomize the array of chunk pointers. |
| // |
| int k = 100; |
| while (k--) { |
| for (i = 0; i < tmp; i++) { |
| int j = rand() % tmp; |
| if (i != j) { |
| unsigned long *ptr = chunk_ptrs [i]; |
| chunk_ptrs [i] = chunk_ptrs [j]; |
| chunk_ptrs [j] = ptr; |
| } |
| } |
| } |
| } |
| |
| tmp = size / thread_num; |
| |
| for (i = 0; i < thread_num; i++) { |
| params[i].id = i; |
| params[i].random = random; |
| params[i].size = tmp < 1024 ? size : tmp; |
| if (random) |
| params[i].chunk_ptrs = tmp < 1024 ? chunk_ptrs : chunk_ptrs + i * (tmp / 256); |
| else |
| params[i].chunk = tmp < 1024 ? chunk : chunk + i * tmp; |
| params[i].loops = loops; |
| } |
| |
| t0 = mytime (); |
| |
| goon_flag = 1; |
| |
| for (i = 0; i < thread_num; i++) { |
| rval = pthread_create(&tid[i] ,NULL, do_thread_read, ¶ms[i]); |
| if (rval < 0) { |
| perror("can't create pthread\n"); |
| return rval; |
| } |
| } |
| |
| usleep(usec_per_test); |
| |
| goon_flag = 0; |
| |
| for (i = 0; i < thread_num; i++) { |
| pthread_join(tid[i], NULL); |
| total_count += params[i].loops; |
| } |
| |
| diff = mytime () - t0; |
| |
| total_count /= thread_num; |
| |
| print (L"loops = "); |
| print_int (total_count); |
| print (L", "); |
| |
| int result = calculate_result (size, total_count, diff); |
| newline (); |
| |
| flush (); |
| |
| free (chunk0); |
| |
| if (chunk_ptrs) |
| free (chunk_ptrs); |
| |
| free(params); |
| free(tid); |
| |
| return result; |
| } |
| |
| |
| |
| //---------------------------------------------------------------------------- |
| // Name: do_copy |
| // Purpose: Performs sequential memory copy. |
| //---------------------------------------------------------------------------- |
| int |
| do_copy (unsigned long size, int mode) |
| { |
| unsigned long loops; |
| unsigned long long total_count = 0; |
| unsigned long t0, diff=0; |
| unsigned char *chunk_src; |
| unsigned char *chunk_dest; |
| unsigned char *chunk_src0; |
| unsigned char *chunk_dest0; |
| unsigned long tmp; |
| |
| if (size & 255) |
| error ("do_copy(): chunk size is not multiple of 256."); |
| |
| //------------------------------------------------- |
| chunk_src0 = chunk_src = malloc (size+32); |
| if (!chunk_src) |
| error ("Out of memory"); |
| chunk_dest0 = chunk_dest = malloc (size+32); |
| if (!chunk_dest) |
| error ("Out of memory"); |
| |
| memset (chunk_src, 100, size); |
| memset (chunk_dest, 200, size); |
| |
| tmp = (unsigned long) chunk_src; |
| if (tmp & 15) { |
| tmp -= (tmp & 15); |
| tmp += 16; |
| chunk_src = (unsigned char*) tmp; |
| } |
| tmp = (unsigned long) chunk_dest; |
| if (tmp & 15) { |
| tmp -= (tmp & 15); |
| tmp += 16; |
| chunk_dest = (unsigned char*) tmp; |
| } |
| |
| //------------------------------------------------- |
| print (L"Sequential copy "); |
| |
| if (mode == SSE2) { |
| print (L"(128-bit), size = "); |
| } |
| else { |
| #if defined(__x86_64__) || defined(__aarch64__) |
| print (L"(64-bit), size = "); |
| #else |
| print (L"(32-bit), size = "); |
| #endif |
| } |
| |
| print_size (size); |
| print (L", "); |
| |
| flush (); |
| |
| loops = (1 << 26) / size; // XX need to adjust for CPU MHz |
| |
| t0 = mytime (); |
| |
| while (diff < usec_per_test) { |
| total_count += loops; |
| |
| #if !defined(__arm__) && !defined(__aarch64__) |
| if (mode == SSE2) |
| CopySSE (chunk_dest, chunk_src, size, loops); |
| #if 0 |
| else |
| Copy (chunk_dest, chunk_src, size, loops); |
| #endif |
| #endif |
| |
| diff = mytime () - t0; |
| } |
| |
| print (L"loops = "); |
| print_int (total_count); |
| print (L", "); |
| |
| int result = calculate_result (size, total_count, diff); |
| newline (); |
| |
| flush (); |
| |
| free (chunk_src0); |
| free (chunk_dest0); |
| |
| return result; |
| } |
| |
| |
//----------------------------------------------------------------------------
// Name:	fb_readwrite
// Purpose:	Performs sequential read & write tests on framebuffer memory.
// Params:	use_sse2 - on non-ARM builds, selects the SSE2 reader and the
//			   cache-bypassing SSE2 writer.
// Notes:	Maps /dev/fb0 (or /dev/fb/0), times FBLOOPS_R reads and
//		FBLOOPS_W writes over the first FB_SIZE bytes, then releases
//		the mapping and the descriptor. Problems are reported through
//		println() and end the test early.
//----------------------------------------------------------------------------
#if defined(__linux__) && defined(FBIOGET_FSCREENINFO)
void
fb_readwrite (bool use_sse2)
{
	unsigned long total_count;
	unsigned long length;
	unsigned long fblen;
	unsigned long diff, t0;
	static struct fb_fix_screeninfo fi;
	static struct fb_var_screeninfo vi;
	unsigned long *fb = NULL;
	int fd;
#if defined(__x86_64__) || defined(__aarch64__)
	unsigned long value = 0x1234567689abcdef;
#else
	unsigned long value = 0x12345678;
#endif

	//-------------------------------------------------

	fd = open ("/dev/fb0", O_RDWR);
	if (fd < 0)
		fd = open ("/dev/fb/0", O_RDWR);
	if (fd < 0) {
		println (L"Cannot open framebuffer device.");
		return;
	}

	if (ioctl (fd, FBIOGET_FSCREENINFO, &fi) ||
	    ioctl (fd, FBIOGET_VSCREENINFO, &vi)) {
		close (fd);
		println (L"Cannot get framebuffer info");
		return;
	}

	if (fi.visual != FB_VISUAL_TRUECOLOR &&
	    fi.visual != FB_VISUAL_DIRECTCOLOR ) {
		close (fd);
		println (L"Need direct/truecolor framebuffer device.");
		return;
	}

	print (L"Framebuffer resolution: ");
	print_int (vi.xres);
	print (L"x");
	print_int (vi.yres);
	print (L", ");
	print_int (vi.bits_per_pixel);
	println (L" bpp\n");

	// Let the kernel choose the address: smem_start is not a usable
	// placement hint for this process.
	fblen = fi.smem_len;
	fb = mmap (NULL, fblen,
		PROT_WRITE | PROT_READ,
		MAP_SHARED, fd, 0);
	if (fb == MAP_FAILED) {
		close (fd);
		println (L"Cannot access framebuffer memory.");
		return;
	}

	//-------------------
	// The tests touch the first FB_SIZE bytes of the mapping, so the
	// mapping has to be at least that large.
	//
	length = FB_SIZE;
	if (length > fblen) {
		munmap (fb, fblen);
		close (fd);
		println (L"Framebuffer is smaller than the test region.");
		return;
	}

	//-------------------
	// READ
	//
	print (L"Framebuffer memory sequential read ");
	flush ();

	t0 = mytime ();

	total_count = FBLOOPS_R;

#if !defined(__arm__) && !defined(__aarch64__)
	if (use_sse2)
		ReaderSSE2 (fb, length, FBLOOPS_R);
	else
#endif
		Reader (fb, length, FBLOOPS_R);

	diff = mytime () - t0;

	calculate_result (length, total_count, diff);
	newline ();

	//-------------------
	// WRITE
	//
	print (L"Framebuffer memory sequential write ");
	flush ();

	t0 = mytime ();

	total_count = FBLOOPS_W;

#if !defined(__arm__) && !defined(__aarch64__)
	if (use_sse2)
		WriterSSE2_bypass (fb, length, FBLOOPS_W, value);
	else
#endif
		Writer (fb, length, FBLOOPS_W, value);

	diff = mytime () - t0;

	calculate_result (length, total_count, diff);
	newline ();

	// Release the mapping and the device.
	munmap (fb, fblen);
	close (fd);
}
#endif
| |
| //---------------------------------------------------------------------------- |
| // Name: register_test |
| // Purpose: Determines bandwidth of register-to-register transfers. |
| //---------------------------------------------------------------------------- |
| void |
| register_test () |
| { |
| long long total_count = 0; |
| unsigned long t0; |
| unsigned long diff = 0; |
| |
| //-------------------------------------- |
| #if defined(__x86_64__) || defined(__aarch64__) |
| print (L"Main register to main register transfers (64-bit) "); |
| #else |
| print (L"Main register to main register transfers (32-bit) "); |
| #endif |
| flush (); |
| #define REGISTER_COUNT 10000 |
| |
| t0 = mytime (); |
| while (diff < usec_per_test) |
| { |
| RegisterToRegister (REGISTER_COUNT); |
| total_count += REGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| |
| #if !defined(__arm__) && !defined(__aarch64__) |
| //-------------------------------------- |
| #ifdef __x86_64__ |
| print (L"Main register to vector register transfers (64-bit) "); |
| #else |
| print (L"Main register to vector register transfers (32-bit) "); |
| #endif |
| flush (); |
| #define VREGISTER_COUNT 3333 |
| |
| t0 = mytime (); |
| diff = 0; |
| total_count = 0; |
| while (diff < usec_per_test) |
| { |
| RegisterToVector (VREGISTER_COUNT); |
| total_count += VREGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| |
| //-------------------------------------- |
| #ifdef __x86_64__ |
| print (L"Vector register to main register transfers (64-bit) "); |
| #else |
| print (L"Vector register to main register transfers (32-bit) "); |
| #endif |
| flush (); |
| |
| t0 = mytime (); |
| diff = 0; |
| total_count = 0; |
| while (diff < usec_per_test) |
| { |
| VectorToRegister (VREGISTER_COUNT); |
| total_count += VREGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| |
| //-------------------------------------- |
| print (L"Vector register to vector register transfers (128-bit) "); |
| flush (); |
| |
| t0 = mytime (); |
| diff = 0; |
| total_count = 0; |
| while (diff < usec_per_test) |
| { |
| VectorToVector (VREGISTER_COUNT); |
| total_count += VREGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| |
| //-------------------------------------- |
| if (use_sse4) { |
| print (L"Vector 8-bit datum to main register transfers "); |
| flush (); |
| |
| t0 = mytime (); |
| diff = 0; |
| total_count = 0; |
| while (diff < usec_per_test) |
| { |
| Vector8ToRegister (VREGISTER_COUNT); |
| total_count += VREGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| } |
| |
| //-------------------------------------- |
| print (L"Vector 16-bit datum to main register transfers "); |
| flush (); |
| |
| t0 = mytime (); |
| diff = 0; |
| total_count = 0; |
| while (diff < usec_per_test) |
| { |
| Vector16ToRegister (VREGISTER_COUNT); |
| total_count += VREGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| |
| //-------------------------------------- |
| if (use_sse4) { |
| print (L"Vector 32-bit datum to main register transfers "); |
| flush (); |
| |
| t0 = mytime (); |
| diff = 0; |
| total_count = 0; |
| while (diff < usec_per_test) |
| { |
| Vector32ToRegister (VREGISTER_COUNT); |
| total_count += VREGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| } |
| |
| #ifdef __x86_64__ |
| //-------------------------------------- |
| print (L"Vector 64-bit datum to main register transfers "); |
| flush (); |
| |
| t0 = mytime (); |
| diff = 0; |
| total_count = 0; |
| while (diff < usec_per_test) |
| { |
| Vector64ToRegister (VREGISTER_COUNT); |
| total_count += VREGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| #endif |
| |
| //-------------------------------------- |
| if (use_sse4) { |
| print (L"Main register 8-bit datum to vector register transfers "); |
| flush (); |
| |
| t0 = mytime (); |
| diff = 0; |
| total_count = 0; |
| while (diff < usec_per_test) |
| { |
| Register8ToVector (VREGISTER_COUNT); |
| total_count += VREGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| } |
| |
| //-------------------------------------- |
| print (L"Main register 16-bit datum to vector register transfers "); |
| flush (); |
| |
| t0 = mytime (); |
| diff = 0; |
| total_count = 0; |
| while (diff < usec_per_test) |
| { |
| Register16ToVector (VREGISTER_COUNT); |
| total_count += VREGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| |
| //-------------------------------------- |
| if (use_sse4) { |
| print (L"Main register 32-bit datum to vector register transfers "); |
| flush (); |
| |
| t0 = mytime (); |
| diff = 0; |
| total_count = 0; |
| while (diff < usec_per_test) |
| { |
| Register32ToVector (VREGISTER_COUNT); |
| total_count += VREGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| } |
| |
| #ifdef __x86_64__ |
| //-------------------------------------- |
| print (L"Main register 64-bit datum to vector register transfers "); |
| flush (); |
| |
| t0 = mytime (); |
| diff = 0; |
| total_count = 0; |
| while (diff < usec_per_test) |
| { |
| Register64ToVector (VREGISTER_COUNT); |
| total_count += VREGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| #endif |
| #endif |
| } |
| |
| //---------------------------------------------------------------------------- |
| // Name: stack_test |
| // Purpose: Determines bandwidth of stack-to/from-register transfers. |
| //---------------------------------------------------------------------------- |
| void |
| stack_test () |
| { |
| long long total_count = 0; |
| unsigned long t0; |
| unsigned long diff = 0; |
| |
| #if defined(__x86_64__) || defined(__aarch64__) |
| print (L"Stack-to-register transfers (64-bit) "); |
| #else |
| print (L"Stack-to-register transfers (32-bit) "); |
| #endif |
| flush (); |
| |
| //-------------------------------------- |
| diff = 0; |
| total_count = 0; |
| t0 = mytime (); |
| while (diff < usec_per_test) |
| { |
| StackReader (REGISTER_COUNT); |
| total_count += REGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| |
| #if defined(__x86_64__) || defined(__aarch64__) |
| print (L"Register-to-stack transfers (64-bit) "); |
| #else |
| print (L"Register-to-stack transfers (32-bit) "); |
| #endif |
| flush (); |
| |
| //-------------------------------------- |
| diff = 0; |
| total_count = 0; |
| t0 = mytime (); |
| while (diff < usec_per_test) |
| { |
| StackWriter (REGISTER_COUNT); |
| total_count += REGISTER_COUNT; |
| |
| diff = mytime () - t0; |
| } |
| |
| calculate_result (256, total_count, diff); |
| newline (); |
| flush (); |
| } |
| |
//----------------------------------------------------------------------------
// Name:	library_test
// Purpose:	Performs C library tests (memset, memcpy).
// Notes:	Two heap buffers of NT_SIZE bytes are allocated. The first
//		is filled NT_SIZE2 times with memset, then copied NT_SIZE2
//		times into the second with memcpy. Each phase is timed with
//		mytime and reported through calculate_result.
//----------------------------------------------------------------------------
void
library_test ()
{
	char *fill_buf;
	char *copy_buf;
	unsigned long began;
	unsigned long took;
	int pass;

	// Buffer size (NT_SIZE) and repetition count (NT_SIZE2) are chosen
	// per platform.
#if defined(__WIN32__) && (defined(__arm__) || defined(__aarch64__))
#define NT_SIZE (1024*1024)
#define NT_SIZE2 (50)
#elif !defined(__WIN32__) && (defined(__arm__) || defined(__aarch64__))
#if defined(DRAM_SIZE_SMALL)
#define NT_SIZE (16*1024*1024)
#else
#define NT_SIZE (32*1024*1024)
#endif
#define NT_SIZE2 (50)
#else
#define NT_SIZE (64*1024*1024)
#define NT_SIZE2 (100)
#endif

	fill_buf = malloc (NT_SIZE);
	if (fill_buf == NULL)
		error ("Out of memory");

	copy_buf = malloc (NT_SIZE);
	if (copy_buf == NULL)
		error ("Out of memory");

	//--------------------------------------
	// memset: the pass number doubles as the fill byte.
	//
	began = mytime ();
	pass = 0;
	while (pass < NT_SIZE2) {
		memset (fill_buf, pass, NT_SIZE);
		pass++;
	}
	took = mytime () - began;

	print (L"Library: memset ");
	calculate_result (NT_SIZE, NT_SIZE2, took);
	newline ();

	flush ();

	//--------------------------------------
	// memcpy: first buffer into the second.
	//
	began = mytime ();
	pass = 0;
	while (pass < NT_SIZE2) {
		memcpy (copy_buf, fill_buf, NT_SIZE);
		pass++;
	}
	took = mytime () - began;

	print (L"Library: memcpy ");
	calculate_result (NT_SIZE, NT_SIZE2, took);
	newline ();

	flush ();

	free (fill_buf);
	free (copy_buf);
}
| |
| //---------------------------------------------------------------------------- |
| // Name: network_test_core |
| // Purpose: Performs the network test, talking to and receiving data |
| // back from a transponder node. |
| // Note: Port number specified using server:# notation. |
| // Returns: -1 on error, else the network duration in microseconds. |
| //---------------------------------------------------------------------------- |
| long |
| network_test_core (const char *net_path, char *chunk, |
| unsigned long chunk_size, |
| unsigned long count) |
| { |
| char hostname [PATH_MAX]; |
| char *s; |
| int port = NETWORK_DEFAULT_PORTNUM ; |
| strcpy (hostname, net_path); |
| if ((s = strchr (hostname, ':'))) { |
| *s++ = 0; |
| port = atoi (s); |
| } |
| |
| struct hostent* host = gethostbyname (hostname); |
| if (!host) |
| return -1; |
| |
| char *host_ip = inet_ntoa (*(struct in_addr *)*host->h_addr_list); |
| int sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); |
| |
| struct sockaddr_in addr; |
| addr.sin_family = AF_INET; |
| addr.sin_addr.s_addr = inet_addr(host_ip); |
| addr.sin_port = htons(port); |
| |
| if (connect (sock, (struct sockaddr*) &addr, sizeof (struct sockaddr))) |
| { |
| // perror ("connect"); |
| close (sock); |
| return -1; |
| } |
| |
| //------------------------------------ |
| // Send all of our data. |
| // |
| unsigned long t0 = mytime (); |
| int i; |
| for (i = 0; i < count; i++) |
| send (sock, chunk, chunk_size, 0); |
| |
| #if 0 |
| //------------------------------------ |
| // Set nonblocking mode. |
| // |
| int opt = 1; |
| ioctl (sock, FIONBIO, &opt); |
| #endif |
| |
| //------------------------------------ |
| // Read the response. |
| // |
| char *buffer = malloc (chunk_size); |
| if (!buffer) { |
| close (sock); |
| // perror ("malloc"); |
| return -1; |
| } |
| int amount = recv (sock, buffer, chunk_size, 0); |
| if (amount <= 0) { |
| close (sock); |
| //perror ("recv"); |
| return -1; |
| } |
| |
| long t = mytime () - t0; |
| close (sock); |
| free (buffer); |
| return t; |
| } |
| |
//----------------------------------------------------------------------------
// Name:	ip_to_str
// Purpose:	Formats the low 32 bits of addr as dotted-decimal text,
//		least significant byte first.
// Params:	addr	Address value to format.
//		str	Destination for the NUL-terminated text; nothing is
//			written when it is NULL. The caller must supply room
//			for up to 16 bytes.
//----------------------------------------------------------------------------
void
ip_to_str (unsigned long addr, char *str)
{
	unsigned int octet [4];
	int k;

	if (str == NULL)
		return;

	// octet[0] is bits 0-7, octet[3] is bits 24-31.
	for (k = 0; k < 4; k++)
		octet [k] = (unsigned int) ((addr >> (8 * k)) & 0xff);

	sprintf (str, "%u.%u.%u.%u", octet [0], octet [1], octet [2], octet [3]);
}
| |
| //---------------------------------------------------------------------------- |
| // Name: network_transponder |
| // Purpose: Act as a transponder, receiving chunks of data and sending |
| // back an acknowledgement once the enture chunk is read. |
| // Returns: False if a problem occurs setting up the network socket. |
| //---------------------------------------------------------------------------- |
| bool |
| network_transponder () |
| { |
| struct sockaddr_in sin, from; |
| |
| //------------------------------ |
| // Get listening socket for port. |
| // Then listen on given port#. |
| // |
| sin.sin_family = AF_INET; |
| sin.sin_addr.s_addr = htonl(INADDR_ANY); |
| sin.sin_port = htons(NETWORK_DEFAULT_PORTNUM); |
| int listensock; |
| if ((listensock = socket (AF_INET, SOCK_STREAM, 0)) < 0) { |
| perror ("socket"); |
| return false; |
| } |
| if (bind (listensock, (struct sockaddr*) &sin, sizeof(sin)) < 0) { |
| perror ("bind"); |
| close (listensock); |
| return false; |
| } |
| if (listen (listensock, 500) < 0) { |
| perror ("listen"); |
| close (listensock); |
| return false; |
| } |
| |
| bool done = false; |
| while (!done) { |
| //---------------------------------------- |
| // Wait for a client to contact us. |
| // |
| socklen_t len = sizeof (struct sockaddr); |
| int sock = accept (listensock, (struct sockaddr*) &from, &len); |
| if (sock < 0) { |
| perror ("accept"); |
| close (listensock); |
| return false; |
| } |
| |
| if (len != sizeof (struct sockaddr_in)) { |
| close (sock); |
| close (listensock); |
| return false; |
| } |
| |
| #if 0 |
| unsigned long ipaddr = from.sin_addr.s_addr; |
| char ipstring[30]; |
| ip_to_str (ipaddr, ipstring); |
| fprintf (stderr, "Incoming connection from %s\n", ipstring); |
| #endif |
| |
| char chunk [NETWORK_CHUNK_SIZE+1]; |
| long n_chunks = 0; |
| int amount_read = read (sock, chunk, NETWORK_CHUNK_SIZE); |
| chunk [amount_read] = 0; |
| if (1 != sscanf (chunk, "%ld", &n_chunks)) { |
| close (sock); |
| close (listensock); |
| return false; |
| } |
| |
| //---------------------------------------- |
| // If the leader sends us a chunk count of |
| // -99, this indicates that we should exit. |
| // |
| if (n_chunks == -99) { |
| close (sock); |
| close (listensock); |
| return true; |
| } |
| |
| // printf ("Reading %lu chunks of %d bytes...\n", n_chunks, NETWORK_CHUNK_SIZE); |
| |
| unsigned long long remaining = n_chunks; |
| remaining *= NETWORK_CHUNK_SIZE; |
| |
| // printf ("remaining="); dump_hex64(remaining); puts(""); |
| |
| remaining -= amount_read; |
| while (remaining > 0) { |
| amount_read = read (sock, chunk, NETWORK_CHUNK_SIZE); |
| remaining -= amount_read; |
| |
| if (amount_read < 0) { |
| perror ("read"); |
| break; |
| } else |
| if (!amount_read) |
| break; |
| } |
| |
| char *foo = "OK.\n\n"; |
| write (sock, foo, 4); |
| close (sock); |
| } |
| |
| return true; |
| } |
| |
| //---------------------------------------------------------------------------- |
| // Name: network_test |
| //---------------------------------------------------------------------------- |
| bool |
| network_test (char **destinations, int n_destinations) |
| { |
| int i; |
| |
| //---------------------------------------- |
| // The memory chunk starts with a 12-byte |
| // length of the overall send size. |
| // The memory chunk will have a list of |
| // the destinations in it. |
| // In future, there will be a mechanism |
| // for testing bandwidth between all nodes, |
| // not just the leader & each of the |
| // transponders. |
| // |
| char chunk [NETWORK_CHUNK_SIZE]; |
| memset (chunk, 0, NETWORK_CHUNK_SIZE); |
| sprintf (chunk, "000000000000\n%d\n", n_destinations); |
| for (i = 0; i < n_destinations; i++) { |
| char *s = destinations [i]; |
| int chunk_len = strlen (chunk); |
| int len = strlen (s); |
| if (len + chunk_len < NETWORK_CHUNK_SIZE-1) { |
| //---------------------------------------- |
| // "transp" indicates that the given node |
| // has not yet been a leader. |
| // In future, "done" will indicate it has. |
| // |
| sprintf (chunk + chunk_len, "%s %s\n", s, "transp"); |
| } |
| } |
| |
| //---------------------------------------- |
| // For each destination, run the test. |
| // |
| for (i = 0; i < n_destinations; i++) { |
| int j = 0; |
| bool problem = false; |
| |
| char *hostname = destinations[i]; |
| printf ("Bandwidth sending to %s:\n", hostname); |
| |
| //---------------------------------------- |
| // Send from 8kB up to 32 MB of data. |
| // |
| while (!problem && j < 13) { |
| unsigned long chunk_count = 1 << j; |
| unsigned long long amt_to_send = chunk_count; |
| amt_to_send *= NETWORK_CHUNK_SIZE; |
| |
| if (!amt_to_send) // unlikely |
| break; |
| |
| //---------------------------------------- |
| // Write the overall send size into the |
| // 1st line of the chunk so that the |
| // transponder knows how large the send |
| // is without guessing. |
| // |
| sprintf (chunk, "%11lu", chunk_count); |
| chunk[11] = ' '; |
| |
| //-------------------- |
| // Send the data. |
| // |
| long duration = network_test_core (hostname, |
| chunk, NETWORK_CHUNK_SIZE, chunk_count); |
| if (duration == -1) { |
| problem = true; |
| fprintf (stderr, "\nCan't connect to %s\n", hostname); |
| } else { |
| unsigned long amt_in_kb = amt_to_send / 1024; |
| unsigned long amt_in_mb = amt_to_send / 1048576; |
| if (!amt_in_mb) { |
| printf ("\tSent %lu kB...", amt_in_kb); |
| } else { |
| printf ("\tSent %lu MB...", amt_in_mb); |
| } |
| |
| //------------------------------ |
| // Calculate rate in MB/sec. |
| // |
| // Get total # bytes. |
| unsigned long long tmp = NETWORK_CHUNK_SIZE; |
| tmp *= chunk_count; |
| |
| // Get total bytes per second. |
| tmp *= 1000000; |
| tmp /= duration; |
| |
| // Bytes to megabytes. |
| tmp /= 1000; |
| tmp /= 10; |
| unsigned long whole = tmp / 100; |
| unsigned long frac = tmp % 100; |
| printf ("%lu.%02lu MB/second\n", whole, frac); |
| } |
| j++; |
| } |
| |
| puts (""); |
| } |
| |
| return true; |
| } |
| |
//----------------------------------------------------------------------------
// Name:	usage
// Purpose:	Prints the command-line synopsis to stdout and terminates
//		the program with exit status 0. Does not return.
//----------------------------------------------------------------------------
void
usage ()
{
	static const char *const synopsis [] = {
		"Usage for memory tests: bandwidth [--quick] [--thread N] [--chunk-size N]\n",
		"Usage for starting network tests: bandwidth --network <ipaddr1> [<ipaddr2...]\n",
		"Usage for receiving network tests: bandwidth --transponder\n",
	};
	size_t line;

	for (line = 0; line < sizeof (synopsis) / sizeof (synopsis [0]); line++)
		fputs (synopsis [line], stdout);

	exit (0);
}
| |
| //---------------------------------------------------------------------------- |
| // Name: main |
| //---------------------------------------------------------------------------- |
| int |
| main (int argc, char **argv) |
| { |
| int i, j, chunk_size; |
| |
| --argc; |
| ++argv; |
| |
| strcpy (graph_title, TITLE); |
| |
| bool network_mode = false; |
| bool network_leader = false; // false => transponder |
| int network_destinations_size = 0; |
| int n_network_destinations = 0; |
| char **network_destinations = NULL; |
| |
| i = 0; |
| while (i < argc) { |
| char *s = argv [i++]; |
| if (!strcmp ("--network", s)) { |
| network_mode = true; |
| network_leader = true; |
| network_destinations_size = 20; |
| network_destinations = (char**) malloc (network_destinations_size * sizeof (char*)); |
| } |
| else |
| if (!strcmp ("--transponder", s)) { |
| network_mode = true; |
| } |
| else |
| if (!strcmp ("--slow", s)) { |
| usec_per_test=20000000; // 20 seconds per test. |
| } |
| else |
| if (!strcmp ("--quick", s)) { |
| usec_per_test = 250000; // 0.25 seconds per test. |
| } |
| else |
| if (!strcmp ("--nosse2", s)) { |
| use_sse2 = false; |
| use_sse4 = false; |
| } |
| else |
| if (!strcmp ("--nosse4", s)) { |
| use_sse4 = false; |
| } |
| else |
| if (!strcmp ("--help", s)) { |
| usage (); |
| } |
| else |
| if (!strcmp ("--title", s) && i != argc) { |
| sprintf (graph_title, "%s -- %s", TITLE, argv[i++]); |
| } |
| else |
| if (!strcmp ("--thread", s)) { |
| int n = 0; |
| thread_num = atoi(argv[i++]); |
| for (j = 0; j < 32; j++) |
| n += (thread_num >> j) & 0x1; |
| if (n > 1) |
| error("thread_num must be power of 2\n"); |
| } |
| else |
| if (!strcmp ("--chunk-size", s)) { |
| chunk_size = strtoul(argv[i++], NULL, 0); |
| for (j = 0; j < sizeof(chunk_sizes) / sizeof(chunk_sizes[0]); j++) { |
| if (chunk_size <= chunk_sizes[j]) |
| break; |
| } |
| chunk_index = j; |
| } |
| else { |
| if (!network_mode || !network_leader) |
| usage (); |
| |
| if ('-' == *s) |
| usage (); |
| |
| if (n_network_destinations >= network_destinations_size) { |
| network_destinations_size *= 2; |
| int newsize = sizeof(char*) * network_destinations_size; |
| network_destinations = realloc (network_destinations, |
| newsize); |
| } |
| |
| network_destinations [n_network_destinations++] = strdup (s); |
| } |
| } |
| |
| msg[0] = 0; |
| |
| #if !(defined(__WIN32__) && (defined(__arm__) || defined(__aarch64__))) |
| printf ("This is bandwidth version %s.\n", VERSION); |
| printf ("Copyright (C) 2005-2010 by Zack T Smith.\n\n"); |
| printf ("This software is covered by the GNU Public License.\n"); |
| printf ("It is provided AS-IS, use at your own risk.\n"); |
| printf ("See the file COPYING for more information.\n\n"); |
| fflush (stdout); |
| #else |
| println (L"(C) 2010 by Zack Smith"); |
| println (L"Under GNU Public License"); |
| println (L"Use at your own risk."); |
| #endif |
| |
| //---------------------------------------- |
| // If network mode selected, enter it now. |
| // Currently cannot combine memory tests |
| // & network tests. |
| // |
| if (network_mode) { |
| if (network_leader) { |
| network_test (network_destinations, n_network_destinations); |
| } else { |
| network_transponder (); |
| } |
| |
| puts ("Done."); |
| return 0; |
| } |
| |
| #if !defined(__arm__) && !defined(__aarch64__) |
| if (!has_sse2 ()) { |
| puts ("Processor does not have SSE2."); |
| use_sse2 = false; |
| use_sse4 = false; |
| } |
| |
| #ifdef __x86_64__ |
| if (use_sse2) |
| println (L"Using 128-bit and 64-bit data transfers."); |
| else |
| println (L"Using 64-bit data transfers."); |
| #else |
| if (use_sse2) |
| println (L"Using 128-bit and 32-bit data transfers."); |
| else |
| println (L"Using 32-bit data transfers."); |
| #endif |
| |
| #else |
| |
| #if defined(__aarch64__) |
| println (L"Using 64-bit transfers."); |
| #else |
| println (L"Using 32-bit transfers."); |
| #endif |
| |
| use_sse2 = false; |
| #endif |
| |
| println (L"Notation: kB = 1024 B, MB = 1048576 B."); |
| |
| flush (); |
| |
| //------------------------------------------------------------ |
| // Attempt to obtain information about the CPU. |
| // |
| #ifdef __linux__ |
| struct stat st; |
| if (!stat ("/proc/cpuinfo", &st)) { |
| #define TMPFILE "/tmp/bandw_tmp" |
| unlink (TMPFILE); |
| if (-1 == system ("grep MHz /proc/cpuinfo | uniq | sed \"s/[\\t\\n: a-zA-Z]//g\" > "TMPFILE)) |
| perror ("system"); |
| |
| FILE *f = fopen (TMPFILE, "r"); |
| if (f) { |
| float cpu_speed = 0.0; |
| |
| if (1 == fscanf (f, "%g", &cpu_speed)) { |
| puts (""); |
| printf ("CPU speed is %g MHz.\n", cpu_speed); |
| } |
| fclose (f); |
| } |
| |
| #if !defined(__arm__) && !defined(__aarch64__) |
| unlink (TMPFILE); |
| if (-1 == system ("grep -i sse4 /proc/cpuinfo > "TMPFILE)) |
| perror ("system"); |
| |
| if (!stat (TMPFILE, &st)) { |
| if (st.st_size < 2) { |
| use_sse4 = false; |
| puts ("Processor lacks SSE4."); |
| } |
| } |
| |
| if (!use_sse2) { |
| unlink (TMPFILE); |
| if (-1 == system ("grep -i sse2 /proc/cpuinfo > "TMPFILE)) |
| perror ("system"); |
| |
| if (!stat (TMPFILE, &st)) { |
| if (st.st_size < 2) { |
| use_sse2 = false; |
| puts ("Processor lacks SSE2."); |
| } |
| } |
| } |
| #endif |
| } else { |
| printf ("CPU information is not available (/proc/cpuinfo).\n"); |
| } |
| |
| cpu_num = sysconf(_SC_NPROCESSORS_CONF); |
| printf("System has %d processor(s)\n", cpu_num); |
| |
| fflush (stdout); |
| #endif |
| |
| graph = BMP_new (graph_width, graph_height); |
| graph_init (); |
| |
| #if !defined(__arm__) && !defined(__aarch64__) |
| //------------------------------------------------------------ |
| // SSE2 sequential reads. |
| // |
| if (use_sse2) { |
| graph_new_line ("Sequential 128-bit reads", RGB_RED); |
| |
| newline (); |
| |
| i = chunk_index; |
| while ((chunk_size = chunk_sizes [i++])) { |
| int amount = do_read (chunk_size, true, false); |
| |
| graph_add_point (chunk_size, amount); |
| } |
| } |
| |
| //------------------------------------------------------------ |
| // SSE2 random reads. |
| // |
| if (use_sse2) { |
| graph_new_line ("Random 128-bit reads", RGB_MAROON); |
| |
| newline (); |
| srand (time (NULL)); |
| |
| i = chunk_index; |
| while ((chunk_size = chunk_sizes [i++])) { |
| int amount = do_read (chunk_size, true, true); |
| |
| graph_add_point (chunk_size, amount); |
| } |
| } |
| |
| //------------------------------------------------------------ |
| // SSE2 sequential writes that do not bypass the caches. |
| // |
| if (use_sse2) { |
| graph_new_line ("Sequential 128-bit cache writes", RGB_PURPLE); |
| |
| newline (); |
| |
| i = chunk_index; |
| while ((chunk_size = chunk_sizes [i++])) { |
| int amount = do_write (chunk_size, SSE2, false); |
| |
| graph_add_point (chunk_size, amount); |
| } |
| } |
| |
| //------------------------------------------------------------ |
| // SSE2 random writes that do not bypass the caches. |
| // |
| if (use_sse2) { |
| graph_new_line ("Random 128-bit cache writes", RGB_NAVYBLUE); |
| |
| newline (); |
| srand (time (NULL)); |
| |
| i = chunk_index; |
| while ((chunk_size = chunk_sizes [i++])) { |
| int amount = do_write (chunk_size, SSE2, true); |
| |
| graph_add_point (chunk_size, amount); |
| } |
| } |
| |
| //------------------------------------------------------------ |
| // SSE2 sequential writes that do bypass the caches. |
| // |
| if (use_sse2) { |
| graph_new_line ("Sequential 128-bit bypassing writes", RGB_DARKORANGE); |
| |
| newline (); |
| |
| i = chunk_index; |
| while ((chunk_size = chunk_sizes [i++])) { |
| int amount = do_write (chunk_size, SSE2_BYPASS, false); |
| |
| graph_add_point (chunk_size, amount); |
| } |
| } |
| |
| //------------------------------------------------------------ |
| // SSE2 random writes that bypass the caches. |
| // |
| if (use_sse2) { |
| graph_new_line ("Random 128-bit bypassing writes", RGB_LEMONYELLOW); |
| |
| newline (); |
| srand (time (NULL)); |
| |
| i = chunk_index; |
| while ((chunk_size = chunk_sizes [i++])) { |
| int amount = do_write (chunk_size, SSE2_BYPASS, true); |
| |
| graph_add_point (chunk_size, amount); |
| } |
| } |
| #endif |
| |
| //------------------------------------------------------------ |
| // Sequential non-SSE2 reads. |
| // |
| newline (); |
| #if defined(__x86_64__) || defined(__aarch64__) |
| graph_new_line ("Sequential 64-bit reads", RGB_BLUE); |
| #else |
| graph_new_line ("Sequential 32-bit reads", RGB_BLUE); |
| #endif |
| |
| i = chunk_index; |
| while ((chunk_size = chunk_sizes [i++])) { |
| int amount = do_read (chunk_size, false, false); |
| |
| graph_add_point (chunk_size, amount); |
| } |
| |
| //------------------------------------------------------------ |
| // Random non-SSE2 reads. |
| // |
| newline (); |
| #if defined(__x86_64__) || defined(__aarch64__) |
| graph_new_line ("Random 64-bit reads", RGB_CYAN); |
| #else |
| graph_new_line ("Random 32-bit reads", RGB_CYAN); |
| #endif |
| srand (time (NULL)); |
| |
| i = chunk_index; |
| while ((chunk_size = chunk_sizes [i++])) { |
| int amount = do_read (chunk_size, false, true); |
| |
| graph_add_point (chunk_size, amount); |
| } |
| |
| //------------------------------------------------------------ |
| // Sequential non-SSE2 writes. |
| // |
| #if defined(__x86_64__) || defined(__aarch64__) |
| graph_new_line ("Sequential 64-bit writes", RGB_DARKGREEN); |
| #else |
| graph_new_line ("Sequential 32-bit writes", RGB_DARKGREEN); |
| #endif |
| |
| newline (); |
| |
| i = chunk_index; |
| while ((chunk_size = chunk_sizes [i++])) { |
| int amount = do_write (chunk_size, NO_SSE2, false); |
| |
| graph_add_point (chunk_size, amount); |
| } |
| |
| //------------------------------------------------------------ |
| // Random non-SSE2 writes. |
| // |
| #if defined(__x86_64__) || defined(__aarch64__) |
| graph_new_line ("Random 64-bit writes", RGB_GREEN); |
| #else |
| graph_new_line ("Random 32-bit writes", RGB_GREEN); |
| #endif |
| |
| newline (); |
| srand (time (NULL)); |
| |
| i = chunk_index; |
| while ((chunk_size = chunk_sizes [i++])) { |
| int amount = do_write (chunk_size, NO_SSE2, true); |
| |
| graph_add_point (chunk_size, amount); |
| } |
| |
| #if !defined(__arm__) && !defined(__aarch64__) |
| //------------------------------------------------------------ |
| // SSE2 sequential copy. |
| // |
| if (use_sse2) { |
| graph_new_line ("Sequential 128-bit copy", 0x8f8844); |
| |
| newline (); |
| |
| i = 0; |
| while ((chunk_size = chunk_sizes [i++])) { |
| int amount = do_copy (chunk_size, SSE2); |
| |
| graph_add_point (chunk_size, amount); |
| } |
| } |
| #endif |
| |
| //------------------------------------------------------------ |
| // Register to register. |
| // |
| newline (); |
| register_test (); |
| |
| //------------------------------------------------------------ |
| // Stack to/from register. |
| // |
| newline (); |
| stack_test (); |
| |
| //------------------------------------------------------------ |
| // C library performance. |
| // |
| newline (); |
| library_test (); |
| |
| //------------------------------------------------------------ |
| // Framebuffer read & write. |
| // |
| #if defined(__linux__) && defined(FBIOGET_FSCREENINFO) |
| newline (); |
| fb_readwrite (true); |
| #endif |
| |
| #if defined(__WIN32__) && (defined(__arm__) || defined(__aarch64__)) |
| MessageBoxW (0, msg, APPNAME, 0); |
| |
| FILE *of = fopen ("bandwidth.log", "w"); |
| if (of) { |
| dump (of); |
| fclose (of); |
| } |
| #else |
| flush (); |
| #endif |
| |
| graph_make (); |
| |
| BMP_write (graph, "bandwidth.bmp"); |
| BMP_delete (graph); |
| #if defined(__linux__) || defined(__CYGWIN__) || defined(__APPLE__) |
| puts ("\nWrote graph to bandwidth.bmp."); |
| puts (""); |
| puts ("Done."); |
| #endif |
| |
| return 0; |
| } |