# ------------------------------------ snip ------------------
# Memory-bandwidth benchmark plots (gnuplot, inline data).
#
# Every data row has the layout given by the header comments below:
#   Tasks  Mem/Task[kB]  loops  t[s]  aBW[MB/s]  BW[MB/s]  lat[ns]  pattern ...
# Plots 1 and 2 use "u 1:6" (Tasks vs. BW), plot 3 uses "u ($2*1024):6"
# (Mem/Task scaled from kB by 1024 vs. BW).  Each "-" in a plot command
# reads one inline data block, terminated by a line containing only "e";
# rows starting with "#" are skipped.
#
# Line and point styles are written as "lt N pt M".  The bare positional
# form ("w lp 3 3") is only understood by old gnuplot releases.
#
# NOTE(review): a lone "#" is treated as a separator line, so the
# "set title" statements are assumed to be active - confirm.
#
# ToDo: Altix4700
#
# load "-"
# load "< script.sh"
# gnuplot headerfile - trailerfile # between from stdin
#
# --- plot 1: BW per core vs. number of cores, random access ---
set title "memory bandwidth per core vs. cores (256MB random)"
# ToDo: better randomBW versus streamBW(=peak)
set xlabel "Cores"
set key left Left reverse
set logscale x
#set ytics 2
set xtics 2
set ylabel "BW [MB/s]"
set size 0.6,1
plot [1:16][8:100] \
  "-" u 1:6 t "GS1280-ev7-1.15GHz" w lp lt 3 pt 3,\
  "-" u 1:6 t "4DualOpt885-2.6GHz" w lp lt 4 pt 4,\
  "-" u 1:6 t "2Quad-Nehalem-2.0GHz" w lp lt 5 pt 5,\
  "-" u 1:6 t "HT P4-1.3GHz-FSB400 / Xeon-3GHz" w lp lt 1 pt 1,\
  "-" u 1:6 t "Altix330-ia64-1.5GHz" w lp lt 2 pt 2,\
  0.01 t "" w l lt 1
#
# GS1280-alpha (as an orientation, under load)
# 4GB/CPU (32CPUs/RAD, mem striping)
# Tasks Mem/Task[kB] loops t[s] aBW[MB/s] BW[MB/s] lat[ns]
1 262144 1 8.18 32.80 32.80 243.9 random best
# 1 262144 1 9.36 28.68 28.68 278.9 random worst
2 262144 1 7.62 70.48 35.24 227.0 random
# 2 262144 1 8.15 65.86 32.93 242.9 random
4 262144 1 7.95 135.12 33.78 236.8 random best
# 4 262144 1 8.53 125.87 31.47 254.2 random worst
# 4 1.049e+06 1 41.89 102.54 25.63 312.1 random
# 4 32768 3 2.56 157.52 39.38 203.2 random
8 262144 1 7.39 290.73 36.34 220.1 random
16 262144 1 7.47 574.82 35.93 222.7 random
e
# comp1 ProLiant DL585 4*DualOpteron885-2.6GHz 32GB 4GB/core
# 1 262144 1 4.82 55.69 55.69 143.7 random 0xffff
1 262144 1 4.66 57.58 57.58 138.9 random 0xffff
# 2 262144 1 4.94 108.58 54.29 147.4 random 0x0011
2 262144 1 4.61 116.38 58.19 137.5 random 0x5555
# 2 262144 1 4.63 115.89 57.94 138.1 random 0xffff
# 4 262144 1 5.12 209.51 52.38 152.7 random 0x5555
4 262144 1 4.37 245.68 61.42 130.2 random 0xffff
# 8 262144 1 6.18 347.51 43.44 184.2 random 0xffff
# 8 262144 1 5.06 424.31 53.04 150.8 random 0xffff 2nd run numactl
8 262144 1 4.45 482.89 60.36 132.5 random 0xffff 3th run
e
# 2*Quad Nehalem E5504-2GHz, RAM=72GB=18*4GB
# 1 262144 1 3.97 67.69 67.69 118.2 random 0x00ff -O0
1 262144 1 3.81 70.38 70.38 113.7 random 0x00ff
2 262144 1 3.82 140.42 70.21 113.9 random 0x00ff
4 262144 1 4.09 262.34 65.59 122.0 random 0x00ff
8 262144 3 13.88 464.08 58.01 137.9 random 0x00ff
e
# P4-1.3GHz-FSB400
# mpi=1 mem/task= 256.000 MB int32=4B mintime[s]=4
# measure: BW = bandwidth, aBW = aggregate BW, lat = latency
# Tasks Mem/Task[kB] loops t[s] aBW[MB/s] BW[MB/s] lat[ns]
1 262144 1 13.20 20.34 20.34 196.6 random
2 262144 1 21.73 24.70 12.35 323.9 random
# Tina Xeon-3GHz
1 262144 1 12.13 22.12 22.12 180.8 random
2 262144 1 13.33 40.28 20.14 198.6 random
e
# altix330-ia64-1.5GHz 16*4GB=64GB (different processor bindings)
# FSB500??? Numalink=800MB/s??? whitepaper_local=145ns
1 262144 1 4.81 55.82 55.82 143.3 random 0xffff
2 262144 1 4.80 111.95 55.97 142.9 random 0x1111
# 4 2.09715e+06 1 45.98 186.81 46.70 171.3 random 0x1111
4 262144 1 4.80 223.86 55.97 142.9 random 0x1111
8 262144 3 14.40 447.47 55.93 143.0 random 0x5555
16 262144 1 5.58 769.24 48.08 166.4 random 0xffff
1 262144 1 4.81 55.82 55.82 143.3 random 0xffff
2 262144 1 5.58 96.27 48.13 166.2 random 0xffff
4 262144 1 5.58 192.52 48.13 166.2 random 0xffff
8 262144 1 5.58 385.04 48.13 166.2 random 0xffff
16 262144 1 5.58 769.24 48.08 166.4 random 0xffff
e
pause -1
#
# --- plot 2: BW per core vs. number of cores, lstream access ---
set title "memory bandwidth per core vs. cores (256MB lstream)"
# ToDo: better randomBW versus streamBW(=peak)
set xlabel "Cores"
set key left Left reverse
set logscale x
set xtics 2
set ylabel "BW [MB/s]"
plot [1:16][256:7*1024] \
  "-" u 1:6 t "GS1280-ev7-1.15GHz" w lp lt 3 pt 3,\
  "-" u 1:6 t "4DualOpt885-2.6GHz" w lp lt 4 pt 4,\
  "-" u 1:6 t "2Quad-Nehalem-2.0GHz" w lp lt 5 pt 5,\
  "-" u 1:6 t "HT P4-1.3GHz-FSB400 / Xeon-3GHz" w lp lt 1 pt 1,\
  "-" u 1:6 t "Altix330-ia64-1.5GHz" w lp lt 2 pt 2
#
# GS1280-alpha (as an orientation, under load)
# 4GB/CPU (32CPUs/RAD, mem striping)
1 262144 15 2.06 1956.80 1956.80 4.1 lstream
2 262144 15 2.07 3892.25 1946.12 4.1 lstream
# 4 1.049e+06 7 3.95 7620.68 1905.17 4.2 lstream
4 262144 15 2.03 7946.02 1986.50 4.0 lstream
8 262144 15 2.16 14932.31 1866.54 4.3 lstream
16 262144 15 2.40 26885.84 1680.36 4.8 lstream
e
# comp1 ProLiant DL585 4*DualOpteron885-2.6GHz 32GB 4GB/core
1 262144 31 3.41 2437.42 2437.42 3.3 lstream 0xffff
# 2 262144 15 2.16 3726.95 1863.48 4.3 lstream 0x0011
2 262144 31 3.39 4908.53 2454.27 3.3 lstream 0x5555
# 4 262144 15 2.59 6218.96 1554.74 5.1 lstream 0x5555
4 262144 31 3.62 9183.69 2295.92 3.5 lstream 0xffff
# 8 262144 15 4.04 7982.90 997.86 8.0 lstream 0xffff
# 8 262144 15 2.90 11118.24 1389.78 5.8 lstream 0xffff new?
8 262144 15 2.01 16041.83 2005.23 4.0 lstream 0xffff
e
# 2*Quad Nehalem E5504-2GHz, RAM=72GB=18*4GB, gcc-4.3.3 ompi-1.3.3
# 1 262144 15 2.18 1849.86 1849.86 4.3 lstream 0x00ff -O0
1 262144 63 2.66 6350.45 6350.45 1.3 lstream 0x00ff
2 262144 63 2.83 11933.71 5966.85 1.3 lstream 0x00ff
# 2 262144 63 3.59 9414.26 4707.13 1.7 lstream 0x0011
4 262144 31 2.01 16554.31 4138.58 1.9 lstream 0x00ff
8 262144 127 13.31 20491.13 2561.39 3.1 lstream 0x00ff
e
# P4-1.3GHz
1 262144 31 5.37 1550.73 1550.73 2.6 lstream
2 262144 31 7.58 2194.73 1097.36 3.6 lstream
# Tina Xeon-3GHz
1 262144 15 2.21 1821.56 1821.56 2.2 lstream
2 262144 15 2.56 3142.50 1571.25 2.5 lstream
e
# altix330-ia64-1.5GHz 16*4GB=64GB (different processor bindings)
# FSB500??? Numalink=800MB/s??? whitepaper_local=145ns
# 1 262144 7 2.56 735.22 735.22 10.9 lstream 0xffff unroll
1 262144 15 5.60 719.33 719.33 11.1 lstream 0xffff
2 262144 15 5.60 1437.71 718.86 11.1 lstream 0x1111
4 262144 15 5.60 2875.80 718.95 11.1 lstream 0x1111
8 262144 15 5.60 5751.61 718.95 11.1 lstream 0x5555
16 262144 15 5.68 11341.69 708.86 11.3 lstream 0xffff
1 262144 15 5.60 719.33 719.33 11.1 lstream 0xffff
2 262144 15 5.68 1418.83 709.42 11.3 lstream 0xffff
4 262144 15 5.68 2836.97 709.24 11.3 lstream 0xffff
8 262144 15 5.68 5674.67 709.33 11.3 lstream 0xffff
16 262144 15 5.68 11341.69 708.86 11.3 lstream 0xffff
e
pause -1
# ------------- snip -------------------
# --- plot 3: BW per task vs. array size, log-log ---
# For each machine the first "-" block holds the random-access rows
# (point type 1) and the second the lstream rows (point type 2); the two
# constant 0.01 entries lie below the y-range and only add legend keys.
#gnuplot
#set logscale xy
#set ytics 2
set size 1,1
unset label
set ytics auto
# Both axes logarithmic (this also covers the y axis, which was linear above).
set logscale xy
set key left bottom
set title "memory BW vs. arraysize"
set xlabel "arraysize [B]"
set ylabel "BW/task [MB/s]"
set xtics ("1" 1,"4" 4,"16" 16,"64" 64,"256" 256,"1k" 1024,"4k" 4096,"16k" 4*4096,"64k" 65536,"256k" 262144,"1M" 1048576,"4M" 4*1024*1024,"16M" 16*1024*1024,"64M" 64*1024*1024,"256M" 256*1024*1024)
plot [256:2**30] [1:] \
  "-" u ($2*1024):6 t "GS1280-ev7-1.15GHz" w lp lt 3 pt 1,\
  "-" u ($2*1024):6 t "" w lp lt 3 pt 2,\
  "-" u ($2*1024):6 t "4DualOpt885-2.6GHz" w lp lt 4 pt 1,\
  "-" u ($2*1024):6 t "" w lp lt 4 pt 2,\
  "-" u ($2*1024):6 t "2Quad-Nehalem-2.0GHz" w lp lt 5 pt 1,\
  "-" u ($2*1024):6 t "" w lp lt 5 pt 2,\
  "-" u ($2*1024):6 t "HT P4-1.3GHz-FSB400" w lp lt 1 pt 1,\
  "-" u ($2*1024):6 t "" w lp lt 1 pt 2,\
  "-" u ($2*1024):6 t "Altix330-ia64-1.5GHz" w lp lt 2 pt 1,\
  "-" u ($2*1024):6 t "" w lp lt 2 pt 2,\
  0.01 t "stream (maxBW)" w p lt 1 pt 2,\
  0.01 t "random access (minBW)" w p lt 1 pt 1
#
#
# GS1280-alpha (as an orientation, under load)
# 4GB/CPU (32CPUs/RAD, mem striping)
# measure: BW = bandwidth, aBW = aggregate BW, lat = latency
# Tasks Mem/Task[kB] loops t[s] aBW[MB/s] BW[MB/s] lat[ns]
1 262144 1 8.18 32.80 32.80 243.9 random
1 131072 1 3.74 35.93 35.93 222.7 random
1 65536 3 5.28 38.11 38.11 209.9 random
1 32768 3 2.50 40.28 40.28 198.6 random
1 16384 7 2.45 47.96
47.96 166.8 random 1 8192 15 2.52 49.84 49.84 160.5 random 1 4096 31 2.09 62.10 62.10 128.8 random 1 2048 255 2.67 200.20 200.20 40.0 random 1 1024 2047 3.33 644.24 644.24 12.4 random 1 512 4095 3.21 668.26 668.26 12.0 random 1 256 8191 3.20 671.13 671.13 11.9 random 1 128 16383 2.97 724.10 724.10 11.0 random 1 64 131071 3.84 2237.83 2237.83 3.6 random 1 32 262143 3.82 2250.47 2250.47 3.6 random 1 16 524287 3.81 2253.51 2253.51 3.6 random 1 8 1048575 3.81 2255.33 2255.33 3.5 random 1 4 2097151 3.79 2265.70 2265.70 3.5 random 1 2 4194303 3.77 2279.89 2279.89 3.5 random 1 1 8388607 3.63 2366.89 2366.89 3.4 random 1 0.5 16777215 3.45 2492.27 2492.27 3.2 random 1 0.25 33554431 3.50 2452.14 2452.14 3.3 random 4 1.049e+06 1 41.89 102.54 25.63 312.1 random 4 524288 1 17.44 123.13 30.78 259.9 random 4 262144 1 7.97 134.68 33.67 237.6 random 4 131072 1 3.92 136.87 34.22 233.8 random 4 65536 3 5.77 139.54 34.89 229.3 random 4 32768 3 2.98 135.31 33.83 236.5 random 4 16384 7 3.41 137.94 34.48 232.0 random 4 8192 15 2.80 179.53 44.88 178.2 random 4 4096 31 2.63 197.98 49.49 161.6 random 4 262144 1 7.76 138.33 34.58 231.3 random 4 131072 1 3.73 143.82 35.96 222.5 random 4 65536 3 5.56 144.95 36.24 220.8 random 4 32768 3 2.47 162.74 40.69 196.6 random 4 16384 7 2.23 210.94 52.74 151.7 random 4 8192 31 4.08 255.04 63.76 125.5 random 4 4096 63 3.61 292.64 73.16 109.3 random 4 2048 255 3.02 708.81 177.20 45.1 random 4 1024 2047 3.30 2604.22 651.05 12.3 random 4 512 4095 3.21 2671.35 667.84 12.0 random 4 256 8191 3.20 2682.80 670.70 11.9 random 4 128 16383 2.97 2896.41 724.10 11.0 random 4 64 131071 3.85 8934.63 2233.66 3.6 random 4 32 262143 3.83 8970.55 2242.64 3.6 random 4 16 524287 3.82 8985.01 2246.25 3.6 random 4 8 1048575 3.90 8803.19 2200.80 3.6 random 4 4 2097151 3.80 9031.05 2257.76 3.5 random 4 2 4194303 3.78 9092.34 2273.08 3.5 random 4 1 8388607 3.66 9387.94 2346.99 3.4 random 4 0.5 16777215 3.45 9972.06 2493.02 3.2 random 4 0.25 33554431 3.07 11180.30 2795.07 2.9 
random 8 262144 1 7.39 290.73 36.34 220.1 random 8 131072 1 3.64 294.61 36.83 217.2 random 8 65536 3 5.84 275.70 34.46 232.1 random 8 32768 3 2.99 268.95 33.62 238.0 random 8 16384 7 3.35 280.70 35.09 228.0 random 8 8192 15 2.93 343.60 42.95 186.3 random 8 4096 31 2.27 457.39 57.17 139.9 random 8 2048 255 2.79 1535.59 191.95 41.7 random 8 1024 2047 3.16 5436.80 679.60 11.8 random 8 512 4095 3.18 5401.29 675.16 11.8 random 8 256 8191 3.17 5421.19 677.65 11.8 random 8 128 16383 2.93 5855.62 731.95 10.9 random 8 64 131071 3.80 18086.35 2260.79 3.5 random 8 32 262143 3.79 18115.76 2264.47 3.5 random 8 16 524287 3.79 18130.50 2266.31 3.5 random 8 8 1048575 3.78 18160.01 2270.00 3.5 random 8 4 2097151 3.77 18229.20 2278.65 3.5 random 8 2 4194303 3.75 18344.01 2293.00 3.5 random 8 1 8388607 3.61 19031.92 2378.99 3.4 random 8 0.5 16777215 3.43 20039.59 2504.95 3.2 random 8 0.25 33554431 3.05 22495.76 2811.97 2.8 random e # Tasks Mem/Task[kB] loops t[s] aBW[MB/s] BW[MB/s] lat[ns] 1 262144 31 4.07 2043.93 2043.93 3.9 lstream 1 131072 63 3.91 2164.14 2164.14 3.7 lstream 1 65536 127 3.85 2212.68 2212.68 3.6 lstream 1 32768 255 3.89 2196.82 2196.82 3.6 lstream 1 16384 511 3.67 2337.81 2337.81 3.4 lstream 1 8192 1023 3.72 2304.01 2304.01 3.5 lstream 1 4096 2047 3.76 2283.75 2283.75 3.5 lstream 1 2048 4095 3.34 2573.62 2573.62 3.1 lstream 1 1024 16383 3.55 4837.40 4837.40 1.7 lstream 1 512 32767 3.55 4836.15 4836.15 1.7 lstream 1 256 65535 3.56 4825.07 4825.07 1.7 lstream 1 128 131071 3.55 4833.47 4833.47 1.7 lstream 1 64 262143 3.21 5357.65 5357.65 1.5 lstream 1 32 524287 2.88 5955.92 5955.92 1.3 lstream 1 16 1048575 2.90 5922.23 5922.23 1.4 lstream 1 8 2097151 2.94 5849.84 5849.84 1.4 lstream 1 4 4194303 2.99 5749.43 5749.43 1.4 lstream 1 2 8388607 2.94 5837.60 5837.60 1.4 lstream 1 1 16777215 2.89 5947.47 5947.47 1.3 lstream 1 0.5 33554431 2.87 5994.30 5994.30 1.3 lstream 1 0.25 67108863 2.87 5994.30 5994.30 1.3 lstream 4 1.049e+06 7 3.95 7620.68 1905.17 4.2 lstream 4 524288 
15 4.24 7596.19 1899.05 4.2 lstream 4 262144 15 2.13 7577.86 1894.47 4.2 lstream 4 131072 31 2.17 7656.78 1914.19 4.2 lstream 4 65536 63 2.71 6247.21 1561.80 5.1 lstream 4 32768 127 2.39 7125.75 1781.44 4.5 lstream 4 16384 255 2.37 7206.33 1801.58 4.4 lstream 4 8192 511 2.34 7315.26 1828.82 4.4 lstream 4 4096 1023 2.28 7516.50 1879.12 4.3 lstream 4 2048 2047 2.02 8484.50 2121.12 3.8 lstream 4 262144 15 2.03 7946.02 1986.50 4.0 lstream 4 131072 31 2.05 8112.36 2028.09 3.9 lstream 4 65536 63 2.09 8085.53 2021.38 4.0 lstream 4 32768 127 2.14 7966.08 1991.52 4.0 lstream 4 16384 511 3.64 9411.81 2352.95 3.4 lstream 4 8192 1023 3.91 8776.15 2194.04 3.6 lstream 4 4096 2047 3.90 8796.59 2199.15 3.6 lstream 4 2048 4095 3.46 9940.04 2485.01 3.2 lstream 4 1024 16383 3.56 19299.41 4824.85 1.7 lstream 4 512 32767 3.56 19322.27 4830.57 1.7 lstream 4 256 65535 3.55 19333.71 4833.43 1.7 lstream 4 128 131071 3.55 19339.45 4834.86 1.7 lstream 4 64 262143 3.21 21389.56 5347.39 1.5 lstream 4 32 524287 2.88 23849.13 5962.28 1.3 lstream 4 16 1048575 2.88 23874.66 5968.66 1.3 lstream 4 8 2097151 2.89 23815.26 5953.81 1.3 lstream 4 4 4194303 2.91 23647.13 5911.78 1.4 lstream 4 2 8388607 2.95 23334.15 5833.54 1.4 lstream 4 1 16777215 2.89 23781.45 5945.36 1.3 lstream 4 0.5 33554431 2.87 23985.80 5996.45 1.3 lstream 4 0.25 67108863 2.87 23960.06 5990.02 1.3 lstream 8 262144 15 2.16 14932.31 1866.54 4.3 lstream 8 131072 31 2.21 15043.78 1880.47 4.3 lstream 8 65536 63 2.30 14719.99 1840.00 4.3 lstream 8 32768 127 2.39 14251.50 1781.44 4.5 lstream 8 16384 255 2.28 15009.15 1876.14 4.3 lstream 8 8192 511 2.05 16732.10 2091.51 3.8 lstream 8 4096 1023 2.11 16291.85 2036.48 3.9 lstream 8 2048 4095 3.91 17555.96 2194.49 3.6 lstream 8 1024 16383 3.53 38878.85 4859.86 1.6 lstream 8 512 32767 3.53 38925.22 4865.65 1.6 lstream 8 256 65535 3.53 38903.21 4862.90 1.6 lstream 8 128 131071 3.51 39130.75 4891.34 1.6 lstream 8 64 262143 3.18 43262.57 5407.82 1.5 lstream 8 16 1048575 2.84 48369.87 6046.23 1.3 
lstream 8 8 2097151 2.85 48230.60 6028.83 1.3 lstream 8 4 4194303 2.87 47885.88 5985.73 1.3 lstream 8 2 8388607 2.91 47260.91 5907.61 1.4 lstream 8 1 16777215 2.85 48195.93 6024.49 1.3 lstream 8 0.5 33554431 2.83 48527.59 6065.95 1.3 lstream 8 0.25 67108863 2.83 48527.60 6065.95 1.3 lstream e # comp1 4 dualopt885 8 262144 1 6.18 347.51 43.44 184.2 random 8 131072 1 2.74 391.56 48.95 163.4 random 8 65536 3 3.82 421.59 52.70 151.8 random 8 32768 7 3.94 476.75 59.59 134.2 random 8 16384 15 3.45 583.03 72.88 109.8 random 8 8192 31 2.87 724.59 90.57 88.3 random 8 4096 63 2.57 822.44 102.81 77.8 random 8 2048 255 3.67 1165.12 145.64 54.9 random 8 1024 1023 2.14 4008.90 501.11 16.0 random 8 512 8191 3.56 9643.66 1205.46 6.6 random 8 256 16383 2.98 11542.72 1442.84 5.5 random 8 128 65535 3.99 17213.23 2151.65 3.7 random 8 64 262143 2.49 55234.28 6904.29 1.2 random 8 32 524287 2.48 55321.23 6915.15 1.2 random 8 16 1048575 2.49 55234.44 6904.31 1.2 random 8 8 2097151 2.49 55147.89 6893.49 1.2 random 8 4 4194303 2.50 54975.57 6871.95 1.2 random 8 2 8388607 2.51 54804.31 6850.54 1.2 random 8 1 16777215 2.54 54213.20 6776.65 1.2 random 8 0.5 33554431 2.59 53148.60 6643.57 1.2 random 8 0.25 67108863 2.69 51065.85 6383.23 1.3 random e 8 262144 15 4.04 7982.90 997.86 8.0 lstream 8 131072 15 2.37 6803.91 850.49 9.4 lstream 8 65536 31 2.52 6615.85 826.98 9.7 lstream 8 32768 63 2.35 7203.54 900.44 8.9 lstream 8 16384 255 3.12 10979.62 1372.45 5.8 lstream 8 8192 511 2.11 16227.20 2028.40 3.9 lstream 8 4096 1023 2.32 14818.72 1852.34 4.3 lstream 8 2048 2047 2.73 12595.70 1574.46 5.1 lstream 8 1024 4095 2.30 14955.69 1869.46 4.3 lstream 8 512 65535 3.76 73147.27 9143.41 0.9 lstream 8 256 131071 3.70 74385.00 9298.12 0.9 lstream 8 128 262143 3.69 74542.88 9317.86 0.9 lstream 8 64 524287 3.92 70158.14 8769.77 0.9 lstream 8 32 1048575 3.33 82592.34 10324.04 0.8 lstream 8 16 2097151 3.34 82398.96 10299.87 0.8 lstream 8 8 4194303 3.34 82206.46 10275.81 0.8 lstream 8 4 8388607 3.36 81824.11 
10228.01 0.8 lstream 8 2 16777215 3.41 80513.43 10064.18 0.8 lstream 8 1 33554431 3.52 78100.71 9762.59 0.8 lstream 8 0.5 67108863 3.72 73839.18 9229.90 0.9 lstream 8 0.25 67108863 2.07 66511.10 8313.89 1.0 lstream e # "-" u 1:6 t "2Quad-Nehalem-2.0GHz" w lp 5 5,\ # set cpu affinity for cpuset=000000ff core(0)=0 size=1024 # mpi=8 mem/task= 1024.000 MB int64=8B mintime[s]=10 # measure: BW = bandwidth, aBW = aggregate BW, lat = latency # Tasks Mem/Task[kB] loops t[s] aBW[MB/s] BW[MB/s] lat[ns] 8 1.04858e+06 1 23.21 370.17 46.27 172.9 random 8 524288 1 10.65 403.36 50.42 158.7 random 8 262144 3 13.88 464.08 58.01 137.9 random 8 131072 7 13.84 543.06 67.88 117.9 random 8 65536 15 13.49 596.98 74.62 107.2 random 8 32768 31 13.14 633.44 79.18 101.0 random 8 16384 63 12.74 663.59 82.95 96.4 random 8 8192 127 12.09 704.81 88.10 90.8 random 8 4096 255 10.75 796.29 99.54 80.4 random 8 2048 1023 15.20 1129.12 141.14 56.7 random 8 1024 4095 13.86 2478.15 309.77 25.8 random 8 512 16383 15.49 4436.34 554.54 14.4 random 8 256 65535 11.75 11693.94 1461.74 5.5 random 8 128 262143 18.37 14960.46 1870.06 4.3 random 8 64 524287 15.28 17995.00 2249.37 3.6 random 8 32 2097151 17.23 31904.24 3988.03 2.0 random 8 16 4194303 17.02 32300.52 4037.57 2.0 random 8 8 8388607 16.79 32737.54 4092.19 2.0 random 8 4 16777215 16.34 33639.51 4204.94 1.9 random 8 2 33554431 15.44 35601.56 4450.19 1.8 random 8 1 67108863 13.64 40299.85 5037.48 1.6 random 8 0.5 134217727 13.34 41221.81 5152.73 1.6 random 8 0.25 268435455 13.74 40008.61 5001.08 1.6 random e # Tasks Mem/Task[kB] loops t[s] aBW[MB/s] BW[MB/s] lat[ns] 8 1.04858e+06 31 13.01 20468.66 2558.58 3.1 lstream 8 524288 63 13.21 20481.98 2560.25 3.1 lstream 8 262144 127 13.31 20491.13 2561.39 3.1 lstream 8 131072 255 13.36 20488.41 2561.05 3.1 lstream 8 65536 511 13.39 20492.34 2561.54 3.1 lstream 8 32768 1023 13.40 20492.05 2561.51 3.1 lstream 8 16384 2047 13.41 20484.99 2560.62 3.1 lstream 8 8192 4095 13.42 20480.00 2560.00 3.1 lstream 8 4096 8191 
13.42 20484.21 2560.53 3.1 lstream 8 2048 16383 13.01 21133.26 2641.66 3.0 lstream 8 1024 65535 13.70 40118.08 5014.76 1.6 lstream 8 512 262143 17.39 63209.58 7901.20 1.0 lstream 8 256 524287 17.37 63309.14 7913.64 1.0 lstream 8 128 1048575 17.26 63718.06 7964.76 1.0 lstream 8 64 2097151 17.27 63668.59 7958.57 1.0 lstream 8 32 4194303 17.29 63583.46 7947.93 1.0 lstream 8 16 8388607 17.33 63438.74 7929.84 1.0 lstream 8 8 16777215 17.42 63113.50 7889.19 1.0 lstream 8 4 33554431 17.61 62422.28 7802.79 1.0 lstream 8 2 67108863 17.98 61136.43 7642.05 1.0 lstream 8 1 134217727 18.66 58926.81 7365.85 1.1 lstream 8 0.5 268435455 17.38 63269.21 7908.65 1.0 lstream 8 0.25 536870911 17.51 62780.86 7847.61 1.0 lstream e # P4-2.6GHz/2-FSB4*100MHz # mpi=2 mem/task= 256.000 MB int32=4B mintime[s]=4 # measure: BW = bandwidth, aBW = aggregate BW, lat = latency # Tasks Mem/Task[kB] loops t[s] aBW[MB/s] BW[MB/s] lat[ns] lat*MHz 1 262144 1 13.20 20.34 20.34 196.6 random 255 clk/4B 1 131072 1 5.80 23.15 23.15 172.8 random 1 65536 3 7.99 25.21 25.21 158.7 random 1 32768 3 4.18 24.09 24.09 166.0 random 1 16384 7 4.34 27.07 27.07 147.8 random 1 8192 15 4.87 25.82 25.82 154.9 random 1 4096 31 4.37 29.74 29.74 134.5 random 1 2048 63 4.11 32.11 32.11 124.6 random 1 1024 255 6.01 44.47 44.47 90.0 random 1 512 1023 5.68 94.47 94.47 42.3 random 1 256 8191 7.74 277.50 277.50 14.4 random 1 128 16383 7.98 269.16 269.16 14.9 random 1 64 32767 7.04 304.90 304.90 13.1 random 1 32 65535 6.15 349.07 349.07 11.5 random 1 16 131071 5.05 425.19 425.19 9.4 random 1 8 1048575 4.06 2114.87 2114.87 1.9 random = 2.6 clks/random 1 4 2097151 4.56 1883.90 1883.90 2.1 random 1 2 8388607 8.09 2123.42 2123.42 1.9 random 1 1 8388607 4.47 1922.35 1922.35 2.1 random 1 0.5 16777215 4.79 1792.01 1792.01 2.2 random 1 0.25 33554431 4.96 1730.76 1730.76 2.3 random 1 0.125 67108863 5.66 1517.63 1517.63 2.6 random # 2 262144 1 21.73 24.70 12.35 323.9 random # 2 131072 1 9.63 27.86 13.93 287.1 random # 2 65536 1 4.03 33.30 
16.65 240.2 random # 2 32768 3 5.62 35.80 17.90 223.5 random # 2 16384 7 6.35 36.96 18.48 216.4 random # 2 8192 15 6.67 37.72 18.86 212.1 random # 2 4096 31 6.80 38.26 19.13 209.1 random # 2 2048 63 6.41 41.20 20.60 194.2 random # 2 1024 127 5.83 45.72 22.86 175.0 random # 2 512 255 4.58 58.32 29.16 137.2 random # 2 256 1023 4.71 113.99 57.00 70.2 random # 2 128 8191 5.72 375.19 187.59 21.3 random # 2 64 16383 5.19 413.68 206.84 19.3 random # 2 32 32767 4.65 461.63 230.81 17.3 random # 2 16 65535 4.16 515.80 257.90 15.5 random # 2 8 262143 5.51 779.86 389.93 10.3 random # 2 4 2097151 6.02 2854.69 1427.35 2.8 random # 2 2 4194303 5.38 3194.60 1597.30 2.5 random # 2 1 8388607 5.30 3242.75 1621.37 2.5 random # 2 0.5 16777215 5.57 3086.14 1543.07 2.6 random # 2 0.25 33554431 5.91 2904.51 1452.25 2.8 random # 2 0.125 67108863 6.68 2570.35 1285.18 3.1 random e # Tasks Mem/Task[kB] loops t[s] aBW[MB/s] BW[MB/s] lat[ns] 1 262144 31 5.37 1550.73 1550.73 2.6 lstream = 3.4 clk/4B 1 131072 63 5.43 1558.29 1558.29 2.6 lstream 1 65536 127 5.45 1564.75 1564.75 2.6 lstream 1 32768 255 5.40 1584.25 1584.25 2.5 lstream 1 16384 511 5.46 1569.75 1569.75 2.5 lstream 1 8192 1023 5.43 1579.22 1579.22 2.5 lstream 1 4096 2047 5.62 1526.98 1526.98 2.6 lstream 1 2048 4095 5.56 1544.28 1544.28 2.6 lstream 1 1024 8191 5.38 1595.46 1595.46 2.5 lstream 1 512 16383 4.53 1896.22 1896.22 2.1 lstream 1 256 65535 7.31 2349.95 2349.95 1.7 lstream 1 128 131071 6.98 2462.31 2462.31 1.6 lstream 1 64 262143 6.97 2465.83 2465.83 1.6 lstream 1 32 524287 6.96 2467.55 2467.55 1.6 lstream = 2.1 clk/4B 1 16 1048575 6.83 2514.62 2514.62 1.6 lstream 1 8 2097151 6.96 2467.18 2467.18 1.6 lstream 1 4 4194303 7.10 2418.05 2418.05 1.7 lstream 1 2 8388607 7.13 2408.74 2408.74 1.7 lstream 1 1 16777215 7.05 2436.88 2436.88 1.6 lstream 1 0.5 33554431 7.48 2297.29 2297.29 1.7 lstream 1 0.25 33554431 4.02 2134.89 2134.89 1.9 lstream # 2 262144 31 7.58 2194.73 1097.36 3.6 lstream # 2 131072 63 7.67 2205.48 1102.74 3.6 
lstream # 2 65536 127 7.76 2197.61 1098.80 3.6 lstream # 2 32768 255 7.79 2196.87 1098.44 3.6 lstream # 2 16384 511 7.81 2195.40 1097.70 3.6 lstream # 2 8192 1023 7.82 2195.52 1097.76 3.6 lstream # 2 4096 2047 7.83 2193.87 1096.94 3.6 lstream # 2 2048 4095 7.84 2191.58 1095.79 3.7 lstream # 2 1024 8191 7.85 2188.72 1094.36 3.7 lstream # 2 512 16383 7.79 2205.36 1102.68 3.6 lstream # 2 256 32767 7.44 2310.44 1155.22 3.5 lstream # 2 128 65535 6.85 2508.95 1254.47 3.2 lstream # 2 64 131071 6.70 2565.36 1282.68 3.1 lstream # 2 32 262143 6.69 2566.34 1283.17 3.1 lstream # 2 16 524287 6.70 2563.37 1281.69 3.1 lstream # 2 8 1048575 6.72 2554.72 1277.36 3.1 lstream # 2 4 2097151 6.76 2542.57 1271.28 3.1 lstream # 2 2 4194303 6.78 2535.63 1267.81 3.2 lstream # 2 1 8388607 6.87 2500.85 1250.43 3.2 lstream # 2 0.5 16777215 7.05 2436.04 1218.02 3.3 lstream # 2 0.25 33554431 7.43 2311.79 1155.90 3.5 lstream # 2 0.125 33554431 4.10 2096.46 1048.23 3.8 lstream e # altix330 + setaffinity 8*(2*4GB)=64GB # mpi=16 mem/task= 256.000 MB int64=8B mintime[s]=5 # measure: BW = bandwidth, aBW = aggregate BW, lat = latency # Tasks Mem/Task[kB] loops t[s] aBW[MB/s] BW[MB/s] lat[ns] 4 2.09715e+06 1 45.98 186.81 46.70 171.3 random 0x1111 4 524288 1 9.68 221.92 55.48 144.2 random 4 262144 1 4.80 223.88 55.97 142.9 random 4 131072 3 7.09 227.08 56.77 140.9 random 4 262144 1 5.58 192.52 48.13 166.2 random 0xffff 4 131072 3 8.23 195.71 48.93 163.5 random 4 65536 7 9.31 201.74 50.44 158.6 random 4 32768 15 9.34 215.66 53.91 148.4 random 4 16384 31 8.45 246.06 61.51 130.1 random 4 8192 63 6.14 344.22 86.05 93.0 random 4 4096 255 5.46 783.58 195.89 40.8 random 4 2048 2047 6.63 2591.06 647.76 12.4 random 4 1024 4095 5.97 2875.58 718.89 11.1 random 4 512 16383 9.89 3475.14 868.78 9.2 random 4 256 32767 6.93 4959.34 1239.83 6.5 random 4 128 131071 7.97 8622.39 2155.60 3.7 random 4 64 262143 7.18 9569.24 2392.31 3.3 random 4 32 524287 5.84 11770.86 2942.72 2.7 random 4 16 2097151 5.76 23846.65 5961.66 
1.3 random 4 8 4194303 5.76 23855.32 5963.83 1.3 random 4 4 8388607 5.79 23729.82 5932.46 1.3 random 4 2 16777215 5.81 23656.65 5914.16 1.4 random 4 1 33554431 5.88 23386.14 5846.53 1.4 random 4 0.5 67108863 6.03 22806.45 5701.61 1.4 random 4 0.25 134217727 6.28 21883.13 5470.78 1.5 random 8 262144 1 5.58 385.04 48.13 166.2 random cpuset=0x00ff 8 131072 3 8.23 391.50 48.94 163.5 random 8 65536 7 9.28 404.90 50.61 158.1 random 8 32768 15 9.34 431.24 53.91 148.4 random 8 16384 31 8.44 492.72 61.59 129.9 random 8 8192 63 6.18 683.73 85.47 93.6 random 8 4096 255 5.19 1648.99 206.12 38.8 random 8 2048 2047 6.67 5146.30 643.29 12.4 random 8 1024 4095 5.97 5756.49 719.56 11.1 random 8 512 16383 9.87 6965.51 870.69 9.2 random 8 256 32767 5.97 11514.38 1439.30 5.6 random 8 128 131071 7.97 17245.79 2155.72 3.7 random 8 64 262143 7.17 19179.77 2397.47 3.3 random 8 32 524287 5.84 23529.78 2941.22 2.7 random 8 16 2097151 5.77 47673.46 5959.18 1.3 random 8 8 4194303 5.76 47711.93 5963.99 1.3 random 8 4 8388607 5.78 47581.71 5947.71 1.3 random 8 2 16777215 5.81 47312.35 5914.04 1.4 random 8 1 33554431 5.88 46773.70 5846.71 1.4 random 8 0.5 67108863 6.01 45726.06 5715.76 1.4 random 8 0.25 134217727 6.28 43765.90 5470.74 1.5 random 16 262144 1 5.58 769.24 48.08 166.4 random cpuset=0xffff 16 131072 3 8.24 782.30 48.89 163.6 random 16 65536 7 9.31 807.40 50.46 158.5 random 16 32768 15 9.35 861.36 53.83 148.6 random 16 16384 31 8.46 983.77 61.49 130.1 random 16 8192 63 6.19 1365.42 85.34 93.7 random 16 4096 255 5.21 3282.94 205.18 39.0 random 16 2048 2047 6.99 9823.57 613.97 13.0 random 16 1024 4095 5.97 11508.16 719.26 11.1 random 16 512 16383 9.85 13956.43 872.28 9.2 random 16 256 32767 6.47 21234.36 1327.15 6.0 random 16 128 131071 8.00 34376.03 2148.50 3.7 random 16 64 262143 7.16 38364.17 2397.76 3.3 random 16 32 524287 5.84 47078.60 2942.41 2.7 random 16 16 2097151 5.77 95200.11 5950.01 1.3 random 16 8 4194303 5.76 95422.76 5963.92 1.3 random 16 4 8388607 5.78 95165.81 5947.86 
1.3 random 16 2 16777215 5.81 94624.06 5914.00 1.4 random 16 1 33554431 5.88 93546.33 5846.65 1.4 random 16 0.5 67108863 6.01 91451.26 5715.70 1.4 random 16 0.25 134217727 6.28 87534.64 5470.91 1.5 random e # Tasks Mem/Task[kB] loops t[s] aBW[MB/s] BW[MB/s] lat[ns] 4 2.09715e+06 3 8.96 2875.99 719.00 11.1 lstream 0x1111 4 1.04858e+06 3 4.48 2875.50 718.88 11.1 lstream 4 524288 7 5.23 2875.66 718.92 11.1 lstream 4 262144 15 5.60 2875.74 718.93 11.1 lstream 4 262144 15 5.68 2836.97 709.24 11.3 lstream 0xffff 4 131072 31 5.88 2828.11 707.03 11.3 lstream 4 65536 63 5.96 2836.75 709.19 11.3 lstream 4 32768 127 6.01 2836.23 709.06 11.3 lstream 4 16384 255 6.03 2836.37 709.09 11.3 lstream 4 8192 511 5.99 2864.52 716.13 11.2 lstream 4 4096 2047 8.10 4238.95 1059.74 7.5 lstream 4 2048 16383 9.65 14235.33 3558.83 2.2 lstream 4 1024 32767 9.54 14407.22 3601.80 2.2 lstream 4 512 65535 9.47 14515.62 3628.91 2.2 lstream 4 256 131071 8.88 15484.18 3871.04 2.1 lstream 4 128 262143 7.92 17363.13 4340.78 1.8 lstream 4 64 524287 7.92 17363.40 4340.85 1.8 lstream 4 32 1048575 7.90 17391.55 4347.89 1.8 lstream 4 16 2097151 5.78 23761.01 5940.25 1.3 lstream 4 8 4194303 5.78 23789.68 5947.42 1.3 lstream 4 4 8388607 5.78 23789.87 5947.47 1.3 lstream 4 2 16777215 5.81 23655.47 5913.87 1.4 lstream 4 1 33554431 5.89 23327.21 5831.80 1.4 lstream 4 0.5 67108863 6.01 22863.06 5715.76 1.4 lstream 4 0.25 134217727 6.28 21883.14 5470.78 1.5 lstream 8 262144 15 5.68 5674.67 709.33 11.3 lstream cpuset=0x00ff 8 131072 31 5.87 5672.89 709.11 11.3 lstream 8 65536 63 5.96 5672.18 709.02 11.3 lstream 8 32768 127 6.01 5670.79 708.85 11.3 lstream 8 16384 255 6.04 5670.00 708.75 11.3 lstream 8 8192 511 5.99 5724.57 715.57 11.2 lstream 8 4096 2047 8.74 7859.99 982.50 8.1 lstream 8 2048 16383 9.59 28674.60 3584.32 2.2 lstream 8 1024 32767 9.54 28812.73 3601.59 2.2 lstream 8 512 65535 9.45 29095.11 3636.89 2.2 lstream 8 256 131071 8.47 32462.47 4057.81 2.0 lstream 8 128 262143 7.92 34723.66 4340.46 1.8 lstream 
8 64 524287 7.90 34792.56 4349.07 1.8 lstream 8 32 1048575 7.90 34781.95 4347.74 1.8 lstream 8 16 2097151 5.78 47524.96 5940.62 1.3 lstream 8 8 4194303 5.76 47700.09 5962.51 1.3 lstream 8 4 8388607 5.78 47576.88 5947.11 1.3 lstream 8 2 16777215 5.81 47311.24 5913.90 1.4 lstream 8 1 33554431 5.88 46772.04 5846.50 1.4 lstream 8 0.5 67108863 6.01 45725.78 5715.72 1.4 lstream 8 0.25 134217727 6.28 43765.29 5470.66 1.5 lstream 16 262144 15 5.68 11341.69 708.86 11.3 lstream 16 131072 31 5.87 11340.09 708.76 11.3 lstream 16 65536 63 5.97 11336.82 708.55 11.3 lstream 16 32768 127 6.02 11333.65 708.35 11.3 lstream 16 16384 255 6.04 11328.55 708.03 11.3 lstream 16 8192 511 5.85 11721.16 732.57 10.9 lstream 16 4096 2047 7.64 17981.32 1123.83 7.1 lstream 16 2048 8191 6.12 44874.74 2804.67 2.9 lstream 16 1024 32767 9.54 57627.75 3601.73 2.2 lstream 16 512 65535 9.45 58197.27 3637.33 2.2 lstream 16 256 131071 8.65 63533.66 3970.85 2.0 lstream 16 128 262143 7.92 69450.30 4340.64 1.8 lstream 16 64 524287 7.90 69590.96 4349.43 1.8 lstream 16 32 1048575 7.90 69565.73 4347.86 1.8 lstream 16 16 2097151 5.78 95048.27 5940.52 1.3 lstream 16 8 4194303 5.76 95406.64 5962.91 1.3 lstream 16 4 8388607 5.78 95155.95 5947.25 1.3 lstream 16 2 16777215 5.81 94624.61 5914.04 1.4 lstream 16 1 33554431 5.88 93543.76 5846.48 1.4 lstream 16 0.5 67108863 6.01 91451.78 5715.74 1.4 lstream 16 0.25 134217727 6.28 87533.85 5470.87 1.5 lstream e pause -1