#!/usr/bin/gnuplot
# Joerg S. 2015-12-15
#
# Plots the measured alltoall transfer speed per node against the number of
# nodes for four different node orderings and writes the result to "a.png".
#
# The measurements are embedded below as four inline data blocks (one per
# "-" in the plot command, each terminated by a line containing only "e").
# Whitespace-separated columns of every data record:
#   1: nodes   2: ranks/n   3: latency message size   4: latency value
#   5: latency unit   6: throughput message size   7: throughput value
#   8: throughput unit   9...: free-text notes (node lists, mpirun/sbatch options)
# Only columns 1 and 7 are plotted. Records starting with "#" are
# commented-out measurements or notes and are skipped by gnuplot.
set term png
set output "a.png"
set key bottom left
set ylabel "transfer speed per node [GB/s]"
set xlabel "nodes"
set label "def. node" at 145,2.4
set title "alltoall-speed in a non-blocking QDR-IB-network (11+6 36port-switches)"
#set logscale x
# Explicit "lt"/"pt" keywords are used because the bare positional form
# ("w p 2 2") is deprecated and rejected by gnuplot 5.
plot [2:180] [0.2:3.5] 3.15 t "uniform network 3.15 GB/s",\
 "-" u 1:7 t "switch port sorted" w p lt 2 pt 2,\
 "-" u 1:7 t "worst node order (?)" w p lt 3 pt 3,\
 "-" u 1:7 t "default slurm order" w p lt 1 pt 1,\
 "-" u 1:7 t "random order" w p lt 4 pt 4
# nodes ranks/n latency throughput
4 1 8B 1.64 us 64MB 3.15 GB/s
8 1 8B 1.50 us 64MB 3.15 GB/s
16 1 8B 1.47 us 64MB 3.15 GB/s 2x17=e08e10+gap -H c138,c139,c140,c141,c159,c160,c161,c163,c102,c103,c104,c105,c106,c125,c126,c128 bad c162
18 1 8B 1.29 us 64MB 3.15 GB/s 12,14,16-22,36-44 # e03
#24 1 8B 1.50 us 64MB 3.09 GB/s c0[16-22,36-40,47-54,67-70] # e03+e05
24 1 8B 1.45 us 64MB 3.15 GB/s -hostfile nodelist7x18uniform -np 24
27 1 8B 1.53 us 64MB 3.15 GB/s c[018-022,041-044,052-054,056,071-075,094-097,115-119] first9_of_e03,05,07
32 1 8B 1.48 us 64MB 3.15 GB/s -hostfile nodelist7x18uniform -np 32
#32 1 8B 1.57 us 64MB 3.10 GB/s c0[16-22,36-44,47-54,67-74] # e03+e05
32 1 8B 1.57 us 64MB 3.15 GB/s 16-22,36-44,47-54,67-74 # e03+e05
32 1 8B 1.55 us 64MB 3.13 GB/s 12,14,16-22,36-42,47-54,56,67-73
36 1 8B 1.55 us 64MB 3.13 GB/s 12,14,16-22,36-44,47-54,56,67-75
36 1 8B 1.57 us 64MB 3.15 GB/s uniform first9ofe3+5+7+11 c[018-022,041-044,052-054,056,071-075,094-097,115-119,146-150,169-172]
# there is still some disorder,
# because groups of node numbers are not bounded to edge-switches
#
#54 1 8B 1.58 us 64MB 2.66 GB/s -F hosts_e03_05_07 (sorted by nodenames)
54 1 8B 1.56 us 64MB 3.13 GB/s mpirun -hostfile nodelist7x18uniform -np 54
72 1 8B 1.60 us 64MB 3.14 GB/s -hostfile nodelist7x18uniform -np 72
90 1 8B 1.63 us 64MB 3.08 GB/s -hostfile nodelist7x18uniform -np 90
90 1 8B 1.63 us 64MB 3.13 GB/s -hostfile nodelist7x18uniform -np 90
108 1 8B 1.66 us 64MB 3.14 GB/s -hostfile nodelist7x18uniform -np 108
#117 1 8B 1.66 us 64MB 2.18 GB/s -hostfile nodelist7x18uniform -np 117
119 1 8B 1.71 us 64MB 3.10 GB/s -hostfile nodelist7x17uniform -np 119
119 1 8B 1.73 us 64MB 3.08 GB/s -hostfile nodelist7x17uniform -np 119
#120 1 8B 1.67 us 64MB 2.21 GB/s -hostfile nodelist7x18uniform -np 120
126 1 8B 1.67 us 64MB 3.05 GB/s mpirun -hostfile nodelist7x18uniform
126 1 8B 1.68 us 64MB 3.11 GB/s mpirun -hostfile nodelist7x18uniform
126 1 8B 1.71 us 64MB 3.10 GB/s -hostfile nodelist8x18uniform -np 26
136 1 8B 1.71 us 64MB 3.07 GB/s -hostfile nodelist7x18uniform_no162 every 8th node skipped
136 1 8B 1.71 us 64MB 2.95 GB/s -hostfile nodelist7x18uniform_no162 every 8th node skipped
# bad c162 pci-errors
144 1 8B 1.75 us 16MB 2.70 GB/s -hostfile nodelist8x18uniform
144 1 8B 1.75 us 64MB 2.53 GB/s -hostfile nodelist8x18uniform
144 1 8B 1.75 us 64MB 2.49 GB/s -hostfile nodelist8x18uniform
#
e
# nodes ranks/n latency throughput
# sbatch -w c[018,019,010,060] -m arbitrary job # but 0=10 1=18 2=19 3=60
4 1 8B 1.62 us 64MB 2.59 GB/s 2ofE03+2*1ofEXX factor=1/1.2 -H c018,...
# sbatch -w c[018-019,052,094] # search all 4th slowest nodes
# mpirun -H c018,c019,c052,c094
4 1 8B 1.62 us 64MB 2.59 GB/s 2ofE03+2*1ofEXX factor=1/1.2
4 1 8B 1.65 us 64MB 2.59 GB/s 2ofE03+2*1ofEXX factor=1/1.2
# 5 2.40 -H c018,c019,c052,c094,c146
# 5 2.59 -H c018,c019,c052,c094,c130
# c[018-020,052,094,129]
6 1 8B 1.55 us 64MB 1.98 GB/s 3ofE03+3*1ofEXX factor=1/1.6
# 7 2.40 -H c018,c019,c052,c094,c095,c146,c060
# c[018-021,052,094,129,146]
8 1 8B 1.58 us 64MB 1.57 GB/s 4ofE03+4*1ofEXX factor=1/2
#
# c[018-022,052,094,129,146,085]
10 1 8B 1.55 us 64MB 1.33 GB/s 5ofE03+5*1ofEXX(~e02) factor=9/21.3
# c[018-022,041-041,052,094,129,146,085,060]
12 1 8B 1.56 us 64MB 1.14 GB/s 6ofE03+6*1ofEXX(~e02) factor=13/40
14 1 8B 1.54 us 64MB 1.02 GB/s 7ofE03+7*1ofEXX(~e02) factor=13/40
16 1 8B 1.56 us 64MB 0.92 GB/s 8ofE03+8*1ofEXX(~e02) factor=15/51.5
17 1 8B 1.62 us 64MB 0.85 GB/s worst!? 8ofE03+9*1ofEXX factor=16/59.5
# c[018-022,041-044,052,094,129,146,010,085,060,102,138]
18 1 8B 1.55 us 64MB 0.82 GB/s worst!? 9ofE03+9*1ofEXX factor=17/65.4
# 9ofE03+10*1ofEXX c[018-022,041-044,052,094,129,146,010,085,060,102,138,001]
# 19 1 8B 1.63 us 64MB 0.85 GB/s worst!? 9ofE03+10*1ofEXX factor=18/67
# sbatch -w c[018-022,041-044,052-054,056,071-075,094,129,146,010,085,060,102,138] job
26 1 8B 1.58 us 64MB 0.82 GB/s worst?? 9ofE03,E05+8*1ofEXX factor=3.865
# not sure, that below is the worst node order case
# sbatch -w c[018-022,041-044,052,094,129,146,010,085,060,102,138,053,095,130,147,011,105,062,103,139] job
# 27 1 8B 1.63 us 64MB 1.18 GB/s worst?? 9ofE03+9*2ofEXX factor=1/2.67
# c[018-022,041-044,052-054,056,071-075,094-097,115-119,129,146,010,085,060,102,138]
34 1 8B 1.59 us 64MB 1.08 GB/s worst?? 9ofE03,5,7+7*1ofEXX factor=3.865
36 1 8B 1.56 us 64MB 1.16 GB/s -hostfile nodelist7xNworst 18+3*6
#36 1 8B 1.56 us 64MB 1.22 GB/s -hostfile nodelist7xNworst 18+3*6 +i*n
e
# nodes ranks/n latency throughput
4 1 8B 1.64 us 64MB 3.15 GB/s
8 1 8B 1.50 us 64MB 3.15 GB/s
10 1 8B 1.46 us 64MB 3.15 GB/s c[001-010] e01+e02
11 1 8B 1.64 us 64MB 3.15 GB/s c[001,010,018,052,060,085,096,102,129,138,146] all eXX
# collisions above N=11 (edge-switches)
12 1 8B 1.59 us 64MB 2.43 GB/s c[001-012] e01+e02+e03
14 1 8B 1.51 us 64MB 2.63 GB/s c[001-014]
16 1 8B 1.57 us 64MB 2.72 GB/s 01-16
16 1 8B 1.57 us 64MB 3.03 GB/s 03-18
18 1 8B 1.58 us 64MB 2.77 GB/s 01-18
18 1 8B 1.56 us 64MB 2.71 GB/s 02-19
20 1 8B 1.61 us 64MB 2.82 GB/s c[001-020]
22 1 8B 1.53 us 64MB 2.46 GB/s c[001-022]
24 1 8B 1.56 us 64MB 2.58 GB/s c[001-024]
32 1 8B 1.61 us 64MB 2.40 GB/s 1+2+28 down, 29-60
32 1 8B 1.62 us 64MB 2.40 GB/s 1+2+28 down, 29-60
32 1 8B 1.62 us 64MB 2.35 GB/s 30-61
32 1 8B 1.62 us 64MB 2.37 GB/s 31-62
32 1 8B 1.62 us 64MB 2.34 GB/s 32-63
32 1 8B 1.62 us 64MB 2.33 GB/s 33-64
32 1 8B 1.62 us 64MB 2.23 GB/s 34-65
32 1 8B 1.61 us 64MB 2.24 GB/s 35-66
32 1 8B 1.62 us 64MB 2.17 GB/s 36-67
32 1 8B 1.63 us 64MB 2.20 GB/s 37-68
45 1 8B 1.62 us 64MB 2.06 GB/s first9ofe3+5+7+11+10 c[018-022,041-044,052-054,056,071-075,094-097,115-119,146-150,169-172,138-141,159-163]
54 1 8B 1.59 us 64MB 2.11 GB/s c[001-054]
64 1 8B 1.66 us 64MB 1.86 GB/s
64 1 8B 1.72 us 64MB 1.86 GB/s 29-92
64 1 8B 1.74 us 64MB 1.86 GB/s 33-96
64 1 8B 1.66 us 64MB 1.79 GB/s 61-124
64 1 8B 1.64 us 64MB 1.94 GB/s 81-144
64 1 8B 1.64 us 64MB 1.85 GB/s 90-153
80 1 8B 1.69 us 64MB 2.00 GB/s c[001-080]
100 1 8B 1.82 us 64MB 1.73 GB/s c[001-100]
108 1 8B 1.73 us 64MB 1.53 GB/s c[001-108]
120 1 8B 1.70 us 64MB 1.77 GB/s c[001-120]
128 1 8B 1.79 us 64MB 1.91 GB/s
128 1 8B 1.75 us 64MB 1.91 GB/s disordered 29-156
128 1 8B 1.83 us 64MB 1.90 GB/s disordered 30-157
144 1 8B 1.76 us 64MB 1.72 GB/s c[001-144]
160 1 8B 1.77 us 64MB 1.63 GB/s # 001-160 vm
e
# nodes ranks/n latency throughput
9 1 8B 1.63 us 64MB 3.15 GB/s nodelist7x18uniform_shuffled -np 9
12 1 8B 1.66 us 64MB 3.15 GB/s nodelist7x18uniform_shuffled -np 12
14 1 8B 1.63 us 64MB 3.01 GB/s nodelist7x18uniform_shuffled -np 14
15 1 8B 1.64 us 64MB 2.60 GB/s nodelist7x18uniform_shuffled -np 15
16 1 8B 1.63 us 64MB 2.59 GB/s nodelist7x18uniform_shuffled -np 16
17 1 8B 1.63 us 64MB 2.53 GB/s nodelist7x18uniform_shuffled -np 17
18 1 8B 1.62 us 64MB 2.60 GB/s nodelist7x18uniform_shuffled -np 18
21 1 8B 1.64 us 64MB 2.75 GB/s nodelist7x18uniform_shuffled -np 21
24 1 8B 1.62 us 64MB 2.50 GB/s nodelist7x18uniform_shuffled -np 24
27 1 8B 1.65 us 64MB 2.29 GB/s nodelist7x18uniform_shuffled -np 27
30 1 8B 1.62 us 64MB 2.33 GB/s nodelist7x18uniform_shuffled -np 30
33 1 8B 1.65 us 64MB 2.27 GB/s nodelist7x18uniform_shuffled -np 33
36 1 8B 1.65 us 64MB 2.21 GB/s nodelist7x18uniform_shuffled -np 36
45 1 8B 1.65 us 64MB 2.15 GB/s nodelist7x18uniform_shuffled -np 45
54 1 8B 1.67 us 64MB 1.93 GB/s nodelist7x18uniform_shuffled -np 54
63 1 8B 1.65 us 64MB 1.91 GB/s nodelist7x18uniform_shuffled -np 63
72 1 8B 1.65 us 64MB 1.85 GB/s nodelist7x18uniform_shuffled -np 72
81 1 8B 1.67 us 64MB 1.74 GB/s nodelist7x18uniform_shuffled -np 81
90 1 8B 1.66 us 64MB 1.68 GB/s nodelist7x18uniform_shuffled -np 90
99 1 8B 1.67 us 64MB 1.64 GB/s nodelist7x18uniform_shuffled -np 99
108 1 8B 1.72 us 64MB 1.61 GB/s nodelist7x18uniform_shuffled -np 108
114 1 8B 1.69 us 64MB 1.59 GB/s nodelist7x18uniform_shuffled -np 114
120 1 8B 1.69 us 64MB 1.56 GB/s nodelist7x18uniform_shuffled -np 120
126 1 8B 1.70 us 64MB 1.52 GB/s nodelist7x18uniform_shuffled -np 126
126 1 8B 1.68 us 64MB 1.53 GB/s nodelist7x18uniform_shuffled2x -np 126
135 1 8B 1.71 us 64MB 1.48 GB/s -hostfile nodelist8x18uniform_shuffled
144 1 8B 1.74 us 64MB 1.46 GB/s -hostfile nodelist8x18uniform_shuffled
e
# Close the output file so the PNG is fully written before waiting.
set output
pause -1