#!/usr/bin/bash

# Map each DGX-1 GPU (one local MPI rank per GPU) to CPU cores, along with
# the HCA affinity for that rank.
# Run only as many ranks per node as there are GPUs.

# CPU lists local to each HCA (mlx5_0 .. mlx5_3, in order), as reported by:
#   cat /sys/class/infiniband/mlx5_*/device/local_cpulist
# 0-19,40-59
# 0-19,40-59
# 20-39,60-79
# 20-39,60-79


# Pick the CPU cores and the InfiniBand device for this process from its
# node-local rank, read from the Open MPI variable OMPI_COMM_WORLD_LOCAL_RANK.
#
#   ranks 0-3 -> cores 0-3   (+ 40-43), devices mlx5_0 / mlx5_1
#   ranks 4-7 -> cores 20-23 (+ 60-63), devices mlx5_2 / mlx5_3
#
# Each core pair lies inside the local_cpulist of the chosen HCA (see the
# listing at the top of this file). The second core of each pair is
# presumably the hyperthread sibling of the first -- TODO confirm.
#
# Any other rank value (including an unset variable) aborts with status 255.
case "${OMPI_COMM_WORLD_LOCAL_RANK:-}" in
  0) core=0,40;  export UCX_NET_DEVICES=mlx5_0:1 ;;
  1) core=1,41;  export UCX_NET_DEVICES=mlx5_0:1 ;;
  2) core=2,42;  export UCX_NET_DEVICES=mlx5_1:1 ;;
  3) core=3,43;  export UCX_NET_DEVICES=mlx5_1:1 ;;
  4) core=20,60; export UCX_NET_DEVICES=mlx5_2:1 ;;
  5) core=21,61; export UCX_NET_DEVICES=mlx5_2:1 ;;
  6) core=22,62; export UCX_NET_DEVICES=mlx5_3:1 ;;
  7) core=23,63; export UCX_NET_DEVICES=mlx5_3:1 ;;
  *)
    # Diagnostics go to stderr so they do not mix with application output.
    echo "May only be run with max of 8 PPN" >&2
    # 255 is the status bash reports for the former out-of-range `exit -1`.
    exit 255
    ;;
esac

# Append "<hostname> <local rank> <device>" to outfile.txt in the current
# working directory so the chosen mapping can be inspected afterwards.
printf '%s %s %s\n' \
  "$(hostname)" "$OMPI_COMM_WORLD_LOCAL_RANK" "$UCX_NET_DEVICES" >> outfile.txt

# Run the wrapped command pinned to the selected cores. "$@" passes every
# argument through unchanged; an unquoted $* would re-split arguments that
# contain whitespace and expand any glob characters in them.
taskset -c "$core" "$@"

