University of Cape Town
UCT High Performance Computing SLURM Cluster

Blog       UCT-HPC       Citations       Contact us       Help
Fri Dec 15 04:35:01 SAST 2017

CLUSTER LOAD Hold mouse over bars and indicators for more info. Lamp status
Partitions   
ucthi   uctlo
ucthimem
uctlomem

400

0

401

0

402

0

406

0

407

0

408

0

409

0

410

0

411

0

412

0

413

0

414

0

415

0

416

0

417

0

418

0
Disk space:
 / = 11% of 96G
/home = 80% of 504G
/scratch = 88% of 24T
Users logged in:

Head Node load: 0.15     Head Node RAM free: 94%
Currently computing: 0 hours     Jobs running: 0     Jobs queued: 0
Efficiency: 0%    System overview    Queue accounting    Graphs

JOBS RUNNING
#  JOBID PARTITION              NAME     USER  ACCOUNT      STATE       TIME  CPUS  NODES     NODELIST(REASON)      QOS PRIORITY     CPU TIME
-----------------------------------------------------------------------------------------------------------------------------------------------

CLUSTER STATUS
PARTITION AVAIL  TIMELIMIT  NODES  STATE NODELIST
 ucthi        up 208-08:00:      3   idle srvcnthpc[400-402]
 uctlo        up 208-08:00:      3   idle srvcnthpc[400-402]
 ucthimem*    up 208-08:00:      1  drain srvcnthpc417
 ucthimem*    up 208-08:00:     12   idle srvcnthpc[406-416,418]
 uctlomem     up 208-08:00:      1  drain srvcnthpc417
 uctlomem     up 208-08:00:     12   idle srvcnthpc[406-416,418]

PartitionName=ucthi
    AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL
    AllocNodes=ALL Default=NO QoS=N/A
    DefaultTime=NONE DisableRootJobs=NO ExclusiveUser=NO GraceTime=0 Hidden=NO
    MaxNodes=UNLIMITED MaxTime=208-08:00:00 MinNodes=1 LLN=NO MaxCPUsPerNode=UNLIMITED
    Nodes=srvcnthpc[400-402]
    PriorityJobFactor=20 PriorityTier=20 RootOnly=NO ReqResv=NO OverSubscribe=FORCE:1
    OverTimeLimit=NONE PreemptMode=REQUEUE
    State=UP TotalCPUs=36 TotalNodes=3 SelectTypeParameters=NONE
    DefMemPerCPU=1000 MaxMemPerCPU=2000
 
 PartitionName=uctlo
    AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL
    AllocNodes=ALL Default=NO QoS=N/A
    DefaultTime=NONE DisableRootJobs=NO ExclusiveUser=NO GraceTime=0 Hidden=NO
    MaxNodes=UNLIMITED MaxTime=208-08:00:00 MinNodes=1 LLN=NO MaxCPUsPerNode=UNLIMITED
    Nodes=srvcnthpc[400-402]
    PriorityJobFactor=20 PriorityTier=20 RootOnly=NO ReqResv=NO OverSubscribe=FORCE:1
    OverTimeLimit=NONE PreemptMode=REQUEUE
    State=UP TotalCPUs=36 TotalNodes=3 SelectTypeParameters=NONE
    DefMemPerCPU=1000 MaxMemPerCPU=2000
 
 PartitionName=ucthimem
    AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL
    AllocNodes=ALL Default=YES QoS=N/A
    DefaultTime=NONE DisableRootJobs=NO ExclusiveUser=NO GraceTime=0 Hidden=NO
    MaxNodes=UNLIMITED MaxTime=208-08:00:00 MinNodes=1 LLN=NO MaxCPUsPerNode=UNLIMITED
    Nodes=srvcnthpc[406-418]
    PriorityJobFactor=20 PriorityTier=20 RootOnly=NO ReqResv=NO OverSubscribe=FORCE:4
    OverTimeLimit=NONE PreemptMode=REQUEUE
    State=UP TotalCPUs=104 TotalNodes=13 SelectTypeParameters=NONE
    DefMemPerCPU=2000 MaxMemPerCPU=4000
 
 PartitionName=uctlomem
    AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL
    AllocNodes=ALL Default=NO QoS=N/A
    DefaultTime=NONE DisableRootJobs=NO ExclusiveUser=NO GraceTime=0 Hidden=NO
    MaxNodes=UNLIMITED MaxTime=208-08:00:00 MinNodes=1 LLN=NO MaxCPUsPerNode=UNLIMITED
    Nodes=srvcnthpc[406-418]
    PriorityJobFactor=10 PriorityTier=10 RootOnly=NO ReqResv=NO OverSubscribe=FORCE:4
    OverTimeLimit=NONE PreemptMode=REQUEUE
    State=UP TotalCPUs=104 TotalNodes=13 SelectTypeParameters=NONE
    DefMemPerCPU=2000 MaxMemPerCPU=4000
 

WORKER NODE STATUS
NodeName=srvcnthpc400 Arch=x86_64 CoresPerSocket=6
   CPUAlloc=0 CPUErr=0 CPUTot=12 CPULoad=12.15
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G7:12
   NodeAddr=srvcnthpc400 NodeHostName=srvcnthpc400 Version=17.02
   OS=Linux RealMemory=64000 AllocMem=0 FreeMem=60557 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthi,uctlo 
   BootTime=2017-10-06T11:59:58 SlurmdStartTime=2017-10-06T12:00:18
   CfgTRES=cpu=12,mem=62.50G
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc401 Arch=x86_64 CoresPerSocket=6
   CPUAlloc=0 CPUErr=0 CPUTot=12 CPULoad=12.12
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G7:12
   NodeAddr=srvcnthpc401 NodeHostName=srvcnthpc401 Version=17.02
   OS=Linux RealMemory=22700 AllocMem=0 FreeMem=20733 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthi,uctlo 
   BootTime=2017-10-06T14:48:46 SlurmdStartTime=2017-10-06T14:48:57
   CfgTRES=cpu=12,mem=22700M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc402 Arch=x86_64 CoresPerSocket=6
   CPUAlloc=0 CPUErr=0 CPUTot=12 CPULoad=12.22
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G7:12
   NodeAddr=srvcnthpc402 NodeHostName=srvcnthpc402 Version=17.02
   OS=Linux RealMemory=22700 AllocMem=0 FreeMem=20711 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthi,uctlo 
   BootTime=2017-10-06T14:36:34 SlurmdStartTime=2017-10-06T14:36:53
   CfgTRES=cpu=12,mem=22700M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc406 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G6:8
   NodeAddr=srvcnthpc406 NodeHostName=srvcnthpc406 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=45071 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T11:58:59 SlurmdStartTime=2017-10-06T11:59:33
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc407 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G6:8
   NodeAddr=srvcnthpc407 NodeHostName=srvcnthpc407 Version=17.02
   OS=Linux RealMemory=48000 AllocMem=0 FreeMem=45068 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T11:57:47 SlurmdStartTime=2017-10-06T11:58:10
   CfgTRES=cpu=8,mem=48000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc408 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G6:8
   NodeAddr=srvcnthpc408 NodeHostName=srvcnthpc408 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=29074 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T11:59:01 SlurmdStartTime=2017-10-06T14:09:04
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc409 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc409 NodeHostName=srvcnthpc409 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=29210 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T12:00:40 SlurmdStartTime=2017-10-06T12:01:10
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc410 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc410 NodeHostName=srvcnthpc410 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=29221 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T12:04:20 SlurmdStartTime=2017-10-06T12:04:59
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc411 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc411 NodeHostName=srvcnthpc411 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=29240 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T12:02:16 SlurmdStartTime=2017-10-06T12:02:45
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc412 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc412 NodeHostName=srvcnthpc412 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=29231 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T12:02:11 SlurmdStartTime=2017-10-06T12:02:38
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc413 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc413 NodeHostName=srvcnthpc413 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=29207 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T11:59:35 SlurmdStartTime=2017-10-06T12:00:14
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc414 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc414 NodeHostName=srvcnthpc414 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=29137 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T12:00:48 SlurmdStartTime=2017-10-06T14:08:23
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc415 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G6:8
   NodeAddr=srvcnthpc415 NodeHostName=srvcnthpc415 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=29074 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T11:58:34 SlurmdStartTime=2017-10-06T11:58:56
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc416 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc416 NodeHostName=srvcnthpc416 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=29158 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T12:00:35 SlurmdStartTime=2017-10-06T12:01:04
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc417 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G6:8,gpu:kepler:2
   NodeAddr=srvcnthpc417 NodeHostName=srvcnthpc417 Version=17.02
   OS=Linux RealMemory=48000 AllocMem=0 FreeMem=45060 Sockets=2 Boards=1
   State=IDLE+DRAIN ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T11:56:18 SlurmdStartTime=2017-10-06T11:56:41
   CfgTRES=cpu=8,mem=48000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s
   Reason=gres/gpu count too low (0 < 2) [root@2017-10-30T14:05:40]

NodeName=srvcnthpc418 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G6:8
   NodeAddr=srvcnthpc418 NodeHostName=srvcnthpc418 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=27835 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-10-06T11:57:44 SlurmdStartTime=2017-10-06T11:58:07
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s