University of Cape Town
UCT High Performance Computing SLURM Cluster

Blog       UCT-HPC       Citations       Contact us       Help
Wed Aug 16 17:05:01 SAST 2017

CLUSTER LOAD Hold mouse over bars and indicators for more info. Lamp status
Partitions   
ucthi   uctlo
ucthimem
uctlomem

400

0

401

0

402

0

406

0

407

0

408

0

409

0

410

0

411

0

412

0

413

0

414

0

415

0

416

0

417

0

418

0
Disk space:
 / = 10% of 96G
/home = 80% of 504G
/scratch = 83% of 24T
Users logged in:

Head Node load: 0.11     Head Node RAM free: 96%
Currently computing: 0 hours     Jobs running: 0     Jobs queued: 0
Efficiency: 0%    System overview    Queue accounting    Graphs

JOBS RUNNING
#  JOBID PARTITION              NAME     USER  ACCOUNT      STATE       TIME  CPUS  NODES     NODELIST(REASON)      QOS PRIORITY     CPU TIME
-----------------------------------------------------------------------------------------------------------------------------------------------

CLUSTER STATUS
PARTITION AVAIL  TIMELIMIT  NODES  STATE NODELIST
 ucthi        up 208-08:00:      3   idle srvcnthpc[400-402]
 uctlo        up 208-08:00:      3   idle srvcnthpc[400-402]
 ucthimem*    up 208-08:00:     12   idle srvcnthpc[406-412,414-418]
 ucthimem*    up 208-08:00:      1   down srvcnthpc413
 uctlomem     up 208-08:00:     12   idle srvcnthpc[406-412,414-418]
 uctlomem     up 208-08:00:      1   down srvcnthpc413

PartitionName=ucthi
    AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL
    AllocNodes=ALL Default=NO QoS=N/A
    DefaultTime=NONE DisableRootJobs=NO ExclusiveUser=NO GraceTime=0 Hidden=NO
    MaxNodes=UNLIMITED MaxTime=208-08:00:00 MinNodes=1 LLN=NO MaxCPUsPerNode=UNLIMITED
    Nodes=srvcnthpc[400-402]
    PriorityJobFactor=20 PriorityTier=20 RootOnly=NO ReqResv=NO OverSubscribe=FORCE:1
    OverTimeLimit=NONE PreemptMode=REQUEUE
    State=UP TotalCPUs=36 TotalNodes=3 SelectTypeParameters=NONE
    DefMemPerCPU=1000 MaxMemPerCPU=2000
 
 PartitionName=uctlo
    AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL
    AllocNodes=ALL Default=NO QoS=N/A
    DefaultTime=NONE DisableRootJobs=NO ExclusiveUser=NO GraceTime=0 Hidden=NO
    MaxNodes=UNLIMITED MaxTime=208-08:00:00 MinNodes=1 LLN=NO MaxCPUsPerNode=UNLIMITED
    Nodes=srvcnthpc[400-402]
    PriorityJobFactor=20 PriorityTier=20 RootOnly=NO ReqResv=NO OverSubscribe=FORCE:1
    OverTimeLimit=NONE PreemptMode=REQUEUE
    State=UP TotalCPUs=36 TotalNodes=3 SelectTypeParameters=NONE
    DefMemPerCPU=1000 MaxMemPerCPU=2000
 
 PartitionName=ucthimem
    AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL
    AllocNodes=ALL Default=YES QoS=N/A
    DefaultTime=NONE DisableRootJobs=NO ExclusiveUser=NO GraceTime=0 Hidden=NO
    MaxNodes=UNLIMITED MaxTime=208-08:00:00 MinNodes=1 LLN=NO MaxCPUsPerNode=UNLIMITED
    Nodes=srvcnthpc[406-418]
    PriorityJobFactor=20 PriorityTier=20 RootOnly=NO ReqResv=NO OverSubscribe=FORCE:4
    OverTimeLimit=NONE PreemptMode=REQUEUE
    State=UP TotalCPUs=104 TotalNodes=13 SelectTypeParameters=NONE
    DefMemPerCPU=2000 MaxMemPerCPU=4000
 
 PartitionName=uctlomem
    AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL
    AllocNodes=ALL Default=NO QoS=N/A
    DefaultTime=NONE DisableRootJobs=NO ExclusiveUser=NO GraceTime=0 Hidden=NO
    MaxNodes=UNLIMITED MaxTime=208-08:00:00 MinNodes=1 LLN=NO MaxCPUsPerNode=UNLIMITED
    Nodes=srvcnthpc[406-418]
    PriorityJobFactor=10 PriorityTier=10 RootOnly=NO ReqResv=NO OverSubscribe=FORCE:4
    OverTimeLimit=NONE PreemptMode=REQUEUE
    State=UP TotalCPUs=104 TotalNodes=13 SelectTypeParameters=NONE
    DefMemPerCPU=2000 MaxMemPerCPU=4000
 

WORKER NODE STATUS
NodeName=srvcnthpc400 Arch=x86_64 CoresPerSocket=6
   CPUAlloc=0 CPUErr=0 CPUTot=12 CPULoad=12.24
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G7:12
   NodeAddr=srvcnthpc400 NodeHostName=srvcnthpc400 Version=17.02
   OS=Linux RealMemory=64000 AllocMem=0 FreeMem=61766 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthi,uctlo 
   BootTime=2017-07-23T13:35:04 SlurmdStartTime=2017-07-23T16:05:53
   CfgTRES=cpu=12,mem=62.50G
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc401 Arch=x86_64 CoresPerSocket=6
   CPUAlloc=0 CPUErr=0 CPUTot=12 CPULoad=12.13
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G7:12
   NodeAddr=srvcnthpc401 NodeHostName=srvcnthpc401 Version=17.02
   OS=Linux RealMemory=22700 AllocMem=0 FreeMem=21280 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthi,uctlo 
   BootTime=2017-07-23T15:26:03 SlurmdStartTime=2017-07-23T15:26:32
   CfgTRES=cpu=12,mem=22700M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc402 Arch=x86_64 CoresPerSocket=6
   CPUAlloc=0 CPUErr=0 CPUTot=12 CPULoad=12.16
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G7:12
   NodeAddr=srvcnthpc402 NodeHostName=srvcnthpc402 Version=17.02
   OS=Linux RealMemory=22700 AllocMem=0 FreeMem=21514 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthi,uctlo 
   BootTime=2017-07-23T15:26:09 SlurmdStartTime=2017-07-23T15:26:25
   CfgTRES=cpu=12,mem=22700M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc406 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G6:8
   NodeAddr=srvcnthpc406 NodeHostName=srvcnthpc406 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=46264 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T13:33:58 SlurmdStartTime=2017-07-23T16:06:54
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc407 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G6:8
   NodeAddr=srvcnthpc407 NodeHostName=srvcnthpc407 Version=17.02
   OS=Linux RealMemory=48000 AllocMem=0 FreeMem=46257 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T13:33:31 SlurmdStartTime=2017-07-24T11:01:49
   CfgTRES=cpu=8,mem=48000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc408 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G6:8
   NodeAddr=srvcnthpc408 NodeHostName=srvcnthpc408 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=30247 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T13:33:22 SlurmdStartTime=2017-07-24T11:02:17
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc409 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc409 NodeHostName=srvcnthpc409 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=30110 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T15:36:15 SlurmdStartTime=2017-07-23T15:37:04
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc410 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc410 NodeHostName=srvcnthpc410 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=30287 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T13:33:05 SlurmdStartTime=2017-07-24T11:02:48
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc411 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc411 NodeHostName=srvcnthpc411 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=30289 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T13:32:46 SlurmdStartTime=2017-07-24T11:03:11
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc412 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc412 NodeHostName=srvcnthpc412 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=30296 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T13:33:00 SlurmdStartTime=2017-07-24T11:03:34
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc413 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc413 NodeHostName=srvcnthpc413 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=30221 Sockets=2 Boards=1
   State=DOWN ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T13:32:30 SlurmdStartTime=2017-07-24T11:07:38
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s
   Reason=Node unexpectedly rebooted [slurm@2017-07-31T15:11:02]

NodeName=srvcnthpc414 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc414 NodeHostName=srvcnthpc414 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=30283 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T13:32:45 SlurmdStartTime=2017-07-24T11:07:50
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc415 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G6:8
   NodeAddr=srvcnthpc415 NodeHostName=srvcnthpc415 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=30250 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T13:33:47 SlurmdStartTime=2017-07-24T11:08:04
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc416 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G1:8
   NodeAddr=srvcnthpc416 NodeHostName=srvcnthpc416 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=30282 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T13:32:58 SlurmdStartTime=2017-07-24T11:08:19
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc417 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G6:8
   NodeAddr=srvcnthpc417 NodeHostName=srvcnthpc417 Version=17.02
   OS=Linux RealMemory=48000 AllocMem=0 FreeMem=46262 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T13:33:54 SlurmdStartTime=2017-07-24T11:08:34
   CfgTRES=cpu=8,mem=48000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s

NodeName=srvcnthpc418 Arch=x86_64 CoresPerSocket=4
   CPUAlloc=0 CPUErr=0 CPUTot=8 CPULoad=0.01
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=chip:G6:8
   NodeAddr=srvcnthpc418 NodeHostName=srvcnthpc418 Version=17.02
   OS=Linux RealMemory=32000 AllocMem=0 FreeMem=22672 Sockets=2 Boards=1
   State=IDLE ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=ucthimem,uctlomem 
   BootTime=2017-07-23T15:37:17 SlurmdStartTime=2017-07-23T15:37:41
   CfgTRES=cpu=8,mem=32000M
   AllocTRES=
   CapWatts=n/a
   CurrentWatts=0 LowestJoules=0 ConsumedJoules=0
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s