1 Commits

Author SHA1 Message Date
d75f4a8431 add min and max temp, refactor nvidia api 2024-04-21 17:33:21 +00:00
7 changed files with 192 additions and 179 deletions

View File

@@ -2,6 +2,7 @@ package api
import ( import (
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"html/template" "html/template"
"log" "log"
@@ -20,22 +21,14 @@ import (
) )
type ( type (
NVIDIARepository interface {
GetGPU(ID int) (nvidia.GPUDetail, error)
GetGPUs() ([]nvidia.GPU, error)
DriverVersion() string
CUDAVersion() string
}
Server struct { Server struct {
repo NVIDIARepository
mux *chi.Mux mux *chi.Mux
errLog *log.Logger errLog *log.Logger
} }
WebPack struct { WebPack struct {
GPUs []nvidia.GPU GPUs []nvidia.GPUSummary
GPU nvidia.GPUDetail GPU nvidia.GPU
Username string Username string
DriverVersion string DriverVersion string
CUDAVersion string CUDAVersion string
@@ -63,10 +56,9 @@ const internalServerErrorPage string = `<!DOCTYPE html>
</body> </body>
</html>` </html>`
func New(repo NVIDIARepository) *Server { func New() *Server {
s := &Server{ s := &Server{
mux: chi.NewRouter(), mux: chi.NewRouter(),
repo: repo,
errLog: log.New(os.Stderr, log.Prefix(), log.Flags()), errLog: log.New(os.Stderr, log.Prefix(), log.Flags()),
} }
s.mux.Use(middleware.RequestID) s.mux.Use(middleware.RequestID)
@@ -97,11 +89,7 @@ func (s *Server) Serve(port uint) error {
} }
func (s *Server) handleRoot(w http.ResponseWriter, _ *http.Request) { func (s *Server) handleRoot(w http.ResponseWriter, _ *http.Request) {
gpus, err := s.repo.GetGPUs() gpus := nvidia.Summary()
if err != nil {
internalServerErrorHTML(w, err)
return
}
if len(gpus) > 0 { if len(gpus) > 0 {
w.Header().Add("Location", "/"+gpus[0].UUID) w.Header().Add("Location", "/"+gpus[0].UUID)
w.WriteHeader(http.StatusTemporaryRedirect) w.WriteHeader(http.StatusTemporaryRedirect)
@@ -125,33 +113,32 @@ func (s *Server) handleRoot(w http.ResponseWriter, _ *http.Request) {
func (s *Server) handleGPU(w http.ResponseWriter, r *http.Request) { func (s *Server) handleGPU(w http.ResponseWriter, r *http.Request) {
uuid := chi.URLParam(r, "uuid") uuid := chi.URLParam(r, "uuid")
gpu, err := nvidia.GPUByUUID(uuid)
gpus, err := s.repo.GetGPUs() if err != nil {
if errors.Is(err, nvidia.ErrNotFound) {
w.Header().Add("Location", "/")
w.WriteHeader(http.StatusTemporaryRedirect)
return
}
internalServerErrorHTML(w, err)
return
}
driverVersion, err := nvidia.DriverVersion()
if err != nil { if err != nil {
internalServerErrorHTML(w, err) internalServerErrorHTML(w, err)
return return
} }
i := -1 cudaVersion, err := nvidia.CUDAVersion()
for _, gpu := range gpus {
if gpu.UUID == uuid {
i = gpu.Index
}
}
if i == -1 {
w.WriteHeader(http.StatusNotFound)
w.Write([]byte("404 page not found"))
}
gpu, err := s.repo.GetGPU(i)
if err != nil { if err != nil {
internalServerErrorHTML(w, err) internalServerErrorHTML(w, err)
return return
} }
wp := WebPack{ wp := WebPack{
Username: "anonymous", Username: "anonymous",
GPUs: gpus, GPUs: nvidia.Summary(),
GPU: gpu, GPU: gpu,
DriverVersion: s.repo.DriverVersion(), DriverVersion: driverVersion,
CUDAVersion: s.repo.CUDAVersion(), CUDAVersion: cudaVersion,
Version: constant.Version, Version: constant.Version,
} }
@@ -168,24 +155,12 @@ func (s *Server) handleGPU(w http.ResponseWriter, r *http.Request) {
func (s *Server) handleGPUJSON(w http.ResponseWriter, r *http.Request) { func (s *Server) handleGPUJSON(w http.ResponseWriter, r *http.Request) {
uuid := chi.URLParam(r, "uuid") uuid := chi.URLParam(r, "uuid")
gpu, err := nvidia.GPUByUUID(uuid)
gpus, err := s.repo.GetGPUs()
if err != nil { if err != nil {
internalServerErrorJSON(w, err) if errors.Is(err, nvidia.ErrNotFound) {
return notFoundJSON(w)
} return
i := -1
for _, gpu := range gpus {
if gpu.UUID == uuid {
i = gpu.Index
} }
}
if i == -1 {
notFoundJSON(w)
return
}
gpu, err := s.repo.GetGPU(i)
if err != nil {
internalServerErrorJSON(w, err) internalServerErrorJSON(w, err)
return return
} }

10
go.mod
View File

@@ -3,12 +3,10 @@ module nvidiadashboard
go 1.22 go 1.22
require ( require (
github.com/NVIDIA/go-nvml v0.12.0-2 github.com/NVIDIA/go-nvml v0.12.0-4
github.com/go-chi/chi/v5 v5.0.12 github.com/go-chi/chi/v5 v5.0.12
golang.org/x/net v0.21.0 github.com/inhies/go-bytesize v0.0.0-20220417184213-4913239db9cf
golang.org/x/net v0.24.0
) )
require ( require golang.org/x/text v0.14.0 // indirect
github.com/inhies/go-bytesize v0.0.0-20220417184213-4913239db9cf // indirect
golang.org/x/text v0.14.0 // indirect
)

20
go.sum
View File

@@ -1,6 +1,5 @@
github.com/NVIDIA/go-nvml v0.12.0-2 h1:Sg239yy7jmopu/cuvYauoMj9fOpcGMngxVxxS1EBXeY= github.com/NVIDIA/go-nvml v0.12.0-4 h1:BvPjnjJr6qje0zov57Md7TwEA8i/12kZeUQIpyWzTEE=
github.com/NVIDIA/go-nvml v0.12.0-2/go.mod h1:7ruy85eOM73muOc/I37euONSwEyFqZsv5ED9AogD4G0= github.com/NVIDIA/go-nvml v0.12.0-4/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-chi/chi/v5 v5.0.12 h1:9euLV5sTrTNTRUU9POmDUvfxyj6LAABLUcEWO+JJb4s= github.com/go-chi/chi/v5 v5.0.12 h1:9euLV5sTrTNTRUU9POmDUvfxyj6LAABLUcEWO+JJb4s=
@@ -9,18 +8,11 @@ github.com/inhies/go-bytesize v0.0.0-20220417184213-4913239db9cf h1:FtEj8sfIcaaB
github.com/inhies/go-bytesize v0.0.0-20220417184213-4913239db9cf/go.mod h1:yrqSXGoD/4EKfF26AOGzscPOgTTJcyAwM2rpixWT+t4= github.com/inhies/go-bytesize v0.0.0-20220417184213-4913239db9cf/go.mod h1:yrqSXGoD/4EKfF26AOGzscPOgTTJcyAwM2rpixWT+t4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

18
main.go
View File

@@ -1,6 +1,7 @@
package main package main
import ( import (
"flag"
"fmt" "fmt"
"log" "log"
"nvidiadashboard/api" "nvidiadashboard/api"
@@ -9,20 +10,19 @@ import (
) )
func main() { func main() {
var port int
flag.IntVar(&port, "port", 3000, "Port of the web server")
flag.Parse()
fmt.Println("*** NVIDIA Web Dashboard -", constant.Version, "***") fmt.Println("*** NVIDIA Web Dashboard -", constant.Version, "***")
r := nvidia.New() nvidia.RunDaemon()
defer r.Close() defer nvidia.Close()
log.Println("[INFO] NVIDIA driver loaded:", r.DriverVersion()) s := api.New()
gpus, _ := r.GetGPUs()
log.Printf("[INFO] %d NVIDIA GPUs found", len(gpus))
s := api.New(r)
log.Printf("[INFO] Server listening at :3000") log.Printf("[INFO] Server listening at :3000")
err := s.Serve(3000) err := s.Serve(uint(port))
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }

View File

@@ -1,5 +1,5 @@
package constant package constant
const ( const (
Version = "0.1-alpha" Version = "0.2-alpha"
) )

View File

@@ -3,32 +3,42 @@ package nvidia
import ( import (
"errors" "errors"
"fmt" "fmt"
"log"
"os" "os"
"strconv" "strconv"
"strings" "strings"
"sync"
"time"
"github.com/NVIDIA/go-nvml/pkg/nvml" "github.com/NVIDIA/go-nvml/pkg/nvml"
) )
type ( type (
Repository struct { cache struct {
driverVersion string gpus map[string]GPU
cudaVersion string mu sync.RWMutex
}
GPUSummary struct {
UUID string
Name string
} }
GPU struct { GPU struct {
Name string `json:"name"` Name string `json:"name"`
UUID string `json:"uuid"` UUID string `json:"uuid"`
Index int `json:"-"` Index int `json:"-"`
Temperature Temperature `json:"temperature"`
Utilization Utilization `json:"usage"`
Processes []Process `json:"processes"`
Memory Memory `json:"memory"`
Fans []Fan `json:"fans"`
} }
GPUDetail struct { Temperature struct {
GPU Min uint
CoreTemperature int `json:"coreTemperature"` Max uint
Utilization Utilization `json:"usage"` Current uint
Processes []Process `json:"processes"`
Memory Memory `json:"memory"`
Fans []Fan `json:"fans"`
} }
Memory struct { Memory struct {
@@ -57,9 +67,15 @@ type (
} }
) )
var instance *Repository var (
ErrInitialization = errors.New("unable to initialize NVML")
ErrDriverAPI = errors.New("an error occured while querying the driver")
ErrNotFound = errors.New("the gpu is not found")
func (*Repository) Close() error { c *cache
)
func Close() error {
ret := nvml.Shutdown() ret := nvml.Shutdown()
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return errors.New(nvml.ErrorString(ret)) return errors.New(nvml.ErrorString(ret))
@@ -67,60 +83,129 @@ func (*Repository) Close() error {
return nil return nil
} }
func New() *Repository { func RunDaemon() {
if instance == nil { c = &cache{
ret := nvml.Init() gpus: make(map[string]GPU),
if ret != nvml.SUCCESS {
panic("unable to initialize NVML: " + nvml.ErrorString(ret))
}
driverVersion, ret := nvml.SystemGetDriverVersion()
if ret != nvml.SUCCESS {
panic("unable to initialize NVML: " + nvml.ErrorString(ret))
}
cudaVersion, ret := nvml.SystemGetCudaDriverVersion_v2()
if ret != nvml.SUCCESS {
panic("unable to initialize NVML: " + nvml.ErrorString(ret))
}
instance = &Repository{
driverVersion: driverVersion,
cudaVersion: parseCUDA(cudaVersion),
}
} }
return instance ret := nvml.Init()
if ret != nvml.SUCCESS {
log.Println("[ERROR]", nvml.ErrorString(ret))
return
}
go func() {
for {
time.Sleep(1 * time.Second)
update()
}
}()
} }
func (*Repository) GetGPUs() ([]GPU, error) { func update() {
c.mu.Lock()
defer c.mu.Unlock()
count, ret := nvml.DeviceGetCount() count, ret := nvml.DeviceGetCount()
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return nil, errors.New("unable to get device count: " + nvml.ErrorString(ret)) log.Println("[ERROR]", nvml.ErrorString(ret))
return
} }
gpus := make([]GPU, 0, count)
for i := 0; i < count; i++ { for i := 0; i < count; i++ {
gpu, _, err := getGPU(i) gpu, err := query(i)
if err != nil { if err != nil {
return nil, err log.Println("[ERROR]", err)
return
} }
gpus = append(gpus, gpu) t := &gpu.Temperature
if g, ok := c.gpus[gpu.UUID]; ok {
if g.Temperature.Min > t.Current {
t.Min = t.Current
} else {
t.Min = g.Temperature.Min
}
if g.Temperature.Max < t.Current {
t.Max = t.Current
} else {
t.Max = g.Temperature.Max
}
} else {
t.Max = t.Current
t.Min = t.Current
}
c.gpus[gpu.UUID] = gpu
} }
return gpus, nil
} }
func (*Repository) GetGPU(ID int) (GPUDetail, error) { func GPUByUUID(uuid string) (GPU, error) {
gpu, device, err := getGPU(ID) c.mu.RLock()
if err != nil { defer c.mu.RUnlock()
return GPUDetail{}, err gpu, ok := c.gpus[uuid]
if !ok {
return GPU{}, fmt.Errorf("%w: %s", ErrNotFound, uuid)
}
return gpu, nil
}
// Summary returns the UUID and name of every GPU currently stored in the
// package cache. The cache is read under its read lock.
//
// The result is nil when the cache holds no GPU.
//
// NOTE(review): entries come out in map iteration order, which Go does not
// specify, so the order (and therefore element 0) can differ between calls.
func Summary() []GPUSummary {
	c.mu.RLock()
	defer c.mu.RUnlock()

	var summaries []GPUSummary
	for _, cached := range c.gpus {
		entry := GPUSummary{
			Name: cached.Name,
			UUID: cached.UUID,
		}
		summaries = append(summaries, entry)
	}
	return summaries
}
// DriverVersion asks NVML for the system driver version.
//
// On success it returns the version string as reported by
// nvml.SystemGetDriverVersion. On any other NVML return code it returns an
// empty string and an error wrapping ErrInitialization with the NVML error text.
func DriverVersion() (string, error) {
	version, ret := nvml.SystemGetDriverVersion()
	if ret == nvml.SUCCESS {
		return version, nil
	}
	return "", fmt.Errorf("%w: %s", ErrInitialization, nvml.ErrorString(ret))
}
// CUDAVersion asks NVML for the CUDA driver version and formats it with
// parseCUDA.
//
// On any NVML return code other than nvml.SUCCESS it returns an empty string
// and an error wrapping ErrInitialization with the NVML error text.
func CUDAVersion() (string, error) {
	raw, ret := nvml.SystemGetCudaDriverVersion_v2()
	if ret == nvml.SUCCESS {
		return parseCUDA(raw), nil
	}
	return "", fmt.Errorf("%w: %s", ErrInitialization, nvml.ErrorString(ret))
}
// parseCUDA formats an integer version as "major.minor", where major is
// version/1000 and minor is the remainder divided by 10
// (for example 12040 becomes "12.4").
func parseCUDA(version int) string {
	major, rest := version/1000, version%1000
	return fmt.Sprintf("%d.%d", major, rest/10)
}
func query(index int) (GPU, error) {
device, ret := nvml.DeviceGetHandleByIndex(index)
if ret != nvml.SUCCESS {
return GPU{}, fmt.Errorf("%w: DeviceGetHandleByIndex: %s", ErrDriverAPI, nvml.ErrorString(ret))
}
name, ret := device.GetName()
if ret != nvml.SUCCESS {
return GPU{}, fmt.Errorf("%w: GetName: %s", ErrDriverAPI, nvml.ErrorString(ret))
}
uuid, ret := device.GetUUID()
if ret != nvml.SUCCESS {
return GPU{}, fmt.Errorf("%w: GetUUID: %s", ErrDriverAPI, nvml.ErrorString(ret))
} }
fanCount, ret := device.GetNumFans() fanCount, ret := device.GetNumFans()
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return GPUDetail{}, errors.New(nvml.ErrorString(ret)) return GPU{}, fmt.Errorf("%w: GetNumFans: %s", ErrDriverAPI, nvml.ErrorString(ret))
} }
fans := make([]Fan, 0, fanCount) fans := make([]Fan, 0, fanCount)
for i := 0; i < fanCount; i++ { for i := 0; i < fanCount; i++ {
fdev, ret := device.GetFanSpeed_v2(i) fdev, ret := device.GetFanSpeed_v2(i)
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return GPUDetail{}, errors.New(nvml.ErrorString(ret)) return GPU{}, fmt.Errorf("%w: GetFanSpeed_v2: %s", ErrDriverAPI, nvml.ErrorString(ret))
} }
fan := Fan{ fan := Fan{
Speed: int(fdev), Speed: int(fdev),
@@ -130,30 +215,30 @@ func (*Repository) GetGPU(ID int) (GPUDetail, error) {
temp, ret := device.GetTemperature(nvml.TEMPERATURE_GPU) temp, ret := device.GetTemperature(nvml.TEMPERATURE_GPU)
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return GPUDetail{}, errors.New(nvml.ErrorString(ret)) return GPU{}, fmt.Errorf("%w: GetTemperature: %s", ErrDriverAPI, nvml.ErrorString(ret))
} }
load, ret := device.GetUtilizationRates() load, ret := device.GetUtilizationRates()
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return GPUDetail{}, errors.New(nvml.ErrorString(ret)) return GPU{}, fmt.Errorf("%w: GetUtilizationRates: %s", ErrDriverAPI, nvml.ErrorString(ret))
} }
decUsage, _, ret := device.GetDecoderUtilization() decUsage, _, ret := device.GetDecoderUtilization()
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return GPUDetail{}, errors.New(nvml.ErrorString(ret)) return GPU{}, fmt.Errorf("%w: GetDecoderUtilization: %s", ErrDriverAPI, nvml.ErrorString(ret))
} }
encUsage, _, ret := device.GetEncoderUtilization() encUsage, _, ret := device.GetEncoderUtilization()
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return GPUDetail{}, errors.New(nvml.ErrorString(ret)) return GPU{}, fmt.Errorf("%w: GetEncoderUtilization: %s", ErrDriverAPI, nvml.ErrorString(ret))
} }
// Fetch all running process on the GPU // Fetch all running process on the GPU
var allProcess []Process allProcess := make([]Process, 0)
// Compute proc // Compute proc
proc, ret := device.GetComputeRunningProcesses() proc, ret := device.GetComputeRunningProcesses()
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return GPUDetail{}, errors.New(nvml.ErrorString(ret)) return GPU{}, fmt.Errorf("%w: GetComputeRunningProcesses: %s", ErrDriverAPI, nvml.ErrorString(ret))
} }
for _, p := range proc { for _, p := range proc {
@@ -169,7 +254,7 @@ func (*Repository) GetGPU(ID int) (GPUDetail, error) {
// Graphics/3D procs // Graphics/3D procs
proc, ret = device.GetGraphicsRunningProcesses() proc, ret = device.GetGraphicsRunningProcesses()
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return GPUDetail{}, errors.New(nvml.ErrorString(ret)) return GPU{}, fmt.Errorf("%w: GetGraphicsRunningProcesses: %s", ErrDriverAPI, nvml.ErrorString(ret))
} }
for _, p := range proc { for _, p := range proc {
sproc := Process{ sproc := Process{
@@ -184,7 +269,7 @@ func (*Repository) GetGPU(ID int) (GPUDetail, error) {
// MPS procs // MPS procs
proc, ret = device.GetMPSComputeRunningProcesses() proc, ret = device.GetMPSComputeRunningProcesses()
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return GPUDetail{}, errors.New(nvml.ErrorString(ret)) return GPU{}, fmt.Errorf("%w: GetMPSComputeRunningProcesses: %s", ErrDriverAPI, nvml.ErrorString(ret))
} }
for _, p := range proc { for _, p := range proc {
sproc := Process{ sproc := Process{
@@ -206,12 +291,15 @@ func (*Repository) GetGPU(ID int) (GPUDetail, error) {
mem, ret := device.GetMemoryInfo_v2() mem, ret := device.GetMemoryInfo_v2()
if ret != nvml.SUCCESS { if ret != nvml.SUCCESS {
return GPUDetail{}, errors.New(nvml.ErrorString(ret)) return GPU{}, fmt.Errorf("%w: GetMemoryInfo_v2: %s", ErrDriverAPI, nvml.ErrorString(ret))
} }
return GPUDetail{ return GPU{
GPU: gpu, Name: name,
CoreTemperature: int(temp), UUID: uuid,
Temperature: Temperature{
Current: uint(temp),
},
Utilization: Utilization{ Utilization: Utilization{
Decoder: int(decUsage), Decoder: int(decUsage),
Encoder: int(encUsage), Encoder: int(encUsage),
@@ -228,43 +316,3 @@ func (*Repository) GetGPU(ID int) (GPUDetail, error) {
Fans: fans, Fans: fans,
}, nil }, nil
} }
// DriverVersion returns the driver version string held in the repository's
// driverVersion field.
func (r *Repository) DriverVersion() string {
return r.driverVersion
}
// CUDAVersion returns the CUDA version string held in the repository's
// cudaVersion field.
func (r *Repository) CUDAVersion() string {
return r.cudaVersion
}
// parseCUDA formats an integer version as "major.minor", where major is
// version/1000 and minor is the remainder divided by 10
// (for example 12040 becomes "12.4").
func parseCUDA(version int) string {
	major, rest := version/1000, version%1000
	return fmt.Sprintf("%d.%d", major, rest/10)
}
// getGPU resolves the NVML device at index ID and returns its identity
// (name, UUID and the index itself) together with the device handle.
//
// If any NVML call returns a code other than nvml.SUCCESS, the lookup stops
// and zero values are returned with an error carrying the NVML error text.
func getGPU(ID int) (GPU, nvml.Device, error) {
	device, ret := nvml.DeviceGetHandleByIndex(ID)
	if ret != nvml.SUCCESS {
		return GPU{}, nvml.Device{}, errors.New(nvml.ErrorString(ret))
	}

	// Fill the result field by field; it is only returned once every query succeeded.
	var gpu GPU
	gpu.Index = ID

	if gpu.Name, ret = device.GetName(); ret != nvml.SUCCESS {
		return GPU{}, nvml.Device{}, errors.New(nvml.ErrorString(ret))
	}
	if gpu.UUID, ret = device.GetUUID(); ret != nvml.SUCCESS {
		return GPU{}, nvml.Device{}, errors.New(nvml.ErrorString(ret))
	}
	return gpu, device, nil
}

2
static

Submodule static updated: 369059c925...e75644b851