diff --git a/pgme.go b/pgme.go index 7e90377..14c3643 100644 --- a/pgme.go +++ b/pgme.go @@ -1,29 +1,27 @@ package main import ( - "bytes" - "context" - "encoding/csv" - "fmt" - "html/template" - "net/http" - "log" - "os" - "os/exec" - "strings" - "syscall" + "bytes" + "context" + "encoding/csv" + "fmt" + "html/template" + "log" + "net/http" + "os" + "os/exec" "os/signal" "path" + "strings" + "syscall" ) - type PageVariables struct { - PageTitle string - Metrics []string - VersionInfo map[string]string + PageTitle string + Metrics []string + VersionInfo map[string]string } - var ( // BuildTime is a time label of the moment when the binary was built BuildTime = "unset" @@ -42,18 +40,16 @@ func getEnv(key, fallback string) string { return value } - // healthz is a liveness probe. func healthz(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) } - // name, index, temperature.gpu, utilization.gpu, // utilization.memory, memory.total, memory.free, memory.used func home(w http.ResponseWriter, r *http.Request) { - metricList := []string { + metricList := []string{ "temperature.gpu", "utilization.gpu", "utilization.memory", "memory.total", "memory.free", "memory.used"} @@ -62,7 +58,6 @@ func home(w http.ResponseWriter, r *http.Request) { verInfo["Commit"] = Commit verInfo["Release"] = Release - pv := PageVariables{ PageTitle: "Prometheus nVidia GPU Metrics Exporter", Metrics: metricList, @@ -83,55 +78,51 @@ func home(w http.ResponseWriter, r *http.Request) { } - func metrics(response http.ResponseWriter, request *http.Request) { - out, err := exec.Command( - "nvidia-smi", - "--query-gpu=name,index,temperature.gpu,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used", - "--format=csv,noheader,nounits").Output() - - if err != nil { - log.Printf("ERROR: %s\n", err) - return - } - - csvReader := csv.NewReader(bytes.NewReader(out)) - csvReader.TrimLeadingSpace = true - records, err := csvReader.ReadAll() - - if err != nil { - log.Printf("%s\n", err) - return - } - - metricList := []string { - "temperature.gpu", "utilization.gpu", - "utilization.memory", "memory.total", "memory.free", "memory.used"} - - result := "" - for _, row := range records { - name := fmt.Sprintf("%s[%s]", row[0], row[1]) - for idx, value := range row[2:] { - result = fmt.Sprintf("%s%s{gpu=\"%s\"} %s\n", result, metricList[idx], name, value) - } - } - - fmt.Fprintf(response, strings.Replace(result, ".", "_", -1)) -} + out, err := exec.Command( + "nvidia-smi", + "--query-gpu=name,index,temperature.gpu,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used", + "--format=csv,noheader,nounits").Output() + + if err != nil { + log.Printf("ERROR: %s\n", err) + return + } + csvReader := csv.NewReader(bytes.NewReader(out)) + csvReader.TrimLeadingSpace = true + records, err := csvReader.ReadAll() -func main() { - log.Print("Starting the service...") - port := getEnv("PORT", "9101"); - addr := ":"+port + if err != nil { + log.Printf("%s\n", err) + return + } - log.Print("- PORT set to "+ port +". If environment variable PORT is not set the default is 9101") + metricList := []string{ + "temperature.gpu", "utilization.gpu", + "utilization.memory", "memory.total", "memory.free", "memory.used"} + result := "" + for _, row := range records { + name := fmt.Sprintf("%s[%s]", row[0], row[1]) + for idx, value := range row[2:] { + result = fmt.Sprintf("%s%s{gpu=\"%s\"} %s\n", result, metricList[idx], name, value) + } + } + + fmt.Fprintf(response, strings.Replace(result, ".", "_", -1)) +} + +func main() { + log.Print("Starting the service...") + port := getEnv("PORT", "9101") + addr := ":" + port + + log.Print("- PORT set to " + port + ". If environment variable PORT is not set the default is 9101") interrupt := make(chan os.Signal, 1) signal.Notify(interrupt, os.Interrupt, syscall.SIGTERM) - srv := &http.Server{ Addr: addr, } @@ -139,7 +130,7 @@ func main() { go func() { http.HandleFunc("/", home) http.HandleFunc("/healthz", healthz) - http.HandleFunc("/metrics/", metrics) + http.HandleFunc("/metrics", metrics) err := srv.ListenAndServe() if err != nil { @@ -148,7 +139,7 @@ func main() { }() - log.Print("The service is listening on ", port) + log.Print("The service is listening on ", port) killSignal := <-interrupt switch killSignal { @@ -162,5 +153,4 @@ func main() { srv.Shutdown(context.Background()) log.Print("Done") - }