diff --git a/src/extract_gpuinfo_amdgpu.c b/src/extract_gpuinfo_amdgpu.c index c620ca7..ce2bef8 100644 --- a/src/extract_gpuinfo_amdgpu.c +++ b/src/extract_gpuinfo_amdgpu.c @@ -25,16 +25,17 @@ #include #include -#include #include #include +#include #include #include #include #include +#include #include -#include #include +#include #include #include #include @@ -183,7 +184,14 @@ struct gpu_info_amdgpu { int fd; amdgpu_device_handle amdgpu_device; - char pdev[20]; + char pdev[PDEV_LEN]; + int sysfsFD; // file descriptor for the /sys/bus/pci/devices// folder + int hwmonFD; // file descriptor for the /sys/bus/pci/devices//hwmon/hwmon[0-9]+ folder + + // We poll the fan frequently enough and want to avoid the open/close overhead of the sysfs file + FILE *fanSpeedFILE; // stdio stream kept open on the current fan speed file (NULL when no fan sensor is usable) + // Used to compute the actual fan speed + unsigned maxFanValue; }; static LIST_HEAD(allocations); @@ -301,6 +309,12 @@ static void gpuinfo_amdgpu_shutdown(void) { struct gpu_info_amdgpu *allocated, *tmp; list_for_each_entry_safe(allocated, tmp, &allocations, allocate_list) { + if (allocated->hwmonFD >= 0) + close(allocated->hwmonFD); + if (allocated->sysfsFD >= 0) + close(allocated->sysfsFD); + if (allocated->fanSpeedFILE) // NULL when no fan sensor was opened; fclose(NULL) is undefined + fclose(allocated->fanSpeedFILE); list_del(&allocated->allocate_list); free(allocated); } @@ -352,6 +366,48 @@ static void authenticate_drm(int fd) { fprintf(stderr, "Failed to authenticate to DRM; XCB authentication unimplemented\n"); } +static void initDeviceSysfsPaths(struct gpu_info_amdgpu *gpu_info) { + // Open the device sys folder to gather information not available through the DRM driver + char devicePath[22 + PDEV_LEN]; + snprintf(devicePath, sizeof(devicePath), "/sys/bus/pci/devices/%s", gpu_info->pdev); + gpu_info->sysfsFD = open(devicePath, O_RDONLY); + + // Open the device hwmon folder (fan speeds are available there) + static const char hwmon[] = "hwmon"; + if (gpu_info->sysfsFD >= 0) { + int hwmondirFD = openat(gpu_info->sysfsFD, hwmon, O_RDONLY); + if 
(hwmondirFD >= 0) { + DIR *hwmonDir = fdopendir(hwmondirFD); + if (hwmonDir) { + struct dirent *dirEntry; + while ((dirEntry = readdir(hwmonDir))) { + // There should be one directory inside hwmon, with a name having the following pattern hwmon[0-9]+ + if (dirEntry->d_type == DT_DIR) { + size_t matchLen = 0; + for (matchLen = 0; matchLen < sizeof(hwmon) - 1; ++matchLen) { + if (dirEntry->d_name[matchLen] == '\0' || hwmon[matchLen] != dirEntry->d_name[matchLen]) + break; + } + // We found our candidate + if (matchLen == sizeof(hwmon) - 1) + break; + } + } + if (dirEntry) { + gpu_info->hwmonFD = openat(dirfd(hwmonDir), dirEntry->d_name, O_RDONLY); + } else + gpu_info->hwmonFD = -1; + closedir(hwmonDir); + } else { + gpu_info->hwmonFD = -1; // the hwmon folder could not be listed: mark the descriptor as invalid + close(hwmondirFD); + } + } else + gpu_info->hwmonFD = -1; + } else + gpu_info->hwmonFD = -1; +} + #define VENDOR_AMD 0x1002 static bool gpuinfo_amdgpu_get_device_handles( @@ -450,6 +506,7 @@ static bool gpuinfo_amdgpu_get_device_handles( devs[i]->businfo.pci->bus, devs[i]->businfo.pci->dev, devs[i]->businfo.pci->func); + initDeviceSysfsPaths(&gpu_infos[*count]); list_add_tail(&gpu_infos[*count].base.list, devices); *count += 1; } else { @@ -464,6 +521,35 @@ static bool gpuinfo_amdgpu_get_device_handles( return true; } +static int rewindAndReadPattern(FILE *file, const char *format, ...) { + va_list args; + va_start(args, format); + rewind(file); + fflush(file); + int matches = vfscanf(file, format, args); + va_end(args); + return matches; +} + +static int readValueFromFileAt(int folderFD, const char *fileName, const char *format, ...) 
{ + // Open the file + int fd = openat(folderFD, fileName, O_RDONLY); + if (fd < 0) + return 0; + FILE *file = fdopen(fd, "r"); + if (!file) { + close(fd); + return 0; + } + // Read the pattern (va_start only once the file is open, so that every va_start is paired with a va_end) + va_list args; + va_start(args, format); + int nread = vfscanf(file, format, args); + va_end(args); + fclose(file); + return nread; +} + static void gpuinfo_amdgpu_populate_static_info(struct gpu_info *_gpu_info) { struct gpu_info_amdgpu *gpu_info = container_of(_gpu_info, struct gpu_info_amdgpu, base); @@ -527,6 +613,46 @@ static void gpuinfo_amdgpu_populate_static_info(struct gpu_info *_gpu_info) { } } else RESET_VALID(gpuinfo_device_name_valid, static_info->valid); + + // Retrieve infos from sysfs. + + // 1) Fan + // If multiple fans are present, use the first one. Some hardware do not wire + // the sensor for the second fan, or use the same value as the first fan. + + // Look for which fan to use (PWM or RPM) + unsigned pwmIsEnabled; + int NreadPatterns = readValueFromFileAt(gpu_info->hwmonFD, "pwm1_enable", "%u", &pwmIsEnabled); + bool usePWMSensor = NreadPatterns == 1 && pwmIsEnabled > 0; + + bool useRPMSensor = false; + if (!usePWMSensor) { + unsigned rpmIsEnabled; + NreadPatterns = readValueFromFileAt(gpu_info->hwmonFD, "fan1_enable", "%u", &rpmIsEnabled); + useRPMSensor = NreadPatterns == 1 && rpmIsEnabled > 0; // vfscanf may return EOF (-1): only 1 means the value was read + } + // Either RPM or PWM or neither + assert((useRPMSensor ^ usePWMSensor) || (!useRPMSensor && !usePWMSensor)); + gpu_info->fanSpeedFILE = NULL; // stays NULL unless a usable fan sensor is opened below + if (usePWMSensor || useRPMSensor) { + const char *maxFanSpeedFile = usePWMSensor ? "pwm1_max" : "fan1_max"; + const char *fanSensorFile = usePWMSensor ? 
"pwm1" : "fan1_input"; + unsigned maxSpeedVal; + NreadPatterns = readValueFromFileAt(gpu_info->hwmonFD, maxFanSpeedFile, "%u", &maxSpeedVal); + if (NreadPatterns == 1 && maxSpeedVal > 0) { // a zero maximum would make the percentage computation divide by zero + gpu_info->maxFanValue = maxSpeedVal; + // Open the fan file + int fanSpeedFD = openat(gpu_info->hwmonFD, fanSensorFile, O_RDONLY); + if (fanSpeedFD >= 0) { + gpu_info->fanSpeedFILE = fdopen(fanSpeedFD, "r"); + if (!gpu_info->fanSpeedFILE) + close(fanSpeedFD); + } else + gpu_info->fanSpeedFILE = NULL; + } else // we need a non-zero maximum value to get the speed percentage + gpu_info->fanSpeedFILE = NULL; + } + // TODO: temperature crit/emergency and PCIE max link/width } static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) { @@ -633,8 +759,17 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) { } else RESET_VALID(gpuinfo_gpu_temp_valid, dynamic_info->valid); - // TODO: Fan speed - // You can get the fan speed from sysfs hwmon pwm1 + // Fan speed + if (gpu_info->fanSpeedFILE) { + unsigned currentFanSpeed; + int patternsMatched = rewindAndReadPattern(gpu_info->fanSpeedFILE, "%u", &currentFanSpeed); + if (patternsMatched == 1) { + dynamic_info->fan_speed = currentFanSpeed * 100 / gpu_info->maxFanValue; + SET_VALID(gpuinfo_fan_speed_valid, dynamic_info->valid); + } else + RESET_VALID(gpuinfo_fan_speed_valid, dynamic_info->valid); + } else + RESET_VALID(gpuinfo_fan_speed_valid, dynamic_info->valid); // Device power usage if (libdrm_amdgpu_handle && amdgpu_query_sensor_info)