Skip to content

Commit 25f00d7

Browse files
bobpengxie and jerboaa
authored and committed Jul 28, 2021
8269851: OperatingSystemMXBean getProcessCpuLoad reports incorrect process cpu usage in containers
Co-authored-by: Severin Gehwolf <sgehwolf@openjdk.org>
Reviewed-by: sgehwolf
1 parent 41b4c19 commit 25f00d7

File tree

1 file changed

+140
-81
lines changed

1 file changed

+140
-81
lines changed
 

‎src/jdk.management/unix/classes/com/sun/management/internal/OperatingSystemImpl.java

+140-81
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,14 @@
2525

2626
package com.sun.management.internal;
2727

28+
import java.util.concurrent.TimeUnit;
29+
import java.util.function.DoubleSupplier;
30+
import java.util.function.LongSupplier;
31+
import java.util.function.ToDoubleFunction;
32+
2833
import jdk.internal.platform.Metrics;
2934
import sun.management.BaseOperatingSystemImpl;
3035
import sun.management.VMManagement;
31-
32-
import java.util.concurrent.TimeUnit;
3336
/**
3437
* Implementation class for the operating system.
3538
* Standard and committed hotspot-specific metrics if any.
@@ -42,8 +45,137 @@ class OperatingSystemImpl extends BaseOperatingSystemImpl
4245

4346
private static final int MAX_ATTEMPTS_NUMBER = 10;
4447
// Container metrics; getCpuLoad() and getProcessCpuLoad() take the
// container-aware path only when this is non-null.
private final Metrics containerMetrics;
45-
private long usageTicks = 0; // used for cpu load calculation
46-
private long totalTicks = 0; // used for cpu load calculation
48+
// Calculator that getCpuLoad() delegates to when containerMetrics is non-null.
private ContainerCpuTicks systemLoadTicks = new SystemCpuTicks();
49+
// Calculator that getProcessCpuLoad() delegates to when containerMetrics is non-null.
private ContainerCpuTicks processLoadTicks = new ProcessCpuTicks();
50+
51+
private abstract class ContainerCpuTicks {
52+
private long usageTicks = 0;
53+
private long totalTicks = 0;
54+
55+
private double getUsageDividesTotal(long usageTicks, long totalTicks) {
56+
// If cpu quota or cpu shares are in effect. Calculate the cpu load
57+
// based on the following formula (similar to how
58+
// getCpuLoad0() is being calculated):
59+
//
60+
// | usageTicks - usageTicks' |
61+
// ------------------------------
62+
// | totalTicks - totalTicks' |
63+
//
64+
// where usageTicks' and totalTicks' are historical values
65+
// retrieved via an earlier call of this method.
66+
if (usageTicks < 0 || totalTicks <= 0) {
67+
return -1;
68+
}
69+
long distance = usageTicks - this.usageTicks;
70+
this.usageTicks = usageTicks;
71+
long totalDistance = totalTicks - this.totalTicks;
72+
this.totalTicks = totalTicks;
73+
double systemLoad = 0.0;
74+
if (distance > 0 && totalDistance > 0) {
75+
systemLoad = ((double)distance) / totalDistance;
76+
}
77+
// Ensure the return value is in the range 0.0 -> 1.0
78+
systemLoad = Math.max(0.0, systemLoad);
79+
systemLoad = Math.min(1.0, systemLoad);
80+
return systemLoad;
81+
}
82+
83+
public double getContainerCpuLoad() {
84+
assert(containerMetrics != null);
85+
long quota = containerMetrics.getCpuQuota();
86+
long share = containerMetrics.getCpuShares();
87+
if (quota > 0) {
88+
long numPeriods = containerMetrics.getCpuNumPeriods();
89+
long quotaNanos = TimeUnit.MICROSECONDS.toNanos(quota * numPeriods);
90+
return getUsageDividesTotal(cpuUsageSupplier().getAsLong(), quotaNanos);
91+
} else if (share > 0) {
92+
long hostTicks = getHostTotalCpuTicks0();
93+
int totalCPUs = getHostOnlineCpuCount0();
94+
int containerCPUs = getAvailableProcessors();
95+
// scale the total host load to the actual container cpus
96+
hostTicks = hostTicks * containerCPUs / totalCPUs;
97+
return getUsageDividesTotal(cpuUsageSupplier().getAsLong(), hostTicks);
98+
} else {
99+
// If CPU quotas and shares are not active then find the average load for
100+
// all online CPUs that are allowed to run this container.
101+
102+
// If the cpuset is the same as the host's one there is no need to iterate over each CPU
103+
if (isCpuSetSameAsHostCpuSet()) {
104+
return defaultCpuLoadSupplier().getAsDouble();
105+
} else {
106+
int[] cpuSet = containerMetrics.getEffectiveCpuSetCpus();
107+
// in case the effectiveCPUSetCpus are not available, attempt to use just cpusets.cpus
108+
if (cpuSet == null || cpuSet.length <= 0) {
109+
cpuSet = containerMetrics.getCpuSetCpus();
110+
}
111+
if (cpuSet == null) {
112+
// cgroups is mounted, but CPU resource is not limited.
113+
// We can assume the VM is run on the host CPUs.
114+
return defaultCpuLoadSupplier().getAsDouble();
115+
} else if (cpuSet.length > 0) {
116+
return cpuSetCalc().applyAsDouble(cpuSet);
117+
}
118+
return -1;
119+
}
120+
}
121+
}
122+
123+
protected abstract DoubleSupplier defaultCpuLoadSupplier();
124+
protected abstract ToDoubleFunction<int[]> cpuSetCalc();
125+
protected abstract LongSupplier cpuUsageSupplier();
126+
}
127+
128+
private class ProcessCpuTicks extends ContainerCpuTicks {
129+
130+
@Override
131+
protected DoubleSupplier defaultCpuLoadSupplier() {
132+
return () -> getProcessCpuLoad0();
133+
}
134+
135+
@Override
136+
protected ToDoubleFunction<int[]> cpuSetCalc() {
137+
return (int[] cpuSet) -> {
138+
int totalCPUs = getHostOnlineCpuCount0();
139+
int containerCPUs = getAvailableProcessors();
140+
return Math.min(1.0, getProcessCpuLoad0() * totalCPUs / containerCPUs);
141+
};
142+
}
143+
144+
@Override
145+
protected LongSupplier cpuUsageSupplier() {
146+
return () -> getProcessCpuTime();
147+
}
148+
149+
}
150+
151+
private class SystemCpuTicks extends ContainerCpuTicks {
152+
153+
@Override
154+
protected DoubleSupplier defaultCpuLoadSupplier() {
155+
return () -> getCpuLoad0();
156+
}
157+
158+
@Override
159+
protected ToDoubleFunction<int[]> cpuSetCalc() {
160+
return (int[] cpuSet) -> {
161+
double systemLoad = 0.0;
162+
for (int cpu : cpuSet) {
163+
double cpuLoad = getSingleCpuLoad0(cpu);
164+
if (cpuLoad < 0) {
165+
return -1;
166+
}
167+
systemLoad += cpuLoad;
168+
}
169+
return systemLoad / cpuSet.length;
170+
};
171+
}
172+
173+
@Override
174+
protected LongSupplier cpuUsageSupplier() {
175+
return () -> containerMetrics.getCpuUsage();
176+
}
177+
178+
}
47179

48180
OperatingSystemImpl(VMManagement vm) {
49181
super(vm);
@@ -134,90 +266,17 @@ public long getMaxFileDescriptorCount() {
134266
return getMaxFileDescriptorCount0();
135267
}
136268

137-
// Pre-commit version of this helper, shown as removed ('-') lines in this diff.
// Returns -1 when usageTicks is negative or totalTicks is not positive.
// Otherwise it stores both arguments as the new history and returns the usage
// difference divided by the total difference, limited to 0.0 -> 1.0
// (0.0 when either difference is not positive).
private double getUsageDividesTotal(long usageTicks, long totalTicks) {
138-
// If cpu quota or cpu shares are in effect calculate the cpu load
139-
// based on the following formula (similar to how
140-
// getCpuLoad0() is being calculated):
141-
//
142-
// | usageTicks - usageTicks' |
143-
// ------------------------------
144-
// | totalTicks - totalTicks' |
145-
//
146-
// where usageTicks' and totalTicks' are historical values
147-
// retrieved via an earlier call of this method.
148-
//
149-
// Total ticks should be scaled to the container effective number
150-
// of cpus, if cpu shares are in effect.
151-
if (usageTicks < 0 || totalTicks <= 0) {
152-
return -1;
153-
}
154-
long distance = usageTicks - this.usageTicks;
155-
this.usageTicks = usageTicks;
156-
long totalDistance = totalTicks - this.totalTicks;
157-
this.totalTicks = totalTicks;
158-
159-
double systemLoad = 0.0;
160-
if (distance > 0 && totalDistance > 0) {
161-
systemLoad = ((double)distance) / totalDistance;
162-
}
163-
// Ensure the return value is in the range 0.0 -> 1.0
164-
systemLoad = Math.max(0.0, systemLoad);
165-
systemLoad = Math.min(1.0, systemLoad);
166-
return systemLoad;
167-
}
168-
169269
// Returns the CPU load. Without container metrics this is getCpuLoad0().
// With container metrics, the removed ('-') lines below show the old inline
// quota / shares / cpuset calculation, and the single added ('+') line shows
// its replacement: a call to systemLoadTicks.getContainerCpuLoad().
public double getCpuLoad() {
170270
if (containerMetrics != null) {
171-
long quota = containerMetrics.getCpuQuota();
172-
long share = containerMetrics.getCpuShares();
173-
long usageNanos = containerMetrics.getCpuUsage();
174-
if (quota > 0) {
175-
long numPeriods = containerMetrics.getCpuNumPeriods();
176-
long quotaNanos = TimeUnit.MICROSECONDS.toNanos(quota * numPeriods);
177-
return getUsageDividesTotal(usageNanos, quotaNanos);
178-
} else if (share > 0) {
179-
long hostTicks = getHostTotalCpuTicks0();
180-
int totalCPUs = getHostOnlineCpuCount0();
181-
int containerCPUs = getAvailableProcessors();
182-
// scale the total host load to the actual container cpus
183-
hostTicks = hostTicks * containerCPUs / totalCPUs;
184-
return getUsageDividesTotal(usageNanos, hostTicks);
185-
} else {
186-
// If CPU quotas and shares are not active then find the average system load for
187-
// all online CPUs that are allowed to run this container.
188-
189-
// If the cpuset is the same as the host's one there is no need to iterate over each CPU
190-
if (isCpuSetSameAsHostCpuSet()) {
191-
return getCpuLoad0();
192-
} else {
193-
int[] cpuSet = containerMetrics.getEffectiveCpuSetCpus();
194-
// in case the effectiveCPUSetCpus are not available, attempt to use just cpusets.cpus
195-
if (cpuSet == null || cpuSet.length <= 0) {
196-
cpuSet = containerMetrics.getCpuSetCpus();
197-
}
198-
if (cpuSet == null) {
199-
// cgroups is mounted, but CPU resource is not limited.
200-
// We can assume the VM is run on the host CPUs.
201-
return getCpuLoad0();
202-
} else if (cpuSet.length > 0) {
203-
double systemLoad = 0.0;
204-
for (int cpu : cpuSet) {
205-
double cpuLoad = getSingleCpuLoad0(cpu);
206-
if (cpuLoad < 0) {
207-
return -1;
208-
}
209-
systemLoad += cpuLoad;
210-
}
211-
return systemLoad / cpuSet.length;
212-
}
213-
return -1;
214-
}
215-
}
271+
// Post-commit: the whole container-aware calculation lives in ContainerCpuTicks.
return systemLoadTicks.getContainerCpuLoad();
216272
}
217273
return getCpuLoad0();
218274
}
219275

220276
public double getProcessCpuLoad() {
277+
if (containerMetrics != null) {
278+
return processLoadTicks.getContainerCpuLoad();
279+
}
221280
return getProcessCpuLoad0();
222281
}
223282

0 commit comments

Comments
 (0)
Please sign in to comment.