Skip to content

Commit d462a6b

Browse files
committedNov 7, 2019
8230305: Cgroups v2: Container awareness
Implement Cgroups v2 container awareness in hotspot Reviewed-by: bobv, dholmes
1 parent 71340f5 commit d462a6b

10 files changed

+1425
-638
lines changed
 

‎src/hotspot/os/linux/cgroupSubsystem_linux.cpp

+421
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
/*
2+
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#ifndef CGROUP_SUBSYSTEM_LINUX_HPP
26+
#define CGROUP_SUBSYSTEM_LINUX_HPP
27+
28+
#include "memory/allocation.hpp"
29+
#include "runtime/os.hpp"
30+
#include "logging/log.hpp"
31+
#include "utilities/globalDefinitions.hpp"
32+
#include "utilities/macros.hpp"
33+
#include "osContainer_linux.hpp"
34+
35+
// Shared cgroups code (used by cgroup version 1 and version 2)
36+
37+
/*
38+
* PER_CPU_SHARES has been set to 1024 because CPU shares' quota
39+
* is commonly used in cloud frameworks like Kubernetes[1],
40+
* AWS[2] and Mesos[3] in a similar way. They spawn containers with
41+
* --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do
42+
* the inverse for determining the number of possible available
43+
* CPUs to the JVM inside a container. See JDK-8216366.
44+
*
45+
* [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu
46+
* In particular:
47+
* When using Docker:
48+
* The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially
49+
* fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the
50+
* --cpu-shares flag in the docker run command.
51+
* [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
52+
* [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648
53+
* https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30
54+
*/
55+
#define PER_CPU_SHARES 1024
56+
57+
typedef char * cptr;
58+
59+
// Base class for a cgroup controller. The only thing shared code needs
// from a controller is the directory its interface files are read from.
class CgroupController : public CHeapObj<mtInternal> {
 public:
  // Directory that subsystem_file_line_contents() prepends to the
  // interface file name; NULL is treated there as an error.
  virtual char* subsystem_path();
};
63+
64+
PRAGMA_DIAG_PUSH
65+
PRAGMA_FORMAT_NONLITERAL_IGNORED
66+
/*
 * Read one value out of a controller interface file.
 *
 * c         - controller whose subsystem_path() is the directory to read from
 * filename  - interface file name, appended verbatim to the subsystem path
 * matchline - NULL for single-value files; otherwise only lines containing
 *             this substring are scanned
 * scan_fmt  - sscanf format. With matchline == NULL it must produce exactly
 *             one conversion (into returnval); otherwise exactly two (the
 *             discarded key, then returnval)
 * returnval - destination handed to sscanf
 *
 * return:
 *    0 once a line has been scanned successfully, or
 *    OSCONTAINER_ERROR if the controller/path is NULL, the path is too
 *    long, the file cannot be opened, or no line could be scanned
 */
template <typename T> int subsystem_file_line_contents(CgroupController* c,
                                              const char *filename,
                                              const char *matchline,
                                              const char *scan_fmt,
                                              T returnval) {
  if (c == NULL) {
    log_debug(os, container)("subsystem_file_line_contents: CgroupController* is NULL");
    return OSCONTAINER_ERROR;
  }
  if (c->subsystem_path() == NULL) {
    log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL");
    return OSCONTAINER_ERROR;
  }

  // Build "<subsystem path><filename>".
  char file[MAXPATHLEN+1];
  strncpy(file, c->subsystem_path(), MAXPATHLEN);
  file[MAXPATHLEN-1] = '\0';
  int filelen = strlen(file);
  if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) {
    log_debug(os, container)("File path too long %s, %s", file, filename);
    return OSCONTAINER_ERROR;
  }
  strncat(file, filename, MAXPATHLEN-filelen);
  log_trace(os, container)("Path to %s is %s", filename, file);

  FILE* fp = fopen(file, "r");
  if (fp == NULL) {
    log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno));
    return OSCONTAINER_ERROR;
  }

  char buf[MAXPATHLEN+1];
  char discard[MAXPATHLEN+1];
  bool scan_failed = false;   // a candidate line was seen but did not parse
  char* line;
  while ((line = fgets(buf, MAXPATHLEN, fp)) != NULL) {
    bool found_match;
    if (matchline == NULL) {
      // single-line file case
      found_match = (sscanf(line, scan_fmt, returnval) == 1);
    } else if (strstr(line, matchline) == NULL) {
      continue; // substring not found
    } else {
      // multi-line file case: the key is scanned into 'discard'
      found_match = (sscanf(line, scan_fmt, discard, returnval) == 2);
    }

    if (found_match) {
      fclose(fp);
      return 0;
    }
    scan_failed = true;
    log_debug(os, container)("Type %s not found in file %s", scan_fmt, file);
  }

  // Reaching here without a scan failure means no candidate line was read.
  if (!scan_failed) {
    log_debug(os, container)("Empty file %s", file);
  }
  fclose(fp);
  return OSCONTAINER_ERROR;
}
133+
PRAGMA_DIAG_POP
134+
135+
// Declares 'variable' (of type return_type) in the enclosing scope and fills
// it from single-value file 'filename' of 'subsystem' using 'scan_fmt'.
// If the read fails, the ENCLOSING function returns
// (return_type) OSCONTAINER_ERROR; otherwise the value is logged at trace
// level with 'logstring'.
#define GET_CONTAINER_INFO(return_type, subsystem, filename,              \
                           logstring, scan_fmt, variable)                 \
  return_type variable;                                                   \
  {                                                                       \
    int err = subsystem_file_line_contents(subsystem,                     \
                                           filename,                      \
                                           NULL,                          \
                                           scan_fmt,                      \
                                           &variable);                    \
    if (err != 0) {                                                       \
      return (return_type) OSCONTAINER_ERROR;                             \
    }                                                                     \
    log_trace(os, container)(logstring, variable);                        \
  }
150+
151+
// String flavour of GET_CONTAINER_INFO: declares 'char variable[bufsize]' in
// the enclosing scope and scans the file contents into it with 'scan_fmt'.
// If the read fails, the ENCLOSING function returns (return_type) NULL.
// 'variable' is an array, so it can never itself compare equal to NULL.
#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename,         \
                                logstring, scan_fmt, variable, bufsize)   \
  char variable[bufsize];                                                 \
  {                                                                       \
    int err = subsystem_file_line_contents(subsystem,                     \
                                           filename,                      \
                                           NULL,                          \
                                           scan_fmt,                      \
                                           variable);                     \
    if (err != 0) {                                                       \
      return (return_type) NULL;                                          \
    }                                                                     \
    log_trace(os, container)(logstring, variable);                        \
  }
166+
167+
// Key/value flavour of GET_CONTAINER_INFO: only lines of 'filename' that
// contain 'matchline' are scanned, and 'scan_fmt' must convert the key
// followed by the value. Declares 'variable' in the enclosing scope; if the
// read fails, the ENCLOSING function returns
// (return_type) OSCONTAINER_ERROR.
#define GET_CONTAINER_INFO_LINE(return_type, controller, filename,        \
                                matchline, logstring, scan_fmt, variable) \
  return_type variable;                                                   \
  {                                                                       \
    int err = subsystem_file_line_contents(controller,                    \
                                           filename,                      \
                                           matchline,                     \
                                           scan_fmt,                      \
                                           &variable);                    \
    if (err != 0) {                                                       \
      return (return_type) OSCONTAINER_ERROR;                             \
    }                                                                     \
    log_trace(os, container)(logstring, variable);                        \
  }
182+
183+
// Four controllers: cpu, cpuset, cpuacct, memory
184+
#define CG_INFO_LENGTH 4
185+
186+
// A single cached jlong metric together with the os::elapsed_counter()
// value after which it should be read again.
class CachedMetric : public CHeapObj<mtInternal> {
 private:
  volatile jlong _metric;
  volatile jlong _next_check_counter;

 public:
  // Starts with value -1 and a deadline of min_jlong, so the first
  // should_check_metric() call reports that a read is due.
  CachedMetric() : _metric(-1), _next_check_counter(min_jlong) {}

  // True once the elapsed counter has moved past the stored deadline.
  bool should_check_metric() {
    const jlong now = os::elapsed_counter();
    return now > _next_check_counter;
  }

  jlong value() { return _metric; }

  // Stores 'value' and schedules the next re-read 'timeout' counter ticks
  // from now.
  void set_value(jlong value, jlong timeout) {
    _metric = value;
    // The metric rarely changes, yet configuration updates should still be
    // noticed. A short grace period between re-reads keeps startup overhead
    // low without noticeably delaying the VM's reaction to a changed
    // configuration.
    _next_check_counter = os::elapsed_counter() + timeout;
  }
};
209+
210+
// Pairs a controller with a freshly allocated CachedMetric.
class CachingCgroupController : public CHeapObj<mtInternal> {
 private:
  CgroupController* _controller;
  CachedMetric*     _metrics_cache;

 public:
  // Wraps 'cont' (stored as-is, not copied) and allocates the cache slot.
  CachingCgroupController(CgroupController* cont)
    : _controller(cont),
      _metrics_cache(new CachedMetric()) {}

  CachedMetric* metrics_cache()  { return _metrics_cache; }
  CgroupController* controller() { return _controller; }
};
224+
225+
// Version-independent view of the cgroup configuration. The virtual
// functions are supplied by the cgroup v1 / v2 subclasses; the two
// non-virtual entry points are defined outside this header.
class CgroupSubsystem : public CHeapObj<mtInternal> {
 public:
  // Shared entry points (not overridden per cgroup version).
  jlong memory_limit_in_bytes();
  int active_processor_count();

  // CPU settings.
  virtual int cpu_quota();
  virtual int cpu_period();
  virtual int cpu_shares();

  // Memory settings and usage.
  virtual jlong memory_usage_in_bytes();
  virtual jlong memory_and_swap_limit_in_bytes();
  virtual jlong memory_soft_limit_in_bytes();
  virtual jlong memory_max_usage_in_bytes();

  // cpuset settings, returned as strings.
  virtual char * cpu_cpuset_cpus();
  virtual char * cpu_cpuset_memory_nodes();

  virtual jlong read_memory_limit_in_bytes();
  virtual const char * container_type();

  // Caching wrappers around the memory / cpu controllers.
  virtual CachingCgroupController* memory_controller();
  virtual CachingCgroupController* cpu_controller();
};
244+
245+
class CgroupSubsystemFactory: AllStatic {
246+
public:
247+
static CgroupSubsystem* create();
248+
};
249+
250+
// Class representing info in /proc/self/cgroup.
251+
// See man 7 cgroups
252+
class CgroupInfo : public StackObj {
  // The factory fills in and reads the fields directly.
  friend class CgroupSubsystemFactory;

 private:
  char* _name;
  int   _hierarchy_id;
  bool  _enabled;
  char* _cgroup_path;
};
262+
263+
264+
#endif // CGROUP_SUBSYSTEM_LINUX_HPP
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
/*
2+
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#include <string.h>
26+
#include <math.h>
27+
#include <errno.h>
28+
#include "cgroupV1Subsystem_linux.hpp"
29+
#include "logging/log.hpp"
30+
#include "memory/allocation.hpp"
31+
#include "runtime/globals.hpp"
32+
#include "runtime/os.hpp"
33+
#include "utilities/globalDefinitions.hpp"
34+
35+
/*
36+
* Set directory to subsystem specific files based
37+
* on the contents of the mountinfo and cgroup files.
38+
*/
39+
void CgroupV1Controller::set_subsystem_path(char *cgroup_path) {
40+
char buf[MAXPATHLEN+1];
41+
if (_root != NULL && cgroup_path != NULL) {
42+
if (strcmp(_root, "/") == 0) {
43+
int buflen;
44+
strncpy(buf, _mount_point, MAXPATHLEN);
45+
buf[MAXPATHLEN-1] = '\0';
46+
if (strcmp(cgroup_path,"/") != 0) {
47+
buflen = strlen(buf);
48+
if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) {
49+
return;
50+
}
51+
strncat(buf, cgroup_path, MAXPATHLEN-buflen);
52+
buf[MAXPATHLEN-1] = '\0';
53+
}
54+
_path = os::strdup(buf);
55+
} else {
56+
if (strcmp(_root, cgroup_path) == 0) {
57+
strncpy(buf, _mount_point, MAXPATHLEN);
58+
buf[MAXPATHLEN-1] = '\0';
59+
_path = os::strdup(buf);
60+
} else {
61+
char *p = strstr(cgroup_path, _root);
62+
if (p != NULL && p == _root) {
63+
if (strlen(cgroup_path) > strlen(_root)) {
64+
int buflen;
65+
strncpy(buf, _mount_point, MAXPATHLEN);
66+
buf[MAXPATHLEN-1] = '\0';
67+
buflen = strlen(buf);
68+
if ((buflen + strlen(cgroup_path) - strlen(_root)) > (MAXPATHLEN-1)) {
69+
return;
70+
}
71+
strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen);
72+
buf[MAXPATHLEN-1] = '\0';
73+
_path = os::strdup(buf);
74+
}
75+
}
76+
}
77+
}
78+
}
79+
}
80+
81+
/* uses_mem_hierarchy
82+
*
83+
* Return whether or not hierarchical cgroup accounting is being
84+
* done.
85+
*
86+
* return:
87+
* A number > 0 if true, or
88+
* OSCONTAINER_ERROR for not supported
89+
*/
90+
jlong CgroupV1MemoryController::uses_mem_hierarchy() {
  // On a read failure the macro returns OSCONTAINER_ERROR from here.
  GET_CONTAINER_INFO(jlong, this, "/memory.use_hierarchy",
                     "Use Hierarchy is: " JLONG_FORMAT, JLONG_FORMAT, use_hierarchy);

  return use_hierarchy;
}
95+
96+
void CgroupV1MemoryController::set_subsystem_path(char *cgroup_path) {
97+
CgroupV1Controller::set_subsystem_path(cgroup_path);
98+
jlong hierarchy = uses_mem_hierarchy();
99+
if (hierarchy > 0) {
100+
set_hierarchical(true);
101+
}
102+
}
103+
104+
// Returns the memory limit in bytes, -1 when unlimited, or
// OSCONTAINER_ERROR when an interface file cannot be read. If the direct
// limit is unlimited and the controller is hierarchical, the
// hierarchical_memory_limit entry of memory.stat is consulted as well.
jlong CgroupV1Subsystem::read_memory_limit_in_bytes() {
  GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.limit_in_bytes",
                     "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit);

  if (memlimit < _unlimited_memory) {
    return (jlong)memlimit;
  }

  log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited");
  CgroupV1MemoryController* mem_controller =
      reinterpret_cast<CgroupV1MemoryController*>(_memory->controller());
  if (!mem_controller->is_hierarchical()) {
    return (jlong)-1;
  }

  const char* matchline = "hierarchical_memory_limit";
  const char* format = "%s " JULONG_FORMAT;
  GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", matchline,
                          "Hierarchical Memory Limit is: " JULONG_FORMAT, format, hier_memlimit)
  if (hier_memlimit < _unlimited_memory) {
    return (jlong)hier_memlimit;
  }
  log_trace(os, container)("Hierarchical Memory Limit is: Unlimited");
  return (jlong)-1;
}
128+
129+
// Returns the combined memory+swap limit in bytes, -1 when unlimited, or
// OSCONTAINER_ERROR when an interface file cannot be read. Falls back to
// the hierarchical_memsw_limit entry of memory.stat for hierarchical
// controllers whose direct limit is unlimited.
jlong CgroupV1Subsystem::memory_and_swap_limit_in_bytes() {
  GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.memsw.limit_in_bytes",
                     "Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit);

  if (memswlimit < _unlimited_memory) {
    return (jlong)memswlimit;
  }

  log_trace(os, container)("Non-Hierarchical Memory and Swap Limit is: Unlimited");
  CgroupV1MemoryController* mem_controller =
      reinterpret_cast<CgroupV1MemoryController*>(_memory->controller());
  if (!mem_controller->is_hierarchical()) {
    return (jlong)-1;
  }

  const char* matchline = "hierarchical_memsw_limit";
  const char* format = "%s " JULONG_FORMAT;
  GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", matchline,
                          "Hierarchical Memory and Swap Limit is : " JULONG_FORMAT, format, hier_memlimit)
  if (hier_memlimit < _unlimited_memory) {
    return (jlong)hier_memlimit;
  }
  log_trace(os, container)("Hierarchical Memory and Swap Limit is: Unlimited");
  return (jlong)-1;
}
151+
152+
// Returns the soft memory limit in bytes, -1 when unlimited, or
// OSCONTAINER_ERROR when the file cannot be read.
jlong CgroupV1Subsystem::memory_soft_limit_in_bytes() {
  GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.soft_limit_in_bytes",
                     "Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, memsoftlimit);

  if (memsoftlimit < _unlimited_memory) {
    return (jlong)memsoftlimit;
  }
  log_trace(os, container)("Memory Soft Limit is: Unlimited");
  return (jlong)-1;
}
162+
163+
/* memory_usage_in_bytes
164+
*
165+
* Return the amount of used memory for this process.
166+
*
167+
* return:
168+
* memory usage in bytes or
169+
* -1 for unlimited
170+
* OSCONTAINER_ERROR for not supported
171+
*/
172+
jlong CgroupV1Subsystem::memory_usage_in_bytes() {
  // The macro declares 'memusage' and bails out with OSCONTAINER_ERROR
  // when memory.usage_in_bytes cannot be read.
  GET_CONTAINER_INFO(jlong, _memory->controller(), "/memory.usage_in_bytes",
                     "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage);

  return memusage;
}
177+
178+
/* memory_max_usage_in_bytes
179+
*
180+
* Return the maximum amount of used memory for this process.
181+
*
182+
* return:
183+
* max memory usage in bytes or
184+
* OSCONTAINER_ERROR for not supported
185+
*/
186+
jlong CgroupV1Subsystem::memory_max_usage_in_bytes() {
  // The macro declares 'memmaxusage' and bails out with OSCONTAINER_ERROR
  // when memory.max_usage_in_bytes cannot be read.
  GET_CONTAINER_INFO(jlong, _memory->controller(), "/memory.max_usage_in_bytes",
                     "Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage);

  return memmaxusage;
}
191+
192+
char * CgroupV1Subsystem::cpu_cpuset_cpus() {
193+
GET_CONTAINER_INFO_CPTR(cptr, _cpuset, "/cpuset.cpus",
194+
"cpuset.cpus is: %s", "%1023s", cpus, 1024);
195+
return os::strdup(cpus);
196+
}
197+
198+
char * CgroupV1Subsystem::cpu_cpuset_memory_nodes() {
199+
GET_CONTAINER_INFO_CPTR(cptr, _cpuset, "/cpuset.mems",
200+
"cpuset.mems is: %s", "%1023s", mems, 1024);
201+
return os::strdup(mems);
202+
}
203+
204+
/* cpu_quota
205+
*
206+
* Return the number of microseconds per period
207+
* process is guaranteed to run.
208+
*
209+
* return:
210+
* quota time in microseconds
211+
* -1 for no quota
212+
* OSCONTAINER_ERROR for not supported
213+
*/
214+
int CgroupV1Subsystem::cpu_quota() {
  // Raw integer from cpu.cfs_quota_us; the macro returns OSCONTAINER_ERROR
  // from here when the file cannot be read.
  GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.cfs_quota_us",
                     "CPU Quota is: %d", "%d", quota);

  return quota;
}
219+
220+
// Returns the raw integer from cpu.cfs_period_us, or OSCONTAINER_ERROR when
// the file cannot be read.
int CgroupV1Subsystem::cpu_period() {
  GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.cfs_period_us",
                     "CPU Period is: %d", "%d", period);

  return period;
}
225+
226+
/* cpu_shares
227+
*
228+
* Return the amount of cpu shares available to the process
229+
*
230+
* return:
231+
* Share number (typically a number relative to 1024)
232+
* (2048 typically expresses 2 CPUs worth of processing)
233+
* -1 for no share setup
234+
* OSCONTAINER_ERROR for not supported
235+
*/
236+
int CgroupV1Subsystem::cpu_shares() {
  GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.shares",
                     "CPU Shares is: %d", "%d", shares);

  // A value of exactly 1024 is reported as "no shares setup" (-1).
  const bool no_shares_setup = (shares == 1024);
  return no_shares_setup ? -1 : shares;
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#ifndef CGROUP_V1_SUBSYSTEM_LINUX_HPP
26+
#define CGROUP_V1_SUBSYSTEM_LINUX_HPP
27+
28+
#include "runtime/os.hpp"
29+
#include "memory/allocation.hpp"
30+
#include "cgroupSubsystem_linux.hpp"
31+
32+
// Cgroups version 1 specific implementation
33+
34+
class CgroupV1Controller: public CgroupController {
35+
private:
36+
/* mountinfo contents */
37+
char *_root;
38+
char *_mount_point;
39+
40+
/* Constructed subsystem directory */
41+
char *_path;
42+
43+
public:
44+
CgroupV1Controller(char *root, char *mountpoint) {
45+
_root = os::strdup(root);
46+
_mount_point = os::strdup(mountpoint);
47+
_path = NULL;
48+
}
49+
50+
virtual void set_subsystem_path(char *cgroup_path);
51+
char *subsystem_path() { return _path; }
52+
};
53+
54+
class CgroupV1MemoryController: public CgroupV1Controller {
55+
56+
public:
57+
bool is_hierarchical() { return _uses_mem_hierarchy; }
58+
void set_subsystem_path(char *cgroup_path);
59+
private:
60+
/* Some container runtimes set limits via cgroup
61+
* hierarchy. If set to true consider also memory.stat
62+
* file if everything else seems unlimited */
63+
bool _uses_mem_hierarchy;
64+
jlong uses_mem_hierarchy();
65+
void set_hierarchical(bool value) { _uses_mem_hierarchy = value; }
66+
67+
public:
68+
CgroupV1MemoryController(char *root, char *mountpoint) : CgroupV1Controller(root, mountpoint) {
69+
_uses_mem_hierarchy = false;
70+
}
71+
72+
};
73+
74+
class CgroupV1Subsystem: public CgroupSubsystem {
75+
76+
public:
77+
jlong read_memory_limit_in_bytes();
78+
jlong memory_and_swap_limit_in_bytes();
79+
jlong memory_soft_limit_in_bytes();
80+
jlong memory_usage_in_bytes();
81+
jlong memory_max_usage_in_bytes();
82+
char * cpu_cpuset_cpus();
83+
char * cpu_cpuset_memory_nodes();
84+
85+
int cpu_quota();
86+
int cpu_period();
87+
88+
int cpu_shares();
89+
90+
const char * container_type() {
91+
return "cgroupv1";
92+
}
93+
CachingCgroupController * memory_controller() { return _memory; }
94+
CachingCgroupController * cpu_controller() { return _cpu; }
95+
96+
private:
97+
julong _unlimited_memory;
98+
99+
/* controllers */
100+
CachingCgroupController* _memory = NULL;
101+
CgroupV1Controller* _cpuset = NULL;
102+
CachingCgroupController* _cpu = NULL;
103+
CgroupV1Controller* _cpuacct = NULL;
104+
105+
public:
106+
CgroupV1Subsystem(CgroupV1Controller* cpuset,
107+
CgroupV1Controller* cpu,
108+
CgroupV1Controller* cpuacct,
109+
CgroupV1MemoryController* memory) {
110+
_cpuset = cpuset;
111+
_cpu = new CachingCgroupController(cpu);
112+
_cpuacct = cpuacct;
113+
_memory = new CachingCgroupController(memory);
114+
_unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size();
115+
}
116+
};
117+
118+
#endif // CGROUP_V1_SUBSYSTEM_LINUX_HPP
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
/*
2+
* Copyright (c) 2020, Red Hat Inc.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#include "cgroupV2Subsystem_linux.hpp"
26+
27+
/* cpu_shares
28+
*
29+
* Return the amount of cpu shares available to the process
30+
*
31+
* return:
32+
* Share number (typically a number relative to 1024)
33+
* (2048 typically expresses 2 CPUs worth of processing)
34+
* -1 for no share setup
35+
* OSCONTAINER_ERROR for not supported
36+
*/
37+
int CgroupV2Subsystem::cpu_shares() {
38+
GET_CONTAINER_INFO(int, _unified, "/cpu.weight",
39+
"Raw value for CPU shares is: %d", "%d", shares);
40+
// Convert default value of 100 to no shares setup
41+
if (shares == 100) {
42+
log_debug(os, container)("CPU Shares is: %d", -1);
43+
return -1;
44+
}
45+
46+
// CPU shares (OCI) value needs to get translated into
47+
// a proper Cgroups v2 value. See:
48+
// https://github.com/containers/crun/blob/master/crun.1.md#cpu-controller
49+
//
50+
// Use the inverse of (x == OCI value, y == cgroupsv2 value):
51+
// ((262142 * y - 1)/9999) + 2 = x
52+
//
53+
int x = 262142 * shares - 1;
54+
double frac = x/9999.0;
55+
x = ((int)frac) + 2;
56+
log_trace(os, container)("Scaled CPU shares value is: %d", x);
57+
// Since the scaled value is not precise, return the closest
58+
// multiple of PER_CPU_SHARES for a more conservative mapping
59+
if ( x <= PER_CPU_SHARES ) {
60+
// will always map to 1 CPU
61+
log_debug(os, container)("CPU Shares is: %d", x);
62+
return x;
63+
}
64+
int f = x/PER_CPU_SHARES;
65+
int lower_multiple = f * PER_CPU_SHARES;
66+
int upper_multiple = (f + 1) * PER_CPU_SHARES;
67+
int distance_lower = MAX2(lower_multiple, x) - MIN2(lower_multiple, x);
68+
int distance_upper = MAX2(upper_multiple, x) - MIN2(upper_multiple, x);
69+
x = distance_lower <= distance_upper ? lower_multiple : upper_multiple;
70+
log_trace(os, container)("Closest multiple of %d of the CPU Shares value is: %d", PER_CPU_SHARES, x);
71+
log_debug(os, container)("CPU Shares is: %d", x);
72+
return x;
73+
}
74+
75+
/* cpu_quota
76+
*
77+
* Return the number of microseconds per period
78+
* process is guaranteed to run.
79+
*
80+
* return:
81+
* quota time in microseconds
82+
* -1 for no quota
83+
* OSCONTAINER_ERROR for not supported
84+
*/
85+
int CgroupV2Subsystem::cpu_quota() {
86+
char * cpu_quota_str = cpu_quota_val();
87+
int limit = (int)limit_from_str(cpu_quota_str);
88+
log_trace(os, container)("CPU Quota is: %d", limit);
89+
return limit;
90+
}
91+
92+
char * CgroupV2Subsystem::cpu_cpuset_cpus() {
93+
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpuset.cpus",
94+
"cpuset.cpus is: %s", "%1023s", cpus, 1024);
95+
if (cpus == NULL) {
96+
return NULL;
97+
}
98+
return os::strdup(cpus);
99+
}
100+
101+
char* CgroupV2Subsystem::cpu_quota_val() {
102+
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpu.max",
103+
"Raw value for CPU quota is: %s", "%s %*d", quota, 1024);
104+
if (quota == NULL) {
105+
return NULL;
106+
}
107+
return os::strdup(quota);
108+
}
109+
110+
char * CgroupV2Subsystem::cpu_cpuset_memory_nodes() {
111+
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpuset.mems",
112+
"cpuset.mems is: %s", "%1023s", mems, 1024);
113+
if (mems == NULL) {
114+
return NULL;
115+
}
116+
return os::strdup(mems);
117+
}
118+
119+
// Returns the second field of cpu.max (the period), or OSCONTAINER_ERROR
// when the file cannot be read or parsed.
int CgroupV2Subsystem::cpu_period() {
  // "%*s" skips the first field (the quota); only the integer is stored.
  GET_CONTAINER_INFO(int, _unified, "/cpu.max",
                     "CPU Period is: %d", "%*s %d", period);

  return period;
}
124+
125+
/* memory_usage_in_bytes
126+
*
127+
* Return the amount of memory used by this cgroup and its descendants
128+
*
129+
* return:
130+
* memory usage in bytes or
131+
* -1 for unlimited
132+
* OSCONTAINER_ERROR for not supported
133+
*/
134+
jlong CgroupV2Subsystem::memory_usage_in_bytes() {
  // The macro declares 'memusage' and bails out with OSCONTAINER_ERROR
  // when memory.current cannot be read.
  GET_CONTAINER_INFO(jlong, _unified, "/memory.current",
                     "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage);

  return memusage;
}
139+
140+
// Returns the soft limit (memory.high) in bytes, -1 for "max", or
// OSCONTAINER_ERROR when it cannot be read or parsed.
jlong CgroupV2Subsystem::memory_soft_limit_in_bytes() {
  // limit_from_str() releases the string it is given.
  return limit_from_str(mem_soft_limit_val());
}
144+
145+
// Always reports OSCONTAINER_ERROR: this implementation does not read a
// maximum-usage value.
jlong CgroupV2Subsystem::memory_max_usage_in_bytes() {
  // Logged at trace level because tests look for this string.
  log_trace(os, container)("Maximum Memory Usage is not supported.");

  return OSCONTAINER_ERROR; // not supported
}
150+
151+
char* CgroupV2Subsystem::mem_soft_limit_val() {
152+
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.high",
153+
"Memory Soft Limit is: %s", "%s", mem_soft_limit_str, 1024);
154+
if (mem_soft_limit_str == NULL) {
155+
return NULL;
156+
}
157+
return os::strdup(mem_soft_limit_str);
158+
}
159+
160+
// Returns the value of memory.swap.max in bytes, -1 for "max", or
// OSCONTAINER_ERROR when it cannot be read or parsed.
jlong CgroupV2Subsystem::memory_and_swap_limit_in_bytes() {
  // limit_from_str() releases the string it is given.
  return limit_from_str(mem_swp_limit_val());
}
164+
165+
char* CgroupV2Subsystem::mem_swp_limit_val() {
166+
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.swap.max",
167+
"Memory and Swap Limit is: %s", "%s", mem_swp_limit_str, 1024);
168+
if (mem_swp_limit_str == NULL) {
169+
return NULL;
170+
}
171+
return os::strdup(mem_swp_limit_str);
172+
}
173+
174+
/* memory_limit_in_bytes
175+
*
176+
* Return the limit of available memory for this process.
177+
*
178+
* return:
179+
* memory limit in bytes or
180+
* -1 for unlimited, OSCONTAINER_ERROR for an error
181+
*/
182+
jlong CgroupV2Subsystem::read_memory_limit_in_bytes() {
  // limit_from_str() releases the string returned by mem_limit_val().
  const jlong limit = limit_from_str(mem_limit_val());

  if (!log_is_enabled(Trace, os, container)) {
    return limit;
  }
  if (limit == -1) {
    log_trace(os, container)("Memory Limit is: Unlimited");
  } else {
    log_trace(os, container)("Memory Limit is: " JLONG_FORMAT, limit);
  }
  return limit;
}
194+
195+
/*
 * Convert the textual value of a cgroup v2 limit file to a number.
 * Takes ownership of limit_str: unless it is NULL, it is released with
 * os::free() before returning.
 *
 * return:
 *    -1 for the literal string "max" (unlimited),
 *    the parsed value for a number, or
 *    OSCONTAINER_ERROR for NULL or unparsable input
 */
jlong CgroupV2Subsystem::limit_from_str(char* limit_str) {
  if (limit_str == NULL) {
    return OSCONTAINER_ERROR;
  }

  jlong result;
  julong parsed;
  if (strcmp("max", limit_str) == 0) {
    // Unlimited memory in Cgroups V2 is the literal string 'max'
    result = (jlong)-1;
  } else if (sscanf(limit_str, JULONG_FORMAT, &parsed) == 1) {
    result = (jlong)parsed;
  } else {
    result = OSCONTAINER_ERROR;
  }

  os::free(limit_str);
  return result;
}
212+
213+
char* CgroupV2Subsystem::mem_limit_val() {
214+
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.max",
215+
"Raw value for memory limit is: %s", "%s", mem_limit_str, 1024);
216+
if (mem_limit_str == NULL) {
217+
return NULL;
218+
}
219+
return os::strdup(mem_limit_str);
220+
}
221+
222+
char* CgroupV2Controller::construct_path(char* mount_path, char *cgroup_path) {
223+
char buf[MAXPATHLEN+1];
224+
int buflen;
225+
strncpy(buf, mount_path, MAXPATHLEN);
226+
buf[MAXPATHLEN] = '\0';
227+
buflen = strlen(buf);
228+
if ((buflen + strlen(cgroup_path)) > MAXPATHLEN) {
229+
return NULL;
230+
}
231+
strncat(buf, cgroup_path, MAXPATHLEN-buflen);
232+
buf[MAXPATHLEN] = '\0';
233+
return os::strdup(buf);
234+
}
235+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Copyright (c) 2020, Red Hat Inc.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#ifndef CGROUP_V2_SUBSYSTEM_LINUX_HPP
26+
#define CGROUP_V2_SUBSYSTEM_LINUX_HPP
27+
28+
#include "cgroupSubsystem_linux.hpp"
29+
30+
class CgroupV2Controller: public CgroupController {
31+
private:
32+
/* the mount path of the cgroup v2 hierarchy */
33+
char *_mount_path;
34+
/* The cgroup path for the controller */
35+
char *_cgroup_path;
36+
37+
/* Constructed full path to the subsystem directory */
38+
char *_path;
39+
static char* construct_path(char* mount_path, char *cgroup_path);
40+
41+
public:
42+
CgroupV2Controller(char * mount_path, char *cgroup_path) {
43+
_mount_path = mount_path;
44+
_cgroup_path = os::strdup(cgroup_path);
45+
_path = construct_path(mount_path, cgroup_path);
46+
}
47+
48+
char *subsystem_path() { return _path; }
49+
};
50+
51+
// Cgroups v2 flavour of CgroupSubsystem. It is built around a single
// controller object that is shared by the memory and cpu caching wrappers.
// Apart from the constructor, container_type() and the two controller
// accessors, every member function is only declared in this header.
class CgroupV2Subsystem: public CgroupSubsystem {
52+
private:
53+
/* One unified controller */
54+
CgroupController* _unified = NULL;
55+
/* Caching wrappers for cpu/memory metrics. Both are constructed around
 * the same unified controller (see the constructor below). */
56+
CachingCgroupController* _memory = NULL;
57+
CachingCgroupController* _cpu = NULL;
58+
59+
/* Private helpers, defined outside this header. Judging by names and
 * return types they presumably fetch raw string values and convert a
 * limit string to a jlong -- TODO confirm against the definitions. */
char *mem_limit_val();
60+
char *mem_swp_limit_val();
61+
char *mem_soft_limit_val();
62+
char *cpu_quota_val();
63+
jlong limit_from_str(char* limit_str);
64+
65+
public:
66+
/* Stores the given controller and allocates two CachingCgroupController
 * wrappers around it, one kept as the memory controller and one as the
 * cpu controller. */
CgroupV2Subsystem(CgroupController * unified) {
67+
_unified = unified;
68+
_memory = new CachingCgroupController(unified);
69+
_cpu = new CachingCgroupController(unified);
70+
}
71+
72+
/* Metric accessors; declarations only, definitions are not part of this
 * header. */
jlong read_memory_limit_in_bytes();
73+
int cpu_quota();
74+
int cpu_period();
75+
int cpu_shares();
76+
jlong memory_and_swap_limit_in_bytes();
77+
jlong memory_soft_limit_in_bytes();
78+
jlong memory_usage_in_bytes();
79+
jlong memory_max_usage_in_bytes();
80+
char * cpu_cpuset_cpus();
81+
char * cpu_cpuset_memory_nodes();
82+
/* Identifies this subsystem implementation; always the literal
 * "cgroupv2". */
const char * container_type() {
83+
return "cgroupv2";
84+
}
85+
/* Accessors for the caching wrappers created in the constructor. */
CachingCgroupController * memory_controller() { return _memory; }
86+
CachingCgroupController * cpu_controller() { return _cpu; }
87+
};
88+
89+
#endif // CGROUP_V2_SUBSYSTEM_LINUX_HPP

‎src/hotspot/os/linux/osContainer_linux.cpp

+35-631
Large diffs are not rendered by default.

‎src/hotspot/os/linux/osContainer_linux.hpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -48,7 +48,6 @@ class OSContainer: AllStatic {
4848
static inline bool is_containerized();
4949
static const char * container_type();
5050

51-
static jlong uses_mem_hierarchy();
5251
static jlong memory_limit_in_bytes();
5352
static jlong memory_and_swap_limit_in_bytes();
5453
static jlong memory_soft_limit_in_bytes();

‎src/hotspot/os/linux/os_linux.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
3131
static bool zero_page_read_protected() { return true; }
3232

3333
class Linux {
34+
friend class CgroupSubsystem;
3435
friend class os;
3536
friend class OSContainer;
3637
friend class TestReserveMemorySpecial;

‎test/hotspot/jtreg/containers/docker/TestCPUAwareness.java

+17-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,7 @@
3434
* @run driver TestCPUAwareness
3535
*/
3636
import java.util.List;
37+
import jdk.test.lib.process.OutputAnalyzer;
3738
import jdk.test.lib.containers.docker.Common;
3839
import jdk.test.lib.containers.docker.DockerRunOptions;
3940
import jdk.test.lib.containers.docker.DockerTestUtils;
@@ -213,9 +214,21 @@ private static void testCpuShares(int shares, int expectedAPC) throws Exception
213214

214215
DockerRunOptions opts = Common.newOpts(imageName)
215216
.addDockerOpts("--cpu-shares=" + shares);
216-
Common.run(opts)
217-
.shouldMatch("CPU Shares is.*" + shares)
218-
.shouldMatch("active_processor_count.*" + expectedAPC);
217+
OutputAnalyzer out = Common.run(opts);
218+
// Cgroups v2 needs to do some scaling of raw shares values. Hence,
219+
// 256 CPU shares come back as 264. Raw value written to cpu.weight
220+
// is 10. The reason this works for >= 1024 shares value is because
221+
// post-scaling the closest multiple of 1024 is found and returned.
222+
//
223+
// For values < 1024, this doesn't happen so loosen the match to a
224+
// 3-digit number and ensure the active_processor_count is as
225+
// expected.
226+
if (shares < 1024) {
227+
out.shouldMatch("CPU Shares is.*\\d{3}");
228+
} else {
229+
out.shouldMatch("CPU Shares is.*" + shares);
230+
}
231+
out.shouldMatch("active_processor_count.*" + expectedAPC);
219232
}
220233

221234
private static void testOperatingSystemMXBeanAwareness(String cpuAllocation, String expectedCpus) throws Exception {

0 commit comments

Comments
 (0)
Please sign in to comment.