Skip to content

Commit 234053e

Browse files
bernd-aws authored and William Kemper committed on May 3, 2021
Use PLABs for old gen allocations, including promotions.
Reviewed-by: rkennke
1 parent fdd30f5 commit 234053e

17 files changed

+296
-95
lines changed
 

‎src/hotspot/share/gc/shared/plab.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ class PLAB: public CHeapObj<mtGC> {
140140
// Fills in the unallocated portion of the buffer with a garbage object and updates
141141
// statistics. To be called during GC.
142142
void retire();
143+
144+
// Returns the current value of _top for this buffer.
// The accessor only reads state, so it is const-qualified and can be
// called through const PLAB references as well.
HeapWord* top() const {
  return _top;
}
143147
};
144148

145149
// PLAB book-keeping.

‎src/hotspot/share/gc/shenandoah/shenandoahAllocRequest.hpp

+11-1
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,10 @@ class ShenandoahAllocRequest : StackObj {
3232
public:
3333
enum Type {
3434
_alloc_shared, // Allocate common, outside of TLAB
35-
_alloc_shared_gc, // Allocate common, outside of GCLAB
35+
_alloc_shared_gc, // Allocate common, outside of GCLAB/PLAB
3636
_alloc_tlab, // Allocate TLAB
3737
_alloc_gclab, // Allocate GCLAB
38+
_alloc_plab, // Allocate PLAB
3839
_ALLOC_LIMIT
3940
};
4041

@@ -48,6 +49,8 @@ class ShenandoahAllocRequest : StackObj {
4849
return "TLAB";
4950
case _alloc_gclab:
5051
return "GCLAB";
52+
case _alloc_plab:
53+
return "PLAB";
5154
default:
5255
ShouldNotReachHere();
5356
return "";
@@ -81,6 +84,10 @@ class ShenandoahAllocRequest : StackObj {
8184
return ShenandoahAllocRequest(min_size, requested_size, _alloc_gclab, ShenandoahRegionAffiliation::YOUNG_GENERATION);
8285
}
8386

87+
// Builds a PLAB allocation request for [min_size, requested_size].
// PLAB requests are always affiliated with the old generation.
static inline ShenandoahAllocRequest for_plab(size_t min_size, size_t requested_size) {
  const ShenandoahRegionAffiliation target = ShenandoahRegionAffiliation::OLD_GENERATION;
  return ShenandoahAllocRequest(min_size, requested_size, _alloc_plab, target);
}
90+
8491
// Builds a shared (non-LAB) GC allocation request of requested_size for the
// given affiliation. Zero is passed in the position where the LAB factories
// pass their min_size.
static inline ShenandoahAllocRequest for_shared_gc(size_t requested_size, ShenandoahRegionAffiliation affiliation) {
  const size_t no_min_size = 0;
  return ShenandoahAllocRequest(no_min_size, requested_size, _alloc_shared_gc, affiliation);
}
@@ -125,6 +132,7 @@ class ShenandoahAllocRequest : StackObj {
125132
case _alloc_shared:
126133
return true;
127134
case _alloc_gclab:
135+
case _alloc_plab:
128136
case _alloc_shared_gc:
129137
return false;
130138
default:
@@ -139,6 +147,7 @@ class ShenandoahAllocRequest : StackObj {
139147
case _alloc_shared:
140148
return false;
141149
case _alloc_gclab:
150+
case _alloc_plab:
142151
case _alloc_shared_gc:
143152
return true;
144153
default:
@@ -151,6 +160,7 @@ class ShenandoahAllocRequest : StackObj {
151160
switch (_alloc_type) {
152161
case _alloc_tlab:
153162
case _alloc_gclab:
163+
case _alloc_plab:
154164
return true;
155165
case _alloc_shared:
156166
case _alloc_shared_gc:

‎src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,11 @@ void ShenandoahBarrierSet::on_thread_detach(Thread *thread) {
120120
gclab->retire();
121121
}
122122

123-
// SATB protocol requires to keep alive reacheable oops from roots at the beginning of GC
124123
ShenandoahHeap* const heap = ShenandoahHeap::heap();
124+
PLAB* plab = ShenandoahThreadLocalData::plab(thread);
125+
heap->retire_plab(plab);
126+
127+
// SATB protocol requires keeping alive reachable oops from roots at the beginning of GC
125128
if (heap->is_concurrent_mark_in_progress()) {
126129
ShenandoahKeepAliveClosure oops;
127130
StackWatermarkSet::finish_processing(thread->as_Java_thread(), &oops, StackWatermarkKind::gc);

‎src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp

+35-47
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
#include "memory/resourceArea.hpp"
3636
#include "runtime/orderAccess.hpp"
3737

38-
3938
ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) :
4039
_heap(heap),
4140
_mutator_free_bitmap(max_regions, mtGC),
@@ -65,6 +64,23 @@ bool ShenandoahFreeSet::is_collector_free(size_t idx) const {
6564
return _collector_free_bitmap.at(idx);
6665
}
6766

67+
// Scans the collector view from its rightmost to its leftmost index and tries
// to satisfy req from the first collector-free region whose affiliation equals
// the requested one. Returns the allocated address, or NULL if no such region
// could satisfy the request. in_new_region is forwarded to try_allocate_in().
HeapWord* ShenandoahFreeSet::allocate_with_affiliation(ShenandoahRegionAffiliation affiliation, ShenandoahAllocRequest& req, bool& in_new_region) {
  // The cursor runs one ahead of the region index: size_t is unsigned, so
  // counting the index itself down to a leftmost bound of 0 would wrap around.
  for (size_t cursor = _collector_rightmost + 1; cursor > _collector_leftmost; cursor--) {
    const size_t region_idx = cursor - 1;
    if (!is_collector_free(region_idx)) {
      continue;
    }

    ShenandoahHeapRegion* candidate = _heap->get_region(region_idx);
    if (candidate->affiliation() != affiliation) {
      continue;
    }

    HeapWord* mem = try_allocate_in(candidate, req, in_new_region);
    if (mem != NULL) {
      return mem;
    }
  }
  return NULL;
}
83+
6884
HeapWord* ShenandoahFreeSet::allocate_single(ShenandoahAllocRequest& req, bool& in_new_region) {
6985
// Scan the bitmap looking for a first fit.
7086
//
@@ -96,64 +112,33 @@ HeapWord* ShenandoahFreeSet::allocate_single(ShenandoahAllocRequest& req, bool&
96112
break;
97113
}
98114
case ShenandoahAllocRequest::_alloc_gclab:
115+
case ShenandoahAllocRequest::_alloc_plab:
99116
case ShenandoahAllocRequest::_alloc_shared_gc: {
100-
// size_t is unsigned, need to dodge underflow when _leftmost = 0
101-
102-
// Fast-path: try to allocate in the collector view first
103-
for (size_t c = _collector_rightmost + 1; c > _collector_leftmost; c--) {
104-
size_t idx = c - 1;
105-
if (is_collector_free(idx)) {
106-
ShenandoahHeapRegion* r = _heap->get_region(idx);
107-
if (r->is_young() && req.is_old()) {
108-
// We don't want to cannibalize a young region to satisfy
109-
// an evacuation from an old region.
110-
continue;
111-
}
112-
HeapWord* result = try_allocate_in(r, req, in_new_region);
113-
if (result != NULL) {
114-
if (r->is_old()) {
115-
// HEY! This is a very coarse card marking. We hope to repair
116-
// such cards during remembered set scanning.
117-
118-
// HEY! To support full generality with alternative remembered set implementations,
119-
// is preferable to not make direct access to the current card_table implementation.
120-
// Try ShenandoahHeap::heap()->card_scan()->mark_range_as_dirty(result, req.actual_size());
121-
122-
ShenandoahBarrierSet::barrier_set()->card_table()->dirty_MemRegion(MemRegion(result, req.actual_size()));
123-
}
124-
return result;
125-
}
126-
}
117+
// First try to fit into a region that is already in use in the same generation.
118+
HeapWord* result = allocate_with_affiliation(req.affiliation(), req, in_new_region);
119+
if (result != NULL) {
120+
return result;
121+
}
122+
// Then try a free region that is dedicated to GC allocations.
123+
result = allocate_with_affiliation(FREE, req, in_new_region);
124+
if (result != NULL) {
125+
return result;
127126
}
128127

129128
// No dice. Can we borrow space from mutator view?
130129
if (!ShenandoahEvacReserveOverflow) {
131130
return NULL;
132131
}
133132

134-
// Try to steal the empty region from the mutator view
133+
// Try to steal an empty region from the mutator view.
135134
for (size_t c = _mutator_rightmost + 1; c > _mutator_leftmost; c--) {
136135
size_t idx = c - 1;
137136
if (is_mutator_free(idx)) {
138137
ShenandoahHeapRegion* r = _heap->get_region(idx);
139138
if (can_allocate_from(r)) {
140-
if (r->is_young() && req.is_old()) {
141-
continue;
142-
}
143-
144139
flip_to_gc(r);
145140
HeapWord *result = try_allocate_in(r, req, in_new_region);
146141
if (result != NULL) {
147-
if (r->is_old()) {
148-
// HEY! This is a very coarse card marking. We hope to repair
149-
// such cards during remembered set scanning.
150-
151-
// HEY! To support full generality with alternative remembered set implementations,
152-
// is preferable to not make direct access to the current card_table implementation.
153-
// Try ShenandoahHeap::heap()->card_scan()->mark_range_as_dirty(result, req.actual_size());
154-
155-
ShenandoahBarrierSet::barrier_set()->card_table()->dirty_MemRegion(MemRegion(result, req.actual_size()));
156-
}
157142
return result;
158143
}
159144
}
@@ -163,13 +148,11 @@ HeapWord* ShenandoahFreeSet::allocate_single(ShenandoahAllocRequest& req, bool&
163148
// No dice. Do not try to mix mutator and GC allocations, because
164149
// URWM moves due to GC allocations would expose unparsable mutator
165150
// allocations.
166-
167151
break;
168152
}
169153
default:
170154
ShouldNotReachHere();
171155
}
172-
173156
return NULL;
174157
}
175158

@@ -202,11 +185,11 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah
202185
size = free;
203186
}
204187
if (size >= req.min_size()) {
205-
result = r->allocate(size, req.type());
188+
result = r->allocate(size, req);
206189
assert (result != NULL, "Allocation must succeed: free " SIZE_FORMAT ", actual " SIZE_FORMAT, free, size);
207190
}
208191
} else {
209-
result = r->allocate(size, req.type());
192+
result = r->allocate(size, req);
210193
}
211194

212195
if (result != NULL) {
@@ -439,6 +422,10 @@ void ShenandoahFreeSet::flip_to_gc(ShenandoahHeapRegion* r) {
439422
adjust_bounds();
440423
}
441424
assert_bounds();
425+
426+
// We do not ensure that the region is no longer trash,
427+
// relying on try_allocate_in(), which always comes next,
428+
// to recycle trash before attempting to allocate anything in the region.
442429
}
443430

444431
void ShenandoahFreeSet::clear() {
@@ -599,6 +586,7 @@ HeapWord* ShenandoahFreeSet::allocate(ShenandoahAllocRequest& req, bool& in_new_
599586
case ShenandoahAllocRequest::_alloc_shared_gc:
600587
in_new_region = true;
601588
return allocate_contiguous(req);
589+
case ShenandoahAllocRequest::_alloc_plab:
602590
case ShenandoahAllocRequest::_alloc_gclab:
603591
case ShenandoahAllocRequest::_alloc_tlab:
604592
in_new_region = false;

‎src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ class ShenandoahFreeSet : public CHeapObj<mtGC> {
4949
bool is_collector_free(size_t idx) const;
5050

5151
HeapWord* try_allocate_in(ShenandoahHeapRegion* region, ShenandoahAllocRequest& req, bool& in_new_region);
52+
HeapWord* allocate_with_affiliation(ShenandoahRegionAffiliation affiliation, ShenandoahAllocRequest& req, bool& in_new_region);
5253
HeapWord* allocate_single(ShenandoahAllocRequest& req, bool& in_new_region);
5354
HeapWord* allocate_contiguous(ShenandoahAllocRequest& req);
5455

‎src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ class ShenandoahPrepareForCompactionObjectClosure : public ObjectClosure {
331331
void finish_region() {
332332
assert(_to_region != NULL, "should not happen");
333333
if (_heap->mode()->is_generational() && _to_region->affiliation() == FREE) {
334-
// HEY! Changing this region to young during compaction may not be
334+
// TODO: Changing this region to young during compaction may not be
335335
// technically correct here because it completely disregards the ages
336336
// and origins of the objects being moved. It is, however, certainly
337337
// more correct than putting live objects into a region without a

‎src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp

+100-13
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,67 @@ HeapWord* ShenandoahHeap::allocate_from_gclab_slow(Thread* thread, size_t size)
833833
return gclab->allocate(size);
834834
}
835835

836+
// Slow path for PLAB allocation: grows the thread's PLAB size heuristic,
// retires the thread's current PLAB, obtains a fresh buffer and allocates
// `size` words from it.
// Returns NULL when the grown PLAB size still cannot hold the object, or when
// no new buffer could be allocated; the caller then falls back to a shared
// allocation.
HeapWord* ShenandoahHeap::allocate_from_plab_slow(Thread* thread, size_t size) {
  // Double the previously recorded PLAB size, clamped to the PLAB size limits.
  const size_t doubled = ShenandoahThreadLocalData::plab_size(thread) * 2;
  const size_t desired_words = MAX2(MIN2(doubled, PLAB::max_size()), PLAB::min_size());

  // Remember the grown size even if we bail out just below, so that repeated
  // moderately-sized requests eventually fit once the heuristic catches up.
  ShenandoahThreadLocalData::set_plab_size(thread, desired_words);

  if (desired_words < size) {
    // Even the grown PLAB would not hold this object. Keep the current PLAB
    // instead of retiring a perfectly good buffer for one large object.
    return NULL;
  }

  // Retire the current buffer before replacing it.
  PLAB* lab = ShenandoahThreadLocalData::plab(thread);
  retire_plab(lab);

  // The new buffer must at least hold the requested object.
  const size_t required_words = MAX2(size, PLAB::min_size());
  size_t granted_words = 0;
  HeapWord* buf = allocate_new_plab(required_words, desired_words, &granted_words);
  if (buf == NULL) {
    return NULL;
  }

  assert (size <= granted_words, "allocation should fit");

  if (ZeroTLAB) {
    // Hand out zeroed memory.
    Copy::zero_to_words(buf, granted_words);
  } else {
#ifdef ASSERT
    // Debug builds mangle the new buffer, but leave the words corresponding to
    // an object header untouched so that the space is not considered parsable
    // by any concurrent GC thread.
    const size_t header_words = oopDesc::header_size();
    Copy::fill_to_words(buf + header_words, granted_words - header_words, badHeapWordVal);
#endif // ASSERT
  }

  lab->set_buf(buf, granted_words);
  return lab->allocate(size);
}
884+
885+
// Retires the given PLAB. If retirement increased the PLAB's waste, the PLAB's
// pre-retirement top is registered with the card scanner as an object start.
// A NULL plab is tolerated and is a no-op.
void ShenandoahHeap::retire_plab(PLAB* plab) {
  // Not every thread has a PLAB (allocate_from_plab() treats a NULL PLAB as a
  // valid state), and callers such as thread detach pass the thread-local
  // pointer through unchecked. There is nothing to retire in that case.
  if (plab == NULL) {
    return;
  }

  // Sample waste and top before retiring: the comparison below needs the
  // values from before plab->retire() runs.
  const size_t waste_before = plab->waste();
  HeapWord* const top_before = plab->top();
  plab->retire();

  if (top_before != NULL && plab->waste() > waste_before) {
    // If retiring the plab created a filler object, then we
    // need to register it with our card scanner so it can
    // safely walk the region backing the plab.
    card_scan()->register_object(top_before);
  }
}
896+
836897
HeapWord* ShenandoahHeap::allocate_new_tlab(size_t min_size,
837898
size_t requested_size,
838899
size_t* actual_size) {
@@ -859,6 +920,19 @@ HeapWord* ShenandoahHeap::allocate_new_gclab(size_t min_size,
859920
return res;
860921
}
861922

923+
// Allocates the backing memory for a new PLAB via a PLAB allocation request.
// On success returns the buffer start and stores the request's actual size in
// *actual_size; on failure returns NULL and stores 0.
HeapWord* ShenandoahHeap::allocate_new_plab(size_t min_size,
                                            size_t word_size,
                                            size_t* actual_size) {
  ShenandoahAllocRequest plab_req = ShenandoahAllocRequest::for_plab(min_size, word_size);
  HeapWord* const buf = allocate_memory(plab_req);
  // Only query the request's actual size when the allocation succeeded.
  *actual_size = (buf == NULL) ? 0 : plab_req.actual_size();
  return buf;
}
935+
862936
HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
863937
intptr_t pacer_epoch = 0;
864938
bool in_new_region = false;
@@ -954,23 +1028,25 @@ HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req
9541028
//
9551029
// The thread allocating b and the thread allocating c can "race" in various ways, resulting in confusion, such as last-start
9561030
// representing object b while first-start represents object c. This is why we need to require all register_object()
957-
// invocations to be "mutually exclusive". Later, when we use GCLABs to allocate memory for promotions and evacuations,
1031+
// invocations to be "mutually exclusive". Later, when we use GCLABs and PLABs to allocate memory for promotions and evacuations,
9581032
// the protocol may work something like the following:
959-
// 1. The GCLAB is allocated by this (or similar) function, while holding the global lock.
960-
// 2. The GCLAB is registered as a single object.
961-
/// 3. The GCLAB is always aligned at the start of a card memory range and is always a multiple of the card-table memory range size
962-
// 3. Individual allocations carved from the GCLAB are not immediately registered
963-
// 4. When the GCLAB is eventually retired, all of the objects allocated within the GCLAB are registered in batch by a
964-
// single thread. No further synchronization is required because no other allocations will pertain to the same
1033+
// 1. The GCLAB/PLAB is allocated by this (or similar) function, while holding the global lock.
1034+
// 2. The GCLAB/PLAB is always aligned at the start of a card memory range
1035+
// and is always a multiple of the card-table memory range size.
1036+
// 3. Individual allocations carved from a GCLAB/PLAB are not immediately registered.
1037+
// 4. A PLAB is registered as a single object.
1038+
// 5. When a PLAB is eventually retired, all of the objects allocated within the GCLAB/PLAB are registered in batch by a
1039+
// single thread. No further synchronization is required because no other allocations will pertain to the same
9651040
// card-table memory ranges.
9661041
//
967-
// The other case that needs special handling is promotion of regions en masse. When the region is promoted, all objects contained
968-
// within the region are registered. Since the region is a multiple of card-table memory range sizes, there is no need for
969-
// synchronization. It might be nice to figure out how to allow multiple threads to work together to register all of the objects in
970-
// a promoted region, or at least try to balance the efforts so that different gc threads work on registering the objects of
971-
// different heap regions. But that effort will come later.
1042+
// The other case that needs special handling is region promotion. When a region is promoted, all objects contained
1043+
// in it are registered. Since the region is a multiple of card table memory range sizes, there is no need for
1044+
// synchronization.
1045+
// TODO: figure out how to allow multiple threads to work together to register all of the objects in
1046+
// a promoted region, or at least try to balance the efforts so that different GC threads work
1047+
// on registering the objects of different heap regions.
9721048
//
973-
if (result != NULL && req.affiliation() == ShenandoahRegionAffiliation::OLD_GENERATION) {
1049+
if (mode()->is_generational() && result != NULL && req.affiliation() == ShenandoahRegionAffiliation::OLD_GENERATION) {
9741050
ShenandoahHeap::heap()->card_scan()->register_object(result);
9751051
}
9761052
return result;
@@ -1140,6 +1216,10 @@ class ShenandoahCheckCleanGCLABClosure : public ThreadClosure {
11401216
PLAB* gclab = ShenandoahThreadLocalData::gclab(thread);
11411217
assert(gclab != NULL, "GCLAB should be initialized for %s", thread->name());
11421218
assert(gclab->words_remaining() == 0, "GCLAB should not need retirement");
1219+
1220+
PLAB* plab = ShenandoahThreadLocalData::plab(thread);
1221+
assert(plab != NULL, "PLAB should be initialized for %s", thread->name());
1222+
assert(plab->words_remaining() == 0, "PLAB should not need retirement");
11431223
}
11441224
};
11451225

@@ -1155,6 +1235,13 @@ class ShenandoahRetireGCLABClosure : public ThreadClosure {
11551235
if (_resize && ShenandoahThreadLocalData::gclab_size(thread) > 0) {
11561236
ShenandoahThreadLocalData::set_gclab_size(thread, 0);
11571237
}
1238+
1239+
PLAB* plab = ShenandoahThreadLocalData::plab(thread);
1240+
assert(plab != NULL, "PLAB should be initialized for %s", thread->name());
1241+
ShenandoahHeap::heap()->retire_plab(plab);
1242+
if (_resize && ShenandoahThreadLocalData::plab_size(thread) > 0) {
1243+
ShenandoahThreadLocalData::set_plab_size(thread, 0);
1244+
}
11581245
}
11591246
};
11601247

‎src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp

+7
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444

4545
class ConcurrentGCTimer;
4646
class ObjectIterateScanRootClosure;
47+
class PLAB;
4748
class ShenandoahCollectorPolicy;
4849
class ShenandoahControlThread;
4950
class ShenandoahRegulatorThread;
@@ -567,10 +568,15 @@ class ShenandoahHeap : public CollectedHeap {
567568
//
568569
private:
569570
HeapWord* allocate_memory_under_lock(ShenandoahAllocRequest& request, bool& in_new_region);
571+
570572
inline HeapWord* allocate_from_gclab(Thread* thread, size_t size);
571573
HeapWord* allocate_from_gclab_slow(Thread* thread, size_t size);
572574
HeapWord* allocate_new_gclab(size_t min_size, size_t word_size, size_t* actual_size);
573575

576+
inline HeapWord* allocate_from_plab(Thread* thread, size_t size);
577+
HeapWord* allocate_from_plab_slow(Thread* thread, size_t size);
578+
HeapWord* allocate_new_plab(size_t min_size, size_t word_size, size_t* actual_size);
579+
574580
public:
575581
HeapWord* allocate_memory(ShenandoahAllocRequest& request);
576582
HeapWord* mem_allocate(size_t size, bool* what);
@@ -676,6 +682,7 @@ class ShenandoahHeap : public CollectedHeap {
676682
inline RememberedScanner* card_scan() { return _card_scan; }
677683
void clear_cards_for(ShenandoahHeapRegion* region);
678684
void mark_card_as_dirty(HeapWord* location);
685+
void retire_plab(PLAB* plab);
679686

680687
// ---------- Helper functions
681688
//

‎src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp

+70-20
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "gc/shenandoah/shenandoahMarkingContext.inline.hpp"
4343
#include "gc/shenandoah/shenandoahScanRemembered.inline.hpp"
4444
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
45+
#include "gc/shenandoah/shenandoahScanRemembered.inline.hpp"
4546
#include "gc/shenandoah/mode/shenandoahMode.hpp"
4647
#include "oops/compressedOops.inline.hpp"
4748
#include "oops/oop.inline.hpp"
@@ -211,10 +212,31 @@ inline HeapWord* ShenandoahHeap::allocate_from_gclab(Thread* thread, size_t size
211212
if (obj != NULL) {
212213
return obj;
213214
}
214-
// Otherwise...
215215
return allocate_from_gclab_slow(thread, size);
216216
}
217217

218+
// Allocates `size` words from the thread's PLAB, taking the slow path when the
// current buffer cannot satisfy the request. Returns NULL when the thread has
// no PLAB or when both paths fail, so the caller can fall back to a shared
// allocation. In generational mode a successful allocation is registered with
// the card scanner.
inline HeapWord* ShenandoahHeap::allocate_from_plab(Thread* thread, size_t size) {
  assert(UseTLAB, "TLABs should be enabled");

  PLAB* const lab = ShenandoahThreadLocalData::plab(thread);
  if (lab == NULL) {
    assert(!thread->is_Java_thread() && !thread->is_Worker_thread(),
           "Performance: thread should have PLAB: %s", thread->name());
    // This thread has no PLAB; the caller falls back to shared allocation.
    return NULL;
  }

  // Fast path first; refill through the slow path only on failure.
  HeapWord* mem = lab->allocate(size);
  if (mem == NULL) {
    mem = allocate_from_plab_slow(thread, size);
  }

  const bool generational = mode()->is_generational();
  if (generational && mem != NULL) {
    ShenandoahHeap::heap()->card_scan()->register_object(mem);
  }

  return mem;
}
239+
218240
inline oop ShenandoahHeap::evacuate_object(oop p, Thread* thread) {
219241
if (ShenandoahThreadLocalData::is_oom_during_evac(Thread::current())) {
220242
// This thread went through the OOM during evac protocol and it is safe to return
@@ -241,13 +263,6 @@ inline oop ShenandoahHeap::evacuate_object(oop p, Thread* thread) {
241263
} else if (mark.age() >= InitialTenuringThreshold) {
242264
oop result = try_evacuate_object(p, thread, r, OLD_GENERATION);
243265
if (result != NULL) {
244-
// TODO: Just marking the cards covering this object dirty
245-
// may overall be less efficient than scanning it now for references to young gen
246-
// or other alternatives like deferred card marking or scanning.
247-
// We should revisit this.
248-
// Furthermore, the object start should be registered for remset scanning.
249-
MemRegion mr(cast_from_oop<HeapWord*>(result), result->size());
250-
ShenandoahBarrierSet::barrier_set()->card_table()->invalidate(mr);
251266
return result;
252267
}
253268
}
@@ -256,7 +271,7 @@ inline oop ShenandoahHeap::evacuate_object(oop p, Thread* thread) {
256271
}
257272

258273
inline oop ShenandoahHeap::try_evacuate_object(oop p, Thread* thread, ShenandoahHeapRegion* from_region, ShenandoahRegionAffiliation target_gen) {
259-
bool alloc_from_gclab = true;
274+
bool alloc_from_lab = true;
260275
HeapWord* copy = NULL;
261276
size_t size = p->size();
262277

@@ -266,13 +281,28 @@ inline oop ShenandoahHeap::try_evacuate_object(oop p, Thread* thread, Shenandoah
266281
copy = NULL;
267282
} else {
268283
#endif
269-
if (UseTLAB && target_gen == YOUNG_GENERATION) {
270-
copy = allocate_from_gclab(thread, size);
284+
if (UseTLAB) {
285+
switch (target_gen) {
286+
case YOUNG_GENERATION: {
287+
copy = allocate_from_gclab(thread, size);
288+
break;
289+
}
290+
case OLD_GENERATION: {
291+
if (ShenandoahUsePLAB) {
292+
copy = allocate_from_plab(thread, size);
293+
}
294+
break;
295+
}
296+
default: {
297+
ShouldNotReachHere();
298+
break;
299+
}
300+
}
271301
}
272302
if (copy == NULL) {
273303
ShenandoahAllocRequest req = ShenandoahAllocRequest::for_shared_gc(size, target_gen);
274304
copy = allocate_memory(req);
275-
alloc_from_gclab = false;
305+
alloc_from_lab = false;
276306
}
277307
#ifdef ASSERT
278308
}
@@ -311,6 +341,10 @@ inline oop ShenandoahHeap::try_evacuate_object(oop p, Thread* thread, Shenandoah
311341
// Try to install the new forwarding pointer.
312342
oop result = ShenandoahForwarding::try_update_forwardee(p, copy_val);
313343
if (result == copy_val) {
344+
if (target_gen == OLD_GENERATION) {
345+
ShenandoahBarrierSet::barrier_set()->card_table()->dirty_MemRegion(MemRegion(copy, size));
346+
card_scan()->register_object(copy);
347+
}
314348
// Successfully evacuated. Our copy is now the public one!
315349
shenandoah_assert_correct(NULL, copy_val);
316350
return copy_val;
@@ -320,17 +354,33 @@ inline oop ShenandoahHeap::try_evacuate_object(oop p, Thread* thread, Shenandoah
320354
// But if it happens to contain references to evacuated regions, those references would
321355
// not get updated for this stale copy during this cycle, and we will crash while scanning
322356
// it the next cycle.
323-
//
324-
// For GCLAB allocations, it is enough to rollback the allocation ptr. Either the next
325-
// object will overwrite this stale copy, or the filler object on LAB retirement will
326-
// do this. For non-GCLAB allocations, we have no way to retract the allocation, and
327-
// have to explicitly overwrite the copy with the filler object. With that overwrite,
328-
// we have to keep the fwdptr initialized and pointing to our (stale) copy.
329-
if (alloc_from_gclab) {
330-
ShenandoahThreadLocalData::gclab(thread)->undo_allocation(copy, size);
357+
if (alloc_from_lab) {
358+
// For LAB allocations, it is enough to rollback the allocation ptr. Either the next
359+
// object will overwrite this stale copy, or the filler object on LAB retirement will
360+
// do this.
361+
switch (target_gen) {
362+
case YOUNG_GENERATION: {
363+
ShenandoahThreadLocalData::gclab(thread)->undo_allocation(copy, size);
364+
break;
365+
}
366+
case OLD_GENERATION: {
367+
ShenandoahThreadLocalData::plab(thread)->undo_allocation(copy, size);
368+
break;
369+
}
370+
default: {
371+
ShouldNotReachHere();
372+
break;
373+
}
374+
}
331375
} else {
376+
// For non-LAB allocations, we have no way to retract the allocation, and
377+
// have to explicitly overwrite the copy with the filler object. With that overwrite,
378+
// we have to keep the fwdptr initialized and pointing to our (stale) copy.
332379
fill_with_object(copy, size);
333380
shenandoah_assert_correct(NULL, copy_val);
381+
if (target_gen == OLD_GENERATION) {
382+
card_scan()->register_object(copy);
383+
}
334384
}
335385
shenandoah_assert_correct(NULL, result);
336386
return result;

‎src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp

+20-4
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,11 @@ ShenandoahHeapRegion::ShenandoahHeapRegion(HeapWord* start, size_t index, bool c
7070
_top(start),
7171
_tlab_allocs(0),
7272
_gclab_allocs(0),
73+
_plab_allocs(0),
7374
_live_data(0),
7475
_critical_pins(0),
7576
_update_watermark(start),
76-
_affiliation(ShenandoahRegionAffiliation::FREE),
77+
_affiliation(FREE),
7778
_age(0) {
7879

7980
assert(Universe::on_page_boundary(_bottom) && Universe::on_page_boundary(_end),
@@ -91,13 +92,14 @@ void ShenandoahHeapRegion::report_illegal_transition(const char *method) {
9192
fatal("%s", ss.as_string());
9293
}
9394

94-
void ShenandoahHeapRegion::make_regular_allocation() {
95+
void ShenandoahHeapRegion::make_regular_allocation(ShenandoahRegionAffiliation affiliation) {
9596
shenandoah_assert_heaplocked();
9697
reset_age();
9798
switch (_state) {
9899
case _empty_uncommitted:
99100
do_commit();
100101
case _empty_committed:
102+
set_affiliation(affiliation);
101103
set_state(_regular);
102104
case _regular:
103105
case _pinned:
@@ -119,6 +121,12 @@ void ShenandoahHeapRegion::make_regular_bypass() {
119121
case _cset:
120122
case _humongous_start:
121123
case _humongous_cont:
124+
// TODO: Changing this region to young during compaction may not be
125+
// technically correct here because it completely disregards the ages
126+
// and origins of the objects being moved. It is, however, certainly
127+
// more correct than putting live objects into a region without a
128+
// generational affiliation.
129+
set_affiliation(YOUNG_GENERATION);
122130
set_state(_regular);
123131
return;
124132
case _pinned_cset:
@@ -220,6 +228,7 @@ void ShenandoahHeapRegion::make_unpinned() {
220228

221229
switch (_state) {
222230
case _pinned:
231+
assert(affiliation() != FREE, "Pinned region should not be FREE");
223232
set_state(_regular);
224233
return;
225234
case _regular:
@@ -318,10 +327,11 @@ void ShenandoahHeapRegion::make_committed_bypass() {
318327
void ShenandoahHeapRegion::reset_alloc_metadata() {
319328
_tlab_allocs = 0;
320329
_gclab_allocs = 0;
330+
_plab_allocs = 0;
321331
}
322332

323333
// Returns used() minus the space accounted to TLAB, GCLAB and PLAB
// allocations, i.e. whatever was not allocated through a LAB. The LAB counters
// are scaled by HeapWordSize before being subtracted.
size_t ShenandoahHeapRegion::get_shared_allocs() const {
  const size_t lab_allocs = _tlab_allocs + _gclab_allocs + _plab_allocs;
  return used() - lab_allocs * HeapWordSize;
}
326336

327337
size_t ShenandoahHeapRegion::get_tlab_allocs() const {
@@ -332,6 +342,10 @@ size_t ShenandoahHeapRegion::get_gclab_allocs() const {
332342
return _gclab_allocs * HeapWordSize;
333343
}
334344

345+
// Returns the PLAB allocation counter of this region scaled by HeapWordSize.
size_t ShenandoahHeapRegion::get_plab_allocs() const {
  const size_t plab_bytes = _plab_allocs * HeapWordSize;
  return plab_bytes;
}
348+
335349
void ShenandoahHeapRegion::set_live_data(size_t s) {
336350
assert(Thread::current()->is_VM_thread(), "by VM thread");
337351
_live_data = (s >> LogHeapWordSize);
@@ -397,6 +411,9 @@ void ShenandoahHeapRegion::print_on(outputStream* st) const {
397411
st->print("|U " SIZE_FORMAT_W(5) "%1s", byte_size_in_proper_unit(used()), proper_unit_for_byte_size(used()));
398412
st->print("|T " SIZE_FORMAT_W(5) "%1s", byte_size_in_proper_unit(get_tlab_allocs()), proper_unit_for_byte_size(get_tlab_allocs()));
399413
st->print("|G " SIZE_FORMAT_W(5) "%1s", byte_size_in_proper_unit(get_gclab_allocs()), proper_unit_for_byte_size(get_gclab_allocs()));
414+
if (ShenandoahHeap::heap()->mode()->is_generational()) {
  // The PLAB column is printed only in generational mode. Label it "P": the
  // GCLAB column printed just before it already uses "G", and two identical
  // labels would make the output ambiguous.
  st->print("|P " SIZE_FORMAT_W(5) "%1s", byte_size_in_proper_unit(get_plab_allocs()), proper_unit_for_byte_size(get_plab_allocs()));
}
400417
st->print("|S " SIZE_FORMAT_W(5) "%1s", byte_size_in_proper_unit(get_shared_allocs()), proper_unit_for_byte_size(get_shared_allocs()));
401418
st->print("|L " SIZE_FORMAT_W(5) "%1s", byte_size_in_proper_unit(get_live_data_bytes()), proper_unit_for_byte_size(get_live_data_bytes()));
402419
st->print("|CP " SIZE_FORMAT_W(3), pin_count());
@@ -533,7 +550,6 @@ void ShenandoahHeapRegion::recycle() {
533550
set_update_watermark(bottom());
534551

535552
make_empty();
536-
537553
set_affiliation(FREE);
538554

539555
if (ZapUnusedHeapArea) {

‎src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.hpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ class ShenandoahHeapRegion {
168168
}
169169

170170
// Allowed transitions from the outside code:
171-
void make_regular_allocation();
171+
void make_regular_allocation(ShenandoahRegionAffiliation affiliation);
172172
void make_regular_bypass();
173173
void make_humongous_start();
174174
void make_humongous_cont();
@@ -242,6 +242,7 @@ class ShenandoahHeapRegion {
242242

243243
size_t _tlab_allocs;
244244
size_t _gclab_allocs;
245+
size_t _plab_allocs;
245246

246247
volatile size_t _live_data;
247248
volatile size_t _critical_pins;
@@ -339,7 +340,7 @@ class ShenandoahHeapRegion {
339340
}
340341

341342
// Allocation (return NULL if full)
342-
inline HeapWord* allocate(size_t word_size, ShenandoahAllocRequest::Type type);
343+
inline HeapWord* allocate(size_t word_size, ShenandoahAllocRequest req);
343344

344345
inline void clear_live_data();
345346
void set_live_data(size_t s);
@@ -391,6 +392,7 @@ class ShenandoahHeapRegion {
391392
size_t get_shared_allocs() const;
392393
size_t get_tlab_allocs() const;
393394
size_t get_gclab_allocs() const;
395+
size_t get_plab_allocs() const;
394396

395397
inline HeapWord* get_update_watermark() const;
396398
inline void set_update_watermark(HeapWord* w);

‎src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.inline.hpp

+6-3
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,14 @@
3030
#include "gc/shenandoah/shenandoahPacer.inline.hpp"
3131
#include "runtime/atomic.hpp"
3232

33-
HeapWord* ShenandoahHeapRegion::allocate(size_t size, ShenandoahAllocRequest::Type type) {
33+
HeapWord* ShenandoahHeapRegion::allocate(size_t size, ShenandoahAllocRequest req) {
3434
shenandoah_assert_heaplocked_or_safepoint();
3535
assert(is_object_aligned(size), "alloc size breaks alignment: " SIZE_FORMAT, size);
3636

3737
HeapWord* obj = top();
3838
if (pointer_delta(end(), obj) >= size) {
39-
make_regular_allocation();
40-
adjust_alloc_metadata(type, size);
39+
make_regular_allocation(req.affiliation());
40+
adjust_alloc_metadata(req.type(), size);
4141

4242
HeapWord* new_top = obj + size;
4343
set_top(new_top);
@@ -63,6 +63,9 @@ inline void ShenandoahHeapRegion::adjust_alloc_metadata(ShenandoahAllocRequest::
6363
case ShenandoahAllocRequest::_alloc_gclab:
6464
_gclab_allocs += size;
6565
break;
66+
case ShenandoahAllocRequest::_alloc_plab:
67+
_plab_allocs += size;
68+
break;
6669
default:
6770
ShouldNotReachHere();
6871
}

‎src/hotspot/share/gc/shenandoah/shenandoahHeapRegionCounters.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ void ShenandoahHeapRegionCounters::update() {
111111
data |= ((100 * r->get_live_data_bytes() / rs) & PERCENT_MASK) << LIVE_SHIFT;
112112
data |= ((100 * r->get_tlab_allocs() / rs) & PERCENT_MASK) << TLAB_SHIFT;
113113
data |= ((100 * r->get_gclab_allocs() / rs) & PERCENT_MASK) << GCLAB_SHIFT;
114+
data |= ((100 * r->get_plab_allocs() / rs) & PERCENT_MASK) << PLAB_SHIFT;
114115
data |= ((100 * r->get_shared_allocs() / rs) & PERCENT_MASK) << SHARED_SHIFT;
115116

116117
data |= (r->age() & AGE_MASK) << AGE_SHIFT;

‎src/hotspot/share/gc/shenandoah/shenandoahHeapRegionCounters.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
* - bits 14-20 tlab allocated memory in percent
5252
* - bits 21-27 gclab allocated memory in percent
5353
* - bits 28-34 shared allocated memory in percent
54-
* - bits 35-41 <reserved>
54+
* - bits 35-41 plab allocated memory in percent
5555
* - bits 42-50 <reserved>
5656
* - bits 51-55 age
5757
* - bits 56-57 affiliation: 0 = free, young = 1, old = 2
@@ -70,6 +70,7 @@ class ShenandoahHeapRegionCounters : public CHeapObj<mtGC> {
7070
static const jlong TLAB_SHIFT = 14;
7171
static const jlong GCLAB_SHIFT = 21;
7272
static const jlong SHARED_SHIFT = 28;
73+
static const jlong PLAB_SHIFT = 35;
7374
static const jlong AGE_SHIFT = 51;
7475
static const jlong AFFILIATION_SHIFT = 56;
7576
static const jlong STATUS_SHIFT = 58;

‎src/hotspot/share/gc/shenandoah/shenandoahThreadLocalData.hpp

+21
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ class ShenandoahThreadLocalData {
4747
SATBMarkQueue _satb_mark_queue;
4848
PLAB* _gclab;
4949
size_t _gclab_size;
50+
PLAB* _plab;
51+
size_t _plab_size;
5052
uint _worker_id;
5153
int _disarmed_value;
5254
double _paced_time;
@@ -58,6 +60,8 @@ class ShenandoahThreadLocalData {
5860
_satb_mark_queue(&ShenandoahBarrierSet::satb_mark_queue_set()),
5961
_gclab(NULL),
6062
_gclab_size(0),
63+
_plab(NULL),
64+
_plab_size(0),
6165
_worker_id(INVALID_WORKER_ID),
6266
_disarmed_value(0),
6367
_paced_time(0) {
@@ -71,6 +75,9 @@ class ShenandoahThreadLocalData {
7175
if (_gclab != NULL) {
7276
delete _gclab;
7377
}
78+
if (_plab != NULL) {
79+
delete _plab;
80+
}
7481
}
7582

7683
static ShenandoahThreadLocalData* data(Thread* thread) {
@@ -118,6 +125,8 @@ class ShenandoahThreadLocalData {
118125
assert(data(thread)->_gclab == NULL, "Only initialize once");
119126
data(thread)->_gclab = new PLAB(PLAB::min_size());
120127
data(thread)->_gclab_size = 0;
128+
data(thread)->_plab = new PLAB(PLAB::min_size());
129+
data(thread)->_plab_size = 0;
121130
}
122131

123132
static PLAB* gclab(Thread* thread) {
@@ -132,6 +141,18 @@ class ShenandoahThreadLocalData {
132141
data(thread)->_gclab_size = v;
133142
}
134143

144+
// Returns the PLAB pointer stored in the given thread's ShenandoahThreadLocalData.
// The field is constructed as NULL, so callers may see NULL until it has been set up.
static PLAB* plab(Thread* thread) {
145+
return data(thread)->_plab;
146+
}
147+
148+
// Returns the PLAB size value recorded for the given thread (0 until set_plab_size is called).
// NOTE(review): the unit (words vs. bytes) is not visible here -- confirm against callers.
static size_t plab_size(Thread* thread) {
149+
return data(thread)->_plab_size;
150+
}
151+
152+
// Records v as the PLAB size value for the given thread; it only updates the stored
// number and does not touch the PLAB object itself.
static void set_plab_size(Thread* thread, size_t v) {
153+
data(thread)->_plab_size = v;
154+
}
155+
135156
static void add_paced_time(Thread* thread, double v) {
136157
data(thread)->_paced_time += v;
137158
}

‎src/hotspot/share/gc/shenandoah/shenandoahVerifier.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,11 @@ class ShenandoahVerifyHeapRegionClosure : public ShenandoahHeapRegionClosure {
435435
verify(r, r->get_gclab_allocs() <= r->capacity(),
436436
"GCLAB alloc count should not be larger than capacity");
437437

438-
verify(r, r->get_shared_allocs() + r->get_tlab_allocs() + r->get_gclab_allocs() == r->used(),
439-
"Accurate accounting: shared + TLAB + GCLAB = used");
438+
verify(r, r->get_plab_allocs() <= r->capacity(),
439+
"PLAB alloc count should not be larger than capacity");
440+
441+
verify(r, r->get_shared_allocs() + r->get_tlab_allocs() + r->get_gclab_allocs() + r->get_plab_allocs() == r->used(),
442+
"Accurate accounting: shared + TLAB + GCLAB + PLAB = used");
440443

441444
verify(r, !r->is_empty() || !r->has_live(),
442445
"Empty regions should not have live data");

‎src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,10 @@
224224
product(bool, ShenandoahElasticTLAB, true, DIAGNOSTIC, \
225225
"Use Elastic TLABs with Shenandoah") \
226226
\
227+
product(bool, ShenandoahUsePLAB, true, DIAGNOSTIC, \
228+
"Use PLABs for object promotions with Shenandoah, " \
229+
"if in generational mode and UseTLAB is also set.") \
230+
\
227231
product(uintx, ShenandoahEvacReserve, 5, EXPERIMENTAL, \
228232
"How much of heap to reserve for evacuations. Larger values make "\
229233
"GC evacuate more live objects on every cycle, while leaving " \

0 commit comments

Comments
 (0)
Please sign in to comment.