@@ -833,6 +833,67 @@ HeapWord* ShenandoahHeap::allocate_from_gclab_slow(Thread* thread, size_t size)
  return gclab->allocate(size);
}

+ HeapWord* ShenandoahHeap::allocate_from_plab_slow(Thread* thread, size_t size) {
+   // New object should fit the PLAB size
+   size_t min_size = MAX2(size, PLAB::min_size());
+
+   // Figure out size of new PLAB, looking back at heuristics. Expand aggressively.
+   size_t new_size = ShenandoahThreadLocalData::plab_size(thread) * 2;
+   new_size = MIN2(new_size, PLAB::max_size());
+   new_size = MAX2(new_size, PLAB::min_size());
+
+   // Record new heuristic value even if we take any shortcut. This captures
+   // the case when moderately-sized objects always take a shortcut. At some point,
+   // heuristics should catch up with them.
+   ShenandoahThreadLocalData::set_plab_size(thread, new_size);
+
+   if (new_size < size) {
+     // New size still does not fit the object. Fall back to shared allocation.
+     // This avoids retiring perfectly good PLABs, when we encounter a large object.
+     return NULL;
+   }
+
+   // Retire current PLAB, and allocate a new one.
+   PLAB* plab = ShenandoahThreadLocalData::plab(thread);
+   retire_plab(plab);
+
+   size_t actual_size = 0;
+   HeapWord* plab_buf = allocate_new_plab(min_size, new_size, &actual_size);
+   if (plab_buf == NULL) {
+     return NULL;
+   }
+
+   assert(size <= actual_size, "allocation should fit");
+
+   if (ZeroTLAB) {
+     // ..and clear it.
+     Copy::zero_to_words(plab_buf, actual_size);
+   } else {
+     // ...and zap just allocated object.
+ #ifdef ASSERT
+     // Skip mangling the space corresponding to the object header to
+     // ensure that the returned space is not considered parsable by
+     // any concurrent GC thread.
+     size_t hdr_size = oopDesc::header_size();
+     Copy::fill_to_words(plab_buf + hdr_size, actual_size - hdr_size, badHeapWordVal);
+ #endif // ASSERT
+   }
+   plab->set_buf(plab_buf, actual_size);
+   return plab->allocate(size);
+ }
+
+ void ShenandoahHeap::retire_plab(PLAB* plab) {
+   size_t waste = plab->waste();
+   HeapWord* top = plab->top();
+   plab->retire();
+   if (top != NULL && plab->waste() > waste) {
+     // If retiring the plab created a filler object, then we
+     // need to register it with our card scanner so it can
+     // safely walk the region backing the plab.
+     card_scan()->register_object(top);
+   }
+ }
+
HeapWord* ShenandoahHeap::allocate_new_tlab(size_t min_size,
                                            size_t requested_size,
                                            size_t* actual_size) {
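Aside (not part of the patch): the resize logic in allocate_from_plab_slow above doubles the thread-local PLAB size, clamps it to [PLAB::min_size(), PLAB::max_size()], records the new heuristic value, and only then decides whether to retire the current buffer or fall back to a shared allocation. A minimal standalone sketch of that arithmetic, using hypothetical placeholder values for the PLAB bounds (the real bounds come from the JVM, not from this snippet):

#include <algorithm>
#include <cstddef>
#include <cstdio>

// Hypothetical stand-ins for PLAB::min_size()/PLAB::max_size(); the real values
// are derived by HotSpot from heap and object-size constraints.
static const size_t kPlabMinWords = 128;
static const size_t kPlabMaxWords = 64 * 1024;

// Mirrors the resize heuristic: double the current size, clamp to [min, max],
// and report whether the requested object still does not fit (shared-alloc fallback).
static size_t next_plab_size(size_t current_words, size_t request_words, bool* fallback_to_shared) {
  size_t new_size = current_words * 2;
  new_size = std::min(new_size, kPlabMaxWords);
  new_size = std::max(new_size, kPlabMinWords);
  *fallback_to_shared = (new_size < request_words);
  return new_size;
}

int main() {
  bool fallback = false;
  size_t sz = kPlabMinWords;
  // A moderately sized request grows the PLAB; an oversized one triggers the fallback.
  sz = next_plab_size(sz, 200, &fallback);      // -> 256 words, no fallback
  printf("new size = %zu, fallback = %d\n", sz, fallback);
  sz = next_plab_size(sz, 1000000, &fallback);  // -> clamped, fallback to shared allocation
  printf("new size = %zu, fallback = %d\n", sz, fallback);
  return 0;
}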
@@ -859,6 +920,19 @@ HeapWord* ShenandoahHeap::allocate_new_gclab(size_t min_size,
  return res;
}

+ HeapWord* ShenandoahHeap::allocate_new_plab(size_t min_size,
+                                             size_t word_size,
+                                             size_t* actual_size) {
+   ShenandoahAllocRequest req = ShenandoahAllocRequest::for_plab(min_size, word_size);
+   HeapWord* res = allocate_memory(req);
+   if (res != NULL) {
+     *actual_size = req.actual_size();
+   } else {
+     *actual_size = 0;
+   }
+   return res;
+ }
+
HeapWord* ShenandoahHeap::allocate_memory(ShenandoahAllocRequest& req) {
  intptr_t pacer_epoch = 0;
  bool in_new_region = false;
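For orientation only (the caller is not shown in this diff): the slow path above is typically reached from a fast-path helper analogous to the existing allocate_from_gclab, which first tries a bump-pointer allocation in the thread-local PLAB and only drops into allocate_from_plab_slow when the buffer cannot satisfy the request. A hedged sketch of what such a wrapper might look like, assuming the thread-local accessors already used elsewhere in this diff; the name and placement are assumptions, not part of the patch:

// Hypothetical fast path; mirrors the GCLAB equivalent but is not shown in this commit.
inline HeapWord* ShenandoahHeap::allocate_from_plab(Thread* thread, size_t size) {
  PLAB* plab = ShenandoahThreadLocalData::plab(thread);
  if (plab == NULL) {
    // No PLAB for this thread: caller falls back to a shared allocation.
    return NULL;
  }
  HeapWord* obj = plab->allocate(size);  // bump-pointer allocation inside the buffer
  if (obj != NULL) {
    return obj;
  }
  // Buffer exhausted (or object too large): take the slow path, which may resize
  // and replace the PLAB, or return NULL to force a shared allocation.
  return allocate_from_plab_slow(thread, size);
}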
@@ -954,23 +1028,25 @@ HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req
  //
  // The thread allocating b and the thread allocating c can "race" in various ways, resulting in confusion, such as last-start
  // representing object b while first-start represents object c. This is why we need to require all register_object()
-   // invocations to be "mutually exclusive". Later, when we use GCLABs to allocate memory for promotions and evacuations,
+   // invocations to be "mutually exclusive". Later, when we use GCLABs and PLABs to allocate memory for promotions and evacuations,
  // the protocol may work something like the following:
-   // 1. The GCLAB is allocated by this (or similar) function, while holding the global lock.
-   // 2. The GCLAB is registered as a single object.
-   // 3. The GCLAB is always aligned at the start of a card memory range and is always a multiple of the card-table memory range size
-   // 3. Individual allocations carved from the GCLAB are not immediately registered
-   // 4. When the GCLAB is eventually retired, all of the objects allocated within the GCLAB are registered in batch by a
-   //    single thread. No further synchronization is required because no other allocations will pertain to the same
+   // 1. The GCLAB/PLAB is allocated by this (or similar) function, while holding the global lock.
+   // 2. The GCLAB/PLAB is always aligned at the start of a card memory range
+   //    and is always a multiple of the card-table memory range size.
+   // 3. Individual allocations carved from a GCLAB/PLAB are not immediately registered.
+   // 4. A PLAB is registered as a single object.
+   // 5. When a PLAB is eventually retired, all of the objects allocated within the GCLAB/PLAB are registered in batch by a
+   //    single thread. No further synchronization is required because no other allocations will pertain to the same
  //    card-table memory ranges.
  //
-   // The other case that needs special handling is promotion of regions en masse. When the region is promoted, all objects contained
-   // within the region are registered. Since the region is a multiple of card-table memory range sizes, there is no need for
-   // synchronization. It might be nice to figure out how to allow multiple threads to work together to register all of the objects in
-   // a promoted region, or at least try to balance the efforts so that different gc threads work on registering the objects of
-   // different heap regions. But that effort will come later.
+   // The other case that needs special handling is region promotion. When a region is promoted, all objects contained
+   // in it are registered. Since the region is a multiple of card table memory range sizes, there is no need for
+   // synchronization.
+   // TODO: figure out how to allow multiple threads to work together to register all of the objects in
+   //       a promoted region, or at least try to balance the efforts so that different GC threads work
+   //       on registering the objects of different heap regions.
  //
-   if (result != NULL && req.affiliation() == ShenandoahRegionAffiliation::OLD_GENERATION) {
+   if (mode()->is_generational() && result != NULL && req.affiliation() == ShenandoahRegionAffiliation::OLD_GENERATION) {
    ShenandoahHeap::heap()->card_scan()->register_object(result);
  }
  return result;
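The numbered protocol above leans on one invariant: because each buffer starts on a card boundary and spans a whole number of cards, the card-table entries touched when one thread batch-registers its retired PLAB can never overlap the entries touched by another thread's PLAB, so no locking is needed at retirement. A small illustrative sketch of that card arithmetic; the card size and helper names below are placeholders for this example, not the real ShenandoahScanRemembered API:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Hypothetical card geometry for illustration; HotSpot derives the real value from the card table.
static const size_t kCardSizeBytes = 512;

struct CardRange { size_t first_card; size_t last_card; };

// Map a buffer [start, start + size_bytes) to the range of cards it covers.
static CardRange cards_for(uintptr_t start, size_t size_bytes) {
  return { start / kCardSizeBytes, (start + size_bytes - 1) / kCardSizeBytes };
}

int main() {
  // Two buffers, each card-aligned and a multiple of the card size, as the protocol requires.
  CardRange a = cards_for(0 * kCardSizeBytes, 4 * kCardSizeBytes);
  CardRange b = cards_for(4 * kCardSizeBytes, 2 * kCardSizeBytes);
  // Their card ranges cannot overlap, so each owning thread can register its objects
  // at retirement without synchronizing with other threads.
  assert(a.last_card < b.first_card);
  return 0;
}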
@@ -1140,6 +1216,10 @@ class ShenandoahCheckCleanGCLABClosure : public ThreadClosure {
    PLAB* gclab = ShenandoahThreadLocalData::gclab(thread);
    assert(gclab != NULL, "GCLAB should be initialized for %s", thread->name());
    assert(gclab->words_remaining() == 0, "GCLAB should not need retirement");
+
+     PLAB* plab = ShenandoahThreadLocalData::plab(thread);
+     assert(plab != NULL, "PLAB should be initialized for %s", thread->name());
+     assert(plab->words_remaining() == 0, "PLAB should not need retirement");
  }
};

@@ -1155,6 +1235,13 @@ class ShenandoahRetireGCLABClosure : public ThreadClosure {
    if (_resize && ShenandoahThreadLocalData::gclab_size(thread) > 0) {
      ShenandoahThreadLocalData::set_gclab_size(thread, 0);
    }
+
+     PLAB* plab = ShenandoahThreadLocalData::plab(thread);
+     assert(plab != NULL, "PLAB should be initialized for %s", thread->name());
+     ShenandoahHeap::heap()->retire_plab(plab);
+     if (_resize && ShenandoahThreadLocalData::plab_size(thread) > 0) {
+       ShenandoahThreadLocalData::set_plab_size(thread, 0);
+     }
  }
};
