@@ -58,11 +58,13 @@ class RegMask {
58
58
59
59
friend class RegMaskIterator ;
60
60
61
- enum {
62
- _WordBits = BitsPerWord,
63
- _LogWordBits = LogBitsPerWord,
64
- _RM_SIZE = LP64_ONLY (align_up (RM_SIZE, 2 ) >> 1 ) NOT_LP64 (RM_SIZE)
65
- };
61
+ // On LP64, RM_SIZE (counted in 32-bit words) must be even so that it maps onto whole 64-bit words - assert that this holds
62
+ LP64_ONLY (STATIC_ASSERT(is_aligned(RM_SIZE, 2 )));
63
+
64
+ static const unsigned int _WordBitMask = BitsPerWord - 1U ;
65
+ static const unsigned int _LogWordBits = LogBitsPerWord;
66
+ static const unsigned int _RM_SIZE = LP64_ONLY(RM_SIZE >> 1 ) NOT_LP64(RM_SIZE);
67
+ static const unsigned int _RM_MAX = _RM_SIZE - 1U ;
66
68
67
69
union {
68
70
// Array of Register Mask bits. This array is large enough to cover
@@ -82,7 +84,7 @@ class RegMask {
82
84
unsigned int _hwm;
83
85
84
86
public:
85
- enum { CHUNK_SIZE = RM_SIZE*BitsPerInt };
87
+ enum { CHUNK_SIZE = _RM_SIZE * BitsPerWord };
86
88
87
89
// SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
88
90
// Also, consider the maximum alignment size for a normally allocated
@@ -121,7 +123,7 @@ class RegMask {
121
123
FORALL_BODY
122
124
# undef BODY
123
125
_lwm = 0 ;
124
- _hwm = _RM_SIZE - 1 ;
126
+ _hwm = _RM_MAX ;
125
127
while (_hwm > 0 && _RM_UP[_hwm] == 0 ) _hwm--;
126
128
while ((_lwm < _hwm) && _RM_UP[_lwm] == 0 ) _lwm++;
127
129
assert (valid_watermarks (), " post-condition" );
@@ -138,7 +140,7 @@ class RegMask {
138
140
}
139
141
140
142
// Construct an empty mask
141
- RegMask () : _RM_UP(), _lwm(_RM_SIZE - 1 ), _hwm(0 ) {
143
+ RegMask () : _RM_UP(), _lwm(_RM_MAX ), _hwm(0 ) {
142
144
assert (valid_watermarks (), " post-condition" );
143
145
}
144
146
@@ -152,15 +154,19 @@ class RegMask {
152
154
assert (reg < CHUNK_SIZE, " " );
153
155
154
156
unsigned r = (unsigned )reg;
155
- return _RM_UP[r >> _LogWordBits] & (uintptr_t (1 ) <<(r & (_WordBits - 1U ) ));
157
+ return _RM_UP[r >> _LogWordBits] & (uintptr_t (1 ) << (r & _WordBitMask ));
156
158
}
157
159
158
160
// The last bit in the register mask indicates that the mask should repeat
159
161
// indefinitely with ONE bits. Returns TRUE if mask is infinite or
160
162
// unbounded in size. Returns FALSE if mask is finite size.
161
- bool is_AllStack () const { return _RM_UP[_RM_SIZE - 1U ] >> (_WordBits - 1U ); }
163
+ bool is_AllStack () const {
164
+ return (_RM_UP[_RM_MAX] & (uintptr_t (1 ) << _WordBitMask)) != 0 ;
165
+ }
162
166
163
- void set_AllStack () { Insert (OptoReg::Name (CHUNK_SIZE-1 )); }
167
+ void set_AllStack () {
168
+ _RM_UP[_RM_MAX] |= (uintptr_t (1 ) << _WordBitMask);
169
+ }
164
170
165
171
// Test for being a not-empty mask.
166
172
bool is_NotEmpty () const {
@@ -178,7 +184,7 @@ class RegMask {
178
184
for (unsigned i = _lwm; i <= _hwm; i++) {
179
185
uintptr_t bits = _RM_UP[i];
180
186
if (bits) {
181
- return OptoReg::Name ((i<< _LogWordBits) + find_lowest_bit (bits));
187
+ return OptoReg::Name ((i << _LogWordBits) + find_lowest_bit (bits));
182
188
}
183
189
}
184
190
return OptoReg::Name (OptoReg::Bad);
@@ -192,7 +198,7 @@ class RegMask {
192
198
while (i > _lwm) {
193
199
uintptr_t bits = _RM_UP[--i];
194
200
if (bits) {
195
- return OptoReg::Name ((i<< _LogWordBits) + find_highest_bit (bits));
201
+ return OptoReg::Name ((i << _LogWordBits) + find_highest_bit (bits));
196
202
}
197
203
}
198
204
return OptoReg::Name (OptoReg::Bad);
@@ -270,17 +276,17 @@ class RegMask {
270
276
271
277
// Clear a register mask
272
278
void Clear () {
273
- _lwm = _RM_SIZE - 1 ;
279
+ _lwm = _RM_MAX ;
274
280
_hwm = 0 ;
275
- memset (_RM_UP, 0 , sizeof (uintptr_t )* _RM_SIZE);
281
+ memset (_RM_UP, 0 , sizeof (uintptr_t ) * _RM_SIZE);
276
282
assert (valid_watermarks (), " sanity" );
277
283
}
278
284
279
285
// Fill a register mask with 1's
280
286
void Set_All () {
281
287
_lwm = 0 ;
282
- _hwm = _RM_SIZE - 1 ;
283
- memset (_RM_UP, 0xFF , sizeof (uintptr_t )* _RM_SIZE);
288
+ _hwm = _RM_MAX ;
289
+ memset (_RM_UP, 0xFF , sizeof (uintptr_t ) * _RM_SIZE);
284
290
assert (valid_watermarks (), " sanity" );
285
291
}
286
292
@@ -294,15 +300,15 @@ class RegMask {
294
300
unsigned index = r >> _LogWordBits;
295
301
if (index > _hwm) _hwm = index ;
296
302
if (index < _lwm) _lwm = index ;
297
- _RM_UP[index ] |= (uintptr_t (1 ) << (r & (_WordBits - 1U ) ));
303
+ _RM_UP[index ] |= (uintptr_t (1 ) << (r & _WordBitMask ));
298
304
assert (valid_watermarks (), " post-condition" );
299
305
}
300
306
301
307
// Remove register from mask
302
308
void Remove (OptoReg::Name reg) {
303
309
assert (reg < CHUNK_SIZE, " " );
304
310
unsigned r = (unsigned )reg;
305
- _RM_UP[r >> _LogWordBits] &= ~(uintptr_t (1 ) << (r & (_WordBits- 1U ) ));
311
+ _RM_UP[r >> _LogWordBits] &= ~(uintptr_t (1 ) << (r & _WordBitMask ));
306
312
}
307
313
308
314
// OR 'rm' into 'this'
@@ -355,23 +361,23 @@ class RegMask {
355
361
// NOTE: -1 in computation reflects the usage of the last
356
362
// bit of the regmask as an infinite stack flag and
357
363
// -7 is to keep mask aligned for largest value (VecZ). NOTE(review): only the -1 is applied in the expression below; the -7 part looks stale - confirm.
358
- return (int )reg < (int )(CHUNK_SIZE- 1 );
364
+ return (int )reg < (int )(CHUNK_SIZE - 1 );
359
365
}
360
366
static bool can_represent_arg (OptoReg::Name reg) {
361
367
// NOTE: -SlotsPerVecZ in computation reflects the need
362
368
// to keep mask aligned for largest value (VecZ).
363
- return (int )reg < (int )(CHUNK_SIZE- SlotsPerVecZ);
369
+ return (int )reg < (int )(CHUNK_SIZE - SlotsPerVecZ);
364
370
}
365
371
};
366
372
367
373
class RegMaskIterator {
368
374
private:
369
- uintptr_t _current_word ;
375
+ uintptr_t _current_bits ;
370
376
unsigned int _next_index;
371
377
OptoReg::Name _reg;
372
- const RegMask& _rm;
378
+ const RegMask& _rm;
373
379
public:
374
- RegMaskIterator (const RegMask& rm) : _current_word (0 ), _next_index(rm._lwm), _reg(OptoReg::Special ), _rm(rm) {
380
+ RegMaskIterator (const RegMask& rm) : _current_bits (0 ), _next_index(rm._lwm), _reg(OptoReg::Bad ), _rm(rm) {
375
381
// Calculate the first element
376
382
next ();
377
383
}
@@ -383,26 +389,44 @@ class RegMaskIterator {
383
389
// Get the current element and calculate the next
384
390
OptoReg::Name next () {
385
391
OptoReg::Name r = _reg;
386
- if (_current_word != 0 ) {
387
- unsigned int next_bit = find_lowest_bit (_current_word);
392
+
393
+ // This bit shift scheme, borrowed from IndexSetIterator,
394
+ // shifts the _current_bits down by the number of trailing
395
+ // zeros - which leaves the "current" bit in position zero,
396
+ // then subtracts 1 to clear it. This trick avoids the
397
+ // undefined behavior that could arise if trying to shift
398
+ // away the bit with a single >> (next_bit + 1) shift when
399
+ // next_bit is 31/63. It also keeps the number of shifts and
400
+ // arithmetic ops to a minimum.
401
+
402
+ // We have previously found bits at _next_index - 1, and
403
+ // still have some left at the same index.
404
+ if (_current_bits != 0 ) {
405
+ unsigned int next_bit = find_lowest_bit (_current_bits);
406
+ assert (_reg != OptoReg::Bad, " can't be in a bad state" );
388
407
assert (next_bit > 0 , " must be" );
389
- assert (((_current_word >> next_bit) & 0x1 ) == 1 , " sanity " );
390
- _current_word = (_current_word >> next_bit) - 1 ;
408
+ assert (((_current_bits >> next_bit) & 0x1 ) == 1 , " lowest bit must be set after shift " );
409
+ _current_bits = (_current_bits >> next_bit) - 1 ;
391
410
_reg = OptoReg::add (_reg, next_bit);
392
411
return r;
393
412
}
394
413
414
+ // Find the next word with bits
395
415
while (_next_index <= _rm._hwm ) {
396
- _current_word = _rm._RM_UP [_next_index++];
397
- if (_current_word != 0 ) {
398
- unsigned int next_bit = find_lowest_bit (_current_word);
399
- assert (((_current_word >> next_bit) & 0x1 ) == 1 , " sanity" );
400
- _current_word = (_current_word >> next_bit) - 1 ;
416
+ _current_bits = _rm._RM_UP [_next_index++];
417
+ if (_current_bits != 0 ) {
418
+ // Found a word. Calculate the first register element and
419
+ // prepare _current_bits by shifting it down and clearing
420
+ // the lowest bit
421
+ unsigned int next_bit = find_lowest_bit (_current_bits);
422
+ assert (((_current_bits >> next_bit) & 0x1 ) == 1 , " lowest bit must be set after shift" );
423
+ _current_bits = (_current_bits >> next_bit) - 1 ;
401
424
_reg = OptoReg::Name (((_next_index - 1 ) << RegMask::_LogWordBits) + next_bit);
402
425
return r;
403
426
}
404
427
}
405
428
429
+ // No more bits
406
430
_reg = OptoReg::Name (OptoReg::Bad);
407
431
return r;
408
432
}
0 commit comments