[lttng-dev] [PATCH babeltrace v3] Fix: bitfield: left shift undefined behavior

Wed May 8 18:23:18 EDT 2019

bitfield.h uses the left shift operator with a left operand which
may be negative. The C99 standard (6.5.7) states that left-shifting a
negative value is undefined behavior.

When building with -Wshift-negative-value, we get this gcc warning:

In file included from /home/smarchi/src/babeltrace/include/babeltrace/ctfser-internal.h:44:0,
                 from /home/smarchi/src/babeltrace/ctfser/ctfser.c:42:
/home/smarchi/src/babeltrace/include/babeltrace/ctfser-internal.h: In function ‘bt_ctfser_write_unsigned_int’:
/home/smarchi/src/babeltrace/include/babeltrace/bitfield-internal.h:116:24: error: left shift of negative value [-Werror=shift-negative-value]
   mask = ~((~(type) 0) << (__start % ts));  \
                        ^
/home/smarchi/src/babeltrace/include/babeltrace/bitfield-internal.h:222:2: note: in expansion of macro ‘_bt_bitfield_write_le’
  _bt_bitfield_write_le(ptr, type, _start, _length, _v)
  ^~~~~~~~~~~~~~~~~~~~~
/home/smarchi/src/babeltrace/include/babeltrace/ctfser-internal.h:418:3: note: in expansion of macro ‘bt_bitfield_write_le’
   bt_bitfield_write_le(mmap_align_addr(ctfser->base_mma) +
   ^~~~~~~~~~~~~~~~~~~~

This boils down to the fact that the expression ~((uint8_t)0) has type
"signed int", which is used as an operand of the left shift.  This is due
to the integer promotion rules of C99 (6.3.1.1):

    If an int can represent all values of the original type, the value is
    converted to an int; otherwise, it is converted to an unsigned int.
    These are called the integer promotions. All other types are unchanged
    by the integer promotions.

We also need to cast the result explicitly into the left hand
side type to deal with:

warning: large integer implicitly truncated to unsigned type [-Woverflow]

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
---
Changes since v1:
- Generate compile-time error if the type argument passed to
  _bt_unsigned_cast() is larger than sizeof(uint64_t), this
  allows removing _bt_check_max_64bit,
- Introduce _bt_fill_mask, which replaces _bt_bitwise_not,
- Clarify _bt_unsigned_cast comment,
- Expand explanation of the issue within the patch commit message.

Changes since v2:
- Fix unwanted sign extension when generating masks,
- Introduce macro helpers to clarify code:
  - _bt_cast_value_to_unsigned()
  - _bt_cast_value_to_unsigned_type(),
  - _bt_make_mask_complement(),
  - _bt_make_mask().
---
 include/babeltrace/bitfield-internal.h | 128 ++++++++++++++++++++-------------
 1 file changed, 77 insertions(+), 51 deletions(-)

diff --git a/include/babeltrace/bitfield-internal.h b/include/babeltrace/bitfield-internal.h
index c5d5eccd..27e0fdfd 100644
--- a/include/babeltrace/bitfield-internal.h
+++ b/include/babeltrace/bitfield-internal.h
@@ -55,21 +55,55 @@
 #define _bt_is_signed_type(type)	((type) -1 < (type) 0)
 
 /*
- * NOTE: The cast to (uint64_t) below ensures that we're not casting a
- * negative value, which is undefined in C. However, this limits the
- * maximum type size of `type` and `v` to 64-bit. The
- * _bt_check_max_64bit() is used to check that the users of this header
- * do not use types with a size greater than 64-bit.
+ * Cast value `v` to an unsigned integer of the same size as `v`.
  */
-#define _bt_unsigned_cast(type, v)					\
-({									\
-	(sizeof(v) < sizeof(type)) ?					\
-		((type) (v)) & ((type) (~(~(uint64_t) 0 << (sizeof(v) * CHAR_BIT)))) : \
-		(type) (v);						\
-})
+#define _bt_cast_value_to_unsigned(v)					\
+	(sizeof(v) == sizeof(uint8_t) ? (uint8_t) (v) :			\
+	sizeof(v) == sizeof(uint16_t) ? (uint16_t) (v) :		\
+	sizeof(v) == sizeof(uint32_t) ? (uint32_t) (v) :		\
+	sizeof(v) == sizeof(uint64_t) ? (uint64_t) (v) :		\
+	sizeof(struct { int f:(sizeof(v) > sizeof(uint64_t) ? -1 : 1); }))
+
+/*
+ * Cast value `v` to an unsigned integer type of the size of type `type`
+ * *without* sign-extension.
+ *
+ * The unsigned cast ensures that we're not shifting a negative value,
+ * which is undefined in C. However, this limits the maximum type size
+ * of `type` to 64-bit. Generate a compile-time error if the size of
+ * `type` is larger than 64-bit.
+ */
+#define _bt_cast_value_to_unsigned_type(type, v)			\
+	(sizeof(type) == sizeof(uint8_t) ?				\
+		(uint8_t) _bt_cast_value_to_unsigned(v) :		\
+	sizeof(type) == sizeof(uint16_t) ?				\
+		(uint16_t) _bt_cast_value_to_unsigned(v) :		\
+	sizeof(type) == sizeof(uint32_t) ?				\
+		(uint32_t) _bt_cast_value_to_unsigned(v) :		\
+	sizeof(type) == sizeof(uint64_t) ?				\
+		(uint64_t) _bt_cast_value_to_unsigned(v) :		\
+	sizeof(struct { int f:(sizeof(type) > sizeof(uint64_t) ? -1 : 1); }))
 
-#define _bt_check_max_64bit(type)					\
-	char _max_64bit_assertion[sizeof(type) <= sizeof(uint64_t) ? 1 : -1] __attribute__((unused))
+/*
+ * _bt_fill_mask evaluates to an unsigned integer with the size of
+ * "type" with all bits set. It is meant to be used as a left operand to
+ * the shift-left operator to create bit masks.
+ */
+#define _bt_fill_mask(type)						\
+	_bt_cast_value_to_unsigned_type(type, ~(type) 0)
+
+/*
+ * Generate a mask of type `type` with the `length` least significant bits
+ * cleared, and the most significant bits set.
+ */
+#define _bt_make_mask_complement(type, length)				\
+	((type) (_bt_fill_mask(type) << (length)))
+/*
+ * Generate a mask of type `type` with the `length` least significant bits
+ * set, and the most significant bits cleared.
+ */
+#define _bt_make_mask(type, length)					\
+	((type) ~_bt_make_mask_complement(type, length))
 
 /*
  * bt_bitfield_write - write integer to a bitfield in native endianness
@@ -108,15 +142,15 @@ do {									\
 									\
 	/* Trim v high bits */						\
 	if (__length < sizeof(__v) * CHAR_BIT)				\
-		__v &= ~((~(typeof(__v)) 0) << __length);		\
+		__v &= _bt_make_mask(typeof(__v), __length);		\
 									\
 	/* We can now append v with a simple "or", shift it piece-wise */ \
 	this_unit = start_unit;						\
 	if (start_unit == end_unit - 1) {				\
-		mask = ~((~(type) 0) << (__start % ts));		\
+		mask = _bt_make_mask(type, __start % ts);		\
 		if (end % ts)						\
-			mask |= (~(type) 0) << (end % ts);		\
-		cmask = (type) __v << (__start % ts);			\
+			mask |= _bt_make_mask_complement(type, end % ts); \
+		cmask = (type) (_bt_cast_value_to_unsigned((type) (__v)) << (__start % ts)); \
 		cmask &= ~mask;						\
 		__ptr[this_unit] &= mask;				\
 		__ptr[this_unit] |= cmask;				\
@@ -124,8 +158,8 @@ do {									\
 	}								\
 	if (__start % ts) {						\
 		cshift = __start % ts;					\
-		mask = ~((~(type) 0) << cshift);			\
-		cmask = (type) __v << cshift;				\
+		mask = _bt_make_mask(type, cshift);			\
+		cmask = (type) (_bt_cast_value_to_unsigned((type) (__v)) << cshift); \
 		cmask &= ~mask;						\
 		__ptr[this_unit] &= mask;				\
 		__ptr[this_unit] |= cmask;				\
@@ -139,7 +173,7 @@ do {									\
 		__start += ts;						\
 	}								\
 	if (end % ts) {							\
-		mask = (~(type) 0) << (end % ts);			\
+		mask = _bt_make_mask_complement(type, end % ts);	\
 		cmask = (type) __v;					\
 		cmask &= ~mask;						\
 		__ptr[this_unit] &= mask;				\
@@ -167,15 +201,15 @@ do {									\
 									\
 	/* Trim v high bits */						\
 	if (__length < sizeof(__v) * CHAR_BIT)				\
-		__v &= ~((~(typeof(__v)) 0) << __length);		\
+		__v &= _bt_make_mask(typeof(__v), __length);		\
 									\
 	/* We can now append v with a simple "or", shift it piece-wise */ \
 	this_unit = end_unit - 1;					\
 	if (start_unit == end_unit - 1) {				\
-		mask = ~((~(type) 0) << ((ts - (end % ts)) % ts));	\
+		mask = _bt_make_mask(type, (ts - (end % ts)) % ts);	\
 		if (__start % ts)					\
-			mask |= (~((type) 0)) << (ts - (__start % ts));	\
-		cmask = (type) __v << ((ts - (end % ts)) % ts);		\
+			mask |= _bt_make_mask_complement(type, ts - (__start % ts)); \
+		cmask = (type) (_bt_cast_value_to_unsigned((type) (__v)) << ((ts - (end % ts)) % ts)); \
 		cmask &= ~mask;						\
 		__ptr[this_unit] &= mask;				\
 		__ptr[this_unit] |= cmask;				\
@@ -183,8 +217,8 @@ do {									\
 	}								\
 	if (end % ts) {							\
 		cshift = end % ts;					\
-		mask = ~((~(type) 0) << (ts - cshift));			\
-		cmask = (type) __v << (ts - cshift);			\
+		mask = _bt_make_mask(type, ts - cshift);		\
+		cmask = (type) (_bt_cast_value_to_unsigned((type) (__v)) << (ts - cshift)); \
 		cmask &= ~mask;						\
 		__ptr[this_unit] &= mask;				\
 		__ptr[this_unit] |= cmask;				\
@@ -198,7 +232,7 @@ do {									\
 		end -= ts;						\
 	}								\
 	if (__start % ts) {						\
-		mask = (~(type) 0) << (ts - (__start % ts));		\
+		mask = _bt_make_mask_complement(type, ts - (__start % ts)); \
 		cmask = (type) __v;					\
 		cmask &= ~mask;						\
 		__ptr[this_unit] &= mask;				\
@@ -252,10 +286,6 @@ do {									\
 	unsigned long start_unit, end_unit, this_unit;			\
 	unsigned long end, cshift; /* cshift is "complement shift" */	\
 									\
-	{ _bt_check_max_64bit(type); }					\
-	{ _bt_check_max_64bit(typeof(*_vptr)); }			\
-	{ _bt_check_max_64bit(typeof(*_ptr)); }				\
-									\
 	if (!__length) {						\
 		*__vptr = 0;						\
 		break;							\
@@ -275,39 +305,39 @@ do {									\
 		cmask = __ptr[this_unit];				\
 		cmask >>= (__start % ts);				\
 		if ((end - __start) % ts) {				\
-			mask = ~((~(type) 0) << (end - __start));	\
+			mask = _bt_make_mask(type, end - __start);	\
 			cmask &= mask;					\
 		}							\
 		__v = _bt_piecewise_lshift(__v, end - __start);		\
-		__v |= _bt_unsigned_cast(typeof(__v), cmask);		\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), cmask); \
 		*__vptr = __v;						\
 		break;							\
 	}								\
 	if (end % ts) {							\
 		cshift = end % ts;					\
-		mask = ~((~(type) 0) << cshift);			\
+		mask = _bt_make_mask(type, cshift);			\
 		cmask = __ptr[this_unit];				\
 		cmask &= mask;						\
 		__v = _bt_piecewise_lshift(__v, cshift);		\
-		__v |= _bt_unsigned_cast(typeof(__v), cmask);		\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), cmask); \
 		end -= cshift;						\
 		this_unit--;						\
 	}								\
 	for (; (long) this_unit >= (long) start_unit + 1; this_unit--) { \
 		__v = _bt_piecewise_lshift(__v, ts);			\
-		__v |= _bt_unsigned_cast(typeof(__v), __ptr[this_unit]);\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), __ptr[this_unit]); \
 		end -= ts;						\
 	}								\
 	if (__start % ts) {						\
-		mask = ~((~(type) 0) << (ts - (__start % ts)));		\
+		mask = _bt_make_mask(type, ts - (__start % ts));	\
 		cmask = __ptr[this_unit];				\
 		cmask >>= (__start % ts);				\
 		cmask &= mask;						\
 		__v = _bt_piecewise_lshift(__v, ts - (__start % ts));	\
-		__v |= _bt_unsigned_cast(typeof(__v), cmask);		\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), cmask); \
 	} else {							\
 		__v = _bt_piecewise_lshift(__v, ts);			\
-		__v |= _bt_unsigned_cast(typeof(__v), __ptr[this_unit]);\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), __ptr[this_unit]); \
 	}								\
 	*__vptr = __v;							\
 } while (0)
@@ -323,10 +353,6 @@ do {									\
 	unsigned long start_unit, end_unit, this_unit;			\
 	unsigned long end, cshift; /* cshift is "complement shift" */	\
 									\
-	{ _bt_check_max_64bit(type); }					\
-	{ _bt_check_max_64bit(typeof(*_vptr)); }			\
-	{ _bt_check_max_64bit(typeof(*_ptr)); }				\
-									\
 	if (!__length) {						\
 		*__vptr = 0;						\
 		break;							\
@@ -346,39 +372,39 @@ do {									\
 		cmask = __ptr[this_unit];				\
 		cmask >>= (ts - (end % ts)) % ts;			\
 		if ((end - __start) % ts) {				\
-			mask = ~((~(type) 0) << (end - __start));	\
+			mask = _bt_make_mask(type, end - __start);	\
 			cmask &= mask;					\
 		}							\
 		__v = _bt_piecewise_lshift(__v, end - __start);		\
-		__v |= _bt_unsigned_cast(typeof(__v), cmask);		\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), cmask); \
 		*__vptr = __v;						\
 		break;							\
 	}								\
 	if (__start % ts) {						\
 		cshift = __start % ts;					\
-		mask = ~((~(type) 0) << (ts - cshift));			\
+		mask = _bt_make_mask(type, ts - cshift);		\
 		cmask = __ptr[this_unit];				\
 		cmask &= mask;						\
 		__v = _bt_piecewise_lshift(__v, ts - cshift);		\
-		__v |= _bt_unsigned_cast(typeof(__v), cmask);		\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), cmask); \
 		__start += ts - cshift;					\
 		this_unit++;						\
 	}								\
 	for (; this_unit < end_unit - 1; this_unit++) {			\
 		__v = _bt_piecewise_lshift(__v, ts);			\
-		__v |= _bt_unsigned_cast(typeof(__v), __ptr[this_unit]);\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), __ptr[this_unit]); \
 		__start += ts;						\
 	}								\
 	if (end % ts) {							\
-		mask = ~((~(type) 0) << (end % ts));			\
+		mask = _bt_make_mask(type, end % ts);			\
 		cmask = __ptr[this_unit];				\
 		cmask >>= ts - (end % ts);				\
 		cmask &= mask;						\
 		__v = _bt_piecewise_lshift(__v, end % ts);		\
-		__v |= _bt_unsigned_cast(typeof(__v), cmask);		\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), cmask); \
 	} else {							\
 		__v = _bt_piecewise_lshift(__v, ts);			\
-		__v |= _bt_unsigned_cast(typeof(__v), __ptr[this_unit]);\
+		__v |= _bt_cast_value_to_unsigned_type(typeof(__v), __ptr[this_unit]); \
 	}								\
 	*__vptr = __v;							\
 } while (0)
-- 
2.11.0