Is there an error in the selector or in the left/right shift (rotate) code of these two functions (in inc_common.cl)?
(I think undo_utf16be_S has an error in its left/right shift (rotate) operation.)
DECLSPEC void undo_utf16be_S (const u32 *in1, const u32 *in2, u32 *out)
{
#if defined IS_NV
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x4602);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x4602);
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602);
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002);
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x04060002);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x04060002);
#else
out[0] = ((in1[0] & 0x0000ff00) >> 8) | ((in1[0] & 0xff000000) >> 16)
| ((in1[1] & 0x0000ff00) << 8) | ((in1[1] & 0xff000000) << 0);
out[1] = ((in1[2] & 0x0000ff00) >> 8) | ((in1[2] & 0xff000000) >> 16)
| ((in1[3] & 0x0000ff00) << 8) | ((in1[3] & 0xff000000) << 0);
out[2] = ((in2[0] & 0x0000ff00) >> 8) | ((in2[0] & 0xff000000) >> 16)
| ((in2[1] & 0x0000ff00) << 8) | ((in2[1] & 0xff000000) << 0);
out[3] = ((in2[2] & 0x0000ff00) >> 8) | ((in2[2] & 0xff000000) >> 16)
| ((in2[3] & 0x0000ff00) << 8) | ((in2[3] & 0xff000000) << 0);
#endif
}
and
DECLSPEC void undo_utf16le_S (const u32 *in1, const u32 *in2, u32 *out)
{
#if defined IS_NV
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x6420);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x6420);
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420);
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200);
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x06040200);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x06040200);
#else
out[0] = ((in1[0] & 0x000000ff) >> 0) | ((in1[0] & 0x00ff0000) >> 8)
| ((in1[1] & 0x000000ff) << 16) | ((in1[1] & 0x00ff0000) << 8);
out[1] = ((in1[2] & 0x000000ff) >> 0) | ((in1[2] & 0x00ff0000) >> 8)
| ((in1[3] & 0x000000ff) << 16) | ((in1[3] & 0x00ff0000) << 8);
out[2] = ((in2[0] & 0x000000ff) >> 0) | ((in2[0] & 0x00ff0000) >> 8)
| ((in2[1] & 0x000000ff) << 16) | ((in2[1] & 0x00ff0000) << 8);
out[3] = ((in2[2] & 0x000000ff) >> 0) | ((in2[2] & 0x00ff0000) >> 8)
| ((in2[3] & 0x000000ff) << 16) | ((in2[3] & 0x00ff0000) << 8);
#endif
}
When I use the following to test the left/right shift (rotate) operation:
// Every one of the 32 input bytes has a distinct value, so each byte of the
// output can be traced back to the input byte it was taken from.
u32 in1[4] = { 0x03020100, 0x13121110, 0x23222120, 0x33323130 };
u32 in2[4] = { 0x07060504, 0x17161514, 0x27262524, 0x37363534 };
u32 out[4];

// Both calls write out[0..3]; the second call overwrites the result of the
// first, so out has to be inspected after each call.
undo_utf16be_S (in1, in2, out);
undo_utf16le_S (in1, in2, out);
the utf16be_S output is:
out[0] = 0x13110301
out[1] = 0x33312321
out[2] = 0x17150705
out[3] = 0x37352725
and the utf16le_S output is:
out[0] = 0x12100200
out[1] = 0x32302220
out[2] = 0x16140604
out[3] = 0x36342624
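The selector for undo_utf16be_S is 0x4602 and the selector for undo_utf16le_S is 0x6420, so the result of the left/right shift (rotate)
branch is not consistent with the result of the selector branch.
(For example, assuming hc_byte_perm_S follows CUDA __byte_perm semantics, selector 0x4602 applied to in1[0], in1[1] gives 0x10120002 rather than 0x13110301, whereas selector 0x6420 gives 0x12100200, which matches the shift result.)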
(I think undo_utf16be_S has an error in its left/right shift (rotate) operation.)
DECLSPEC void undo_utf16be_S (const u32 *in1, const u32 *in2, u32 *out)
{
#if defined IS_NV
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x4602);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x4602);
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602);
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002);
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x04060002);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x04060002);
#else
out[0] = ((in1[0] & 0x0000ff00) >> 8) | ((in1[0] & 0xff000000) >> 16)
| ((in1[1] & 0x0000ff00) << 8) | ((in1[1] & 0xff000000) << 0);
out[1] = ((in1[2] & 0x0000ff00) >> 8) | ((in1[2] & 0xff000000) >> 16)
| ((in1[3] & 0x0000ff00) << 8) | ((in1[3] & 0xff000000) << 0);
out[2] = ((in2[0] & 0x0000ff00) >> 8) | ((in2[0] & 0xff000000) >> 16)
| ((in2[1] & 0x0000ff00) << 8) | ((in2[1] & 0xff000000) << 0);
out[3] = ((in2[2] & 0x0000ff00) >> 8) | ((in2[2] & 0xff000000) >> 16)
| ((in2[3] & 0x0000ff00) << 8) | ((in2[3] & 0xff000000) << 0);
#endif
}
and
DECLSPEC void undo_utf16le_S (const u32 *in1, const u32 *in2, u32 *out)
{
#if defined IS_NV
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x6420);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x6420);
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420);
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200);
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x06040200);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x06040200);
#else
out[0] = ((in1[0] & 0x000000ff) >> 0) | ((in1[0] & 0x00ff0000) >> 8)
| ((in1[1] & 0x000000ff) << 16) | ((in1[1] & 0x00ff0000) << 8);
out[1] = ((in1[2] & 0x000000ff) >> 0) | ((in1[2] & 0x00ff0000) >> 8)
| ((in1[3] & 0x000000ff) << 16) | ((in1[3] & 0x00ff0000) << 8);
out[2] = ((in2[0] & 0x000000ff) >> 0) | ((in2[0] & 0x00ff0000) >> 8)
| ((in2[1] & 0x000000ff) << 16) | ((in2[1] & 0x00ff0000) << 8);
out[3] = ((in2[2] & 0x000000ff) >> 0) | ((in2[2] & 0x00ff0000) >> 8)
| ((in2[3] & 0x000000ff) << 16) | ((in2[3] & 0x00ff0000) << 8);
#endif
}
When I use the following to test the left/right shift (rotate) operation:
// Every one of the 32 input bytes has a distinct value, so each byte of the
// output can be traced back to the input byte it was taken from.
u32 in1[4] = { 0x03020100, 0x13121110, 0x23222120, 0x33323130 };
u32 in2[4] = { 0x07060504, 0x17161514, 0x27262524, 0x37363534 };
u32 out[4];

// Both calls write out[0..3]; the second call overwrites the result of the
// first, so out has to be inspected after each call.
undo_utf16be_S (in1, in2, out);
undo_utf16le_S (in1, in2, out);
the utf16be_S output is:
out[0] = 0x13110301
out[1] = 0x33312321
out[2] = 0x17150705
out[3] = 0x37352725
and the utf16le_S output is:
out[0] = 0x12100200
out[1] = 0x32302220
out[2] = 0x16140604
out[3] = 0x36342624
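The selector for undo_utf16be_S is 0x4602 and the selector for undo_utf16le_S is 0x6420, so the result of the left/right shift (rotate)
branch is not consistent with the result of the selector branch.
(For example, assuming hc_byte_perm_S follows CUDA __byte_perm semantics, selector 0x4602 applied to in1[0], in1[1] gives 0x10120002 rather than 0x13110301, whereas selector 0x6420 gives 0x12100200, which matches the shift result.)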