currently I have something like this: and this don't work
Code:
KERNEL_FQ KERNEL_FA void m26630_comp (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, merged_options_t))
{
const u64 gid = get_global_id(0);
const u64 lid = get_local_id(0);
const u64 lsz = get_local_size(0);
#ifdef REAL_SHM
LOCAL_VK u32 s_te0[256], s_te1[256], s_te2[256], s_te3[256], s_te4[256];
for (u32 i = lid; i < 256; i += lsz) {
s_te0[i] = te0[i]; s_te1[i] = te1[i]; s_te2[i] = te2[i];
s_te3[i] = te3[i]; s_te4[i] = te4[i];
}
SYNC_THREADS();
#else
CONSTANT_AS u32a *s_te0 = te0, *s_te1 = te1, *s_te2 = te2, *s_te3 = te3, *s_te4 = te4;
#endif
if (gid >= GID_CNT) return;
const u32 iv[4] = {
esalt_bufs[DIGESTS_OFFSET_HOST].aes_data.iv[0],
esalt_bufs[DIGESTS_OFFSET_HOST].aes_data.iv[1],
esalt_bufs[DIGESTS_OFFSET_HOST].aes_data.iv[2],
esalt_bufs[DIGESTS_OFFSET_HOST].aes_data.iv[3]
};
const u32 ct_len = esalt_bufs[DIGESTS_OFFSET_HOST].aes_data.ct_len;
u32 ct_buf[256];
const u32 ct_len_u32 = (ct_len + 3) / 4;
for (u32 i = 0; i < ct_len_u32; i++) {
ct_buf[i] = esalt_bufs[DIGESTS_OFFSET_HOST].aes_data.ct_buf[i];
}
const argon2_options_t argon2_options = esalt_bufs[DIGESTS_OFFSET_HOST].argon2_options;
const u32 gd4 = gid / 4;
const u32 gm4 = gid % 4;
GLOBAL_AS void *V;
switch (gm4) { case 0: V = d_extra0_buf; break; case 1: V = d_extra1_buf; break; case 2: V = d_extra2_buf; break; default: V = d_extra3_buf; break; }
GLOBAL_AS argon2_block_t *argon2_block = get_argon2_block(&argon2_options, V, gd4);
u32 out[8];
argon2_final(argon2_block, &argon2_options, out);
const u8 *raw_key_bytes = (const u8 *)out;
CONSTANT_AS char hex_chars[16] = "0123456789abcdef";
u8 aes_key_hex[32];
#pragma unroll
for (int i = 0; i < 16; i++) {
const u8 b = raw_key_bytes[i];
aes_key_hex[i*2] = (u8)hex_chars[(b >> 4) & 0xF];
aes_key_hex[i*2 + 1] = (u8)hex_chars[b & 0xF];
}
u32 ukey[8];
#pragma unroll
for (int i = 0; i < 8; i++) {
const int o = i*4;
ukey[i] = ((u32)aes_key_hex[o + 0] << 24) | ((u32)aes_key_hex[o + 1] << 16) | ((u32)aes_key_hex[o + 2] << 8) | ((u32)aes_key_hex[o + 3]);
}
u32 key[60] = {0};
u32 subKey[4] = {0};
u32 key_len = 32*8;
AES_GCM_Init(ukey, key_len, key, subKey, s_te0, s_te1, s_te2, s_te3, s_te4);
u32 J0[4];
AES_GCM_Prepare_J0(iv, 16, subKey, J0);
u32 T[4];
u32 S[4] = {0};
u32 aad_buf[4] = {0};
AES_GCM_GHASH(subKey, aad_buf, 0, ct_buf, ct_len, S);
AES_GCM_GCTR(key, J0, S, 16, T, s_te0, s_te1, s_te2, s_te3, s_te4);
const u32 r0 = T[0];
const u32 r1 = T[1];
const u32 r2 = T[2];
const u32 r3 = T[3];
#define il_pos 0
#ifdef KERNEL_STATIC
#include COMPARE_M
#endif
}