#version 420 core
///shaderType:vertex


// Per-vertex base candidate index; combined with `offset` and the loop counter to form the candidate id.
in uint inValue;
// 0 if no candidate tested by this invocation matched `needle`;
// otherwise 0x80000000 | (inValue + i), where i is the matching iteration (see main()).
out uint outValue;

// Incremented once for every candidate whose digest equals `needle`.
layout(binding=0, offset=0) uniform atomic_uint great_success;

uniform uint chars[100]; // character codes used as the digits of a candidate string
uniform uint numchars;   // number of entries of `chars` actually in use (the radix of the candidate numbering)
// Range of the low counter word: x = value it wraps to, y = largest value it may hold before carrying into the
// high word. Lets the shader split a 64-bit candidate id into two 32-bit words without full 64-bit div/mod.
// NOTE(review): presumably derived by the host from the largest power of numchars below 2^32 -- confirm.
uniform uvec2 limituint;

uniform uint iterations; // number of consecutive candidates tested per shader invocation

uniform uvec4 needle; // MD5 digest to find, as four 32-bit words (A, B, C, D)
// Pseudo 64-bit start offset added to inValue so the VBO need not be rebound:
// x = high word, y = low word (ranges according to limituint).
uniform uvec2 offset;
// Fixed prefix/salt prepended to every candidate:
// [0] = prefix bytes packed into one word (first byte in the low 8 bits), [1] = prefix length in bytes (max 4).
uniform uvec2 prefix;


// Capacity, in bytes, of the uvec4 that carries a candidate string.
const uint TO_STR_CAPACITY = 16u;

// Appends `value` to the packed string as bijective base-`radix` digits taken from `chars`,
// least-significant digit first. Each digit occupies one byte: byte k lives in str[k/4] at bit 8*(k%4).
// Stops as soon as the buffer is full, so a component outside str[0..3] is never indexed.
//   value - number to encode (0 appends nothing)
//   radix - number of usable entries in `chars`; must be >= 1
//   str   - packed string, extended in place
//   len   - current length in bytes, advanced by one per appended digit
void to_str_append(in uint value, in uint radix, inout uvec4 str, inout uint len) {
    while (value > 0u && len < TO_STR_CAPACITY) {
        value--; // bijective numeration: map digit range 1..radix onto 0..radix-1
        uint rem = value % radix;
        value /= radix;
        str[len / 4u] |= (chars[rem] << ((len % 4u) * 8u));
        len++;
    }
}

// Builds the candidate string for iteration `i` of this invocation.
// The candidate id is `offset` (high/low word pair) plus inValue + i; the string is the prefix
// followed by the digits of the low word and then the digits of the high word.
//   i   - iteration index within this invocation
//   str - receives the candidate, packed four bytes per word
//   len - receives the candidate length in bytes
// Candidates that would need more than TO_STR_CAPACITY bytes are truncated to the buffer size
// instead of indexing past the end of `str` (which is undefined behaviour in GLSL).
void to_str(in uint i, out uvec4 str, out uint len) {
    str = uvec4(prefix[0], 0u, 0u, 0u);
    len = prefix[1];

    uvec2 id = offset;
    id.y += inValue + i;
    if (id.y > limituint.y) {
        // Low word left its allowed range: wrap it back to the minimum and carry into the high word.
        id.y = limituint.x + (id.y - limituint.y) - 1u;
        id.x++;
    }

    // Keep the radix inside [1, chars.length()]: a radix of 0 would divide by zero (undefined result,
    // possibly a loop that never ends) and a radix above the array size would read past `chars`.
    uint radix = clamp(numchars, 1u, uint(chars.length()));

    to_str_append(id.y, radix, str, len);
    to_str_append(id.x, radix, str, len);
}


// Computes the MD5 digest of a short message that fits into a single 512-bit block.
//   str - message bytes packed four per word: byte k lives in str[k/4] at bit 8*(k%4)
//   len - message length in bytes (only the 16 bytes held by `str` can be supplied)
//   H   - receives the digest as the four 32-bit state words (A, B, C, D)
void md5(const in uvec4 str, const in uint len, out uvec4 H) {
    // Left-rotation amount for each of the 64 steps.
    const uint shift[64] = uint[](
        7u, 12u, 17u, 22u, 7u, 12u, 17u, 22u, 7u, 12u, 17u, 22u, 7u, 12u, 17u, 22u,
        5u,  9u, 14u, 20u, 5u,  9u, 14u, 20u, 5u,  9u, 14u, 20u, 5u,  9u, 14u, 20u,
        4u, 11u, 16u, 23u, 4u, 11u, 16u, 23u, 4u, 11u, 16u, 23u, 4u, 11u, 16u, 23u,
        6u, 10u, 15u, 21u, 6u, 10u, 15u, 21u, 6u, 10u, 15u, 21u, 6u, 10u, 15u, 21u);

    // Additive constant for each of the 64 steps.
    const uint sine[64] = uint[](
        0xd76aa478u, 0xe8c7b756u, 0x242070dbu, 0xc1bdceeeu,
        0xf57c0fafu, 0x4787c62au, 0xa8304613u, 0xfd469501u,
        0x698098d8u, 0x8b44f7afu, 0xffff5bb1u, 0x895cd7beu,
        0x6b901122u, 0xfd987193u, 0xa679438eu, 0x49b40821u,
        0xf61e2562u, 0xc040b340u, 0x265e5a51u, 0xe9b6c7aau,
        0xd62f105du, 0x02441453u, 0xd8a1e681u, 0xe7d3fbc8u,
        0x21e1cde6u, 0xc33707d6u, 0xf4d50d87u, 0x455a14edu,
        0xa9e3e905u, 0xfcefa3f8u, 0x676f02d9u, 0x8d2a4c8au,
        0xfffa3942u, 0x8771f681u, 0x6d9d6122u, 0xfde5380cu,
        0xa4beea44u, 0x4bdecfa9u, 0xf6bb4b60u, 0xbebfbc70u,
        0x289b7ec6u, 0xeaa127fau, 0xd4ef3085u, 0x04881d05u,
        0xd9d4d039u, 0xe6db99e5u, 0x1fa27cf8u, 0xc4ac5665u,
        0xf4292244u, 0x432aff97u, 0xab9423a7u, 0xfc93a039u,
        0x655b59c3u, 0x8f0ccc92u, 0xffeff47du, 0x85845dd1u,
        0x6fa87e4fu, 0xfe2ce6e0u, 0xa3014314u, 0x4e0811a1u,
        0xf7537e82u, 0xbd3af235u, 0x2ad7d2bbu, 0xeb86d391u);

    // The one and only message block: the four message words followed by zero padding.
    uint words[16] = uint[](str.x, str.y, str.z, str.w,
                            0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
    words[len >> 2u] |= 0x80u << ((len & 3u) << 3u); // mandatory '1' bit right after the last message byte
    words[14] = len << 3u;                           // message length in bits (low word of the 64-bit field)

    const uvec4 init = uvec4(0x67452301u, 0xefcdab89u, 0x98badcfeu, 0x10325476u); // a0, b0, c0, d0
    uint a = init.x;
    uint b = init.y;
    uint c = init.z;
    uint d = init.w;

    for (uint n = 0u; n < 64u; ++n) {
        uint f;    // output of this round's non-linear function
        uint word; // index of the message word consumed by this step
        uint phase = n >> 4u; // 16 steps per round
        if (phase == 0u) {
            f = d ^ (b & (c ^ d));          // same as (b & c) | (~b & d)
            word = n;
        } else if (phase == 1u) {
            f = c ^ (d & (b ^ c));          // same as (d & b) | (~d & c)
            word = (5u * n + 1u) & 15u;
        } else if (phase == 2u) {
            f = b ^ c ^ d;
            word = (3u * n + 5u) & 15u;
        } else {
            f = c ^ (b | ~d);
            word = (7u * n) & 15u;
        }

        uint sum = a + f + sine[n] + words[word];
        uint rotated = (sum << shift[n]) | (sum >> (32u - shift[n]));

        // Rotate the registers: (a, b, c, d) <- (d, b + rotated, b, c)
        a = d;
        d = c;
        c = b;
        b += rotated;
    }

    // Final feed-forward of the initial state.
    H = init + uvec4(a, b, c, d);
}


// Entry point: tests `iterations` consecutive candidates for this vertex.
// Each candidate is rendered to a string, hashed, and compared with `needle`. On a match the
// atomic counter is bumped and outValue receives the candidate's local index with bit 31 set;
// if nothing matches, outValue stays 0.
void main() {
    outValue = 0u;

    uint k = 0u;
    while (k < iterations) {
        uvec4 candidate;
        uint candidateLen;
        to_str(k, candidate, candidateLen);

        uvec4 digest;
        md5(candidate, candidateLen, digest);

        if (all(equal(digest, needle))) {
            atomicCounterIncrement(great_success);
            outValue = (inValue + k) | 0x80000000u; // bit 31 flags "found"
        }

        k++;
    }
}