Clearly the scheme given is not a hash function, in the sense that it is very easy to invert the function to obtain the original message. Hence, with any arbitrary hash, we can easily construct a message with that hash by inverting the operation.

Python Implementation:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from pwn import *
import json 
BLOCK_SIZE = 32

# Nothing up my sleeve numbers (ref: Dual_EC_DRBG P-256 coordinates)
W = [0x6b17d1f2, 0xe12c4247, 0xf8bce6e5, 0x63a440f2, 0x77037d81, 0x2deb33a0, 0xf4a13945, 0xd898c296]
X = [0x4fe342e2, 0xfe1a7f9b, 0x8ee7eb4a, 0x7c0f9e16, 0x2bce3357, 0x6b315ece, 0xcbb64068, 0x37bf51f5]
Y = [0xc97445f4, 0x5cdef9f0, 0xd3e05e1e, 0x585fc297, 0x235b82b5, 0xbe8ff3ef, 0xca67c598, 0x52018192]
Z = [0xb28ef557, 0xba31dfcb, 0xdd21ac46, 0xe2a91e3c, 0x304f44cb, 0x87058ada, 0x2cb81515, 0x1e610046]

# Lets work with bytes instead!
W_bytes = b''.join([x.to_bytes(4,'big') for x in W])
X_bytes = b''.join([x.to_bytes(4,'big') for x in X])
Y_bytes = b''.join([x.to_bytes(4,'big') for x in Y])
Z_bytes = b''.join([x.to_bytes(4,'big') for x in Z])

def pad(data):
    padding_len = (BLOCK_SIZE - len(data)) % BLOCK_SIZE
    return data + bytes([padding_len]*padding_len)

def blocks(data):
    return [data[i:(i+BLOCK_SIZE)] for i in range(0,len(data),BLOCK_SIZE)]

def xor(a,b):
    return bytes([x^y for x,y in zip(a,b)])

def rotate_left(data, x):
    x = x % BLOCK_SIZE
    return data[x:] + data[:x]

def rotate_right(data, x):
    x = x % BLOCK_SIZE
    return  data[-x:] + data[:-x]

def scramble_block(block):
    for _ in range(40):
        block = xor(W_bytes, block)
        block = rotate_left(block, 6)
        block = xor(X_bytes, block)
        block = rotate_right(block, 17)
    return block

def cryptohash(msg):
    initial_state = xor(Y_bytes, Z_bytes)
    msg_padded = pad(msg)
    msg_blocks = blocks(msg_padded)
    for i,b in enumerate(msg_blocks):
        mix_in = scramble_block(b)
        for _ in range(i):
            mix_in = rotate_right(mix_in, i+11)
            mix_in = xor(mix_in, X_bytes)
            mix_in = rotate_left(mix_in, i+6)
        print("Mix in: ", mix_in.hex())
        initial_state = xor(initial_state,mix_in)
    return initial_state.hex()

# Revert the scramble function
def unscramble(block):
    for _ in range(40):
        block = rotate_left(block, 17)
        block = xor(X_bytes, block)
        block = rotate_right(block, 6)
        block = xor(W_bytes, block)
    
    return block 

# Target that we want is msg1
msg1 = X_bytes

# msg2 consists of two blocks
msg2 = Y_bytes

target = cryptohash(msg1)
state = cryptohash(msg2)

# Reverse the scrambling stage in the for loop in cryptohash
# scrambling only happens in the 2nd block
mix_in = xor(target, state)
mix_in = rotate_right(mix_in, 7)
mix_in = xor(mix_in, X_bytes)
mix_in = rotate_left(mix_in, 12)

# This is the original block
msg2_block = unscramble(mix_in)

msg2 = msg2 + msg2_block

io = remote('socket.cryptohack.org', 13405)
print(io.recvline())
to_send = dict()
to_send['m1'] = msg1.hex()
to_send['m2'] = msg2.hex()

io.sendline(json.dumps(to_send).encode())
io.interactive()

Other solution take advantage of the flaw in the padding function, kudos to unblvr:

1
2
3
def pad(data):
    padding_len = (BLOCK_SIZE - len(data)) % BLOCK_SIZE
    return data + bytes([padding_len]*padding_len)

What this function does, is to pad the current block up to a full block size, by appending the number of missing bytes, not unlike the padding scheme in PKCS#7. The flaw, however, is that a block of the correct size has nothing added to it. This is why in PKCS#7, a message of the correct block size has another padding block appended to it.

This gives us a easy attack, in particular:

1
2
cryptohash(b"\x01"*63) = '321225b996fc333535f2e1119c9eaae7c527af87f29f0e0bf41643fd46d00e46'
cryptohash(b"\x01"*64) = '321225b996fc333535f2e1119c9eaae7c527af87f29f0e0bf41643fd46d00e46'

And the two messages are obviously different, one with 63 bytes and one with 64 bytes. After padding, the two messages are the same, as \x01 is appended.

Lastly, a cool solution from aloof, taking advantage of Sage (or more specifically the symbolic evaluation), to observe the patterns in the output of the hash:

Sage Implementation:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
F = GF(256)
z = F.gen()
K = PolynomialRing(F, 64, names='x')
BLOCK_SIZE = 32

def int_to_field(a):
    return sum(((a >> i) & 1)*z**i for i in range(8))

W = [0x6b17d1f2, 0xe12c4247, 0xf8bce6e5, 0x63a440f2, 0x77037d81, 0x2deb33a0, 0xf4a13945, 0xd898c296]
X = [0x4fe342e2, 0xfe1a7f9b, 0x8ee7eb4a, 0x7c0f9e16, 0x2bce3357, 0x6b315ece, 0xcbb64068, 0x37bf51f5]
Y = [0xc97445f4, 0x5cdef9f0, 0xd3e05e1e, 0x585fc297, 0x235b82b5, 0xbe8ff3ef, 0xca67c598, 0x52018192]
Z = [0xb28ef557, 0xba31dfcb, 0xdd21ac46, 0xe2a91e3c, 0x304f44cb, 0x87058ada, 0x2cb81515, 0x1e610046]

def pad(data):
    padding_len = (BLOCK_SIZE - len(data)) % BLOCK_SIZE
    return data + bytes([padding_len]*padding_len)

def blocks(data):
    return [data[i:(i+BLOCK_SIZE)] for i in range(0,len(data),BLOCK_SIZE)]

def rotate_left(data, x):
    x = x % BLOCK_SIZE
    return data[x:] + data[:x]

def rotate_right(data, x):
    x = x % BLOCK_SIZE
    return  data[-x:] + data[:-x]

# "int" is added because Sage makes integers in the class "Integer"
W_bytes = b''.join([int(x).to_bytes(4,'big') for x in W])
X_bytes = b''.join([int(x).to_bytes(4,'big') for x in X])
Y_bytes = b''.join([int(x).to_bytes(4,'big') for x in Y])
Z_bytes = b''.join([int(x).to_bytes(4,'big') for x in Z])

# the bytes are converted to field elements
W_bytes = [int_to_field(b) for b in W_bytes]
X_bytes = [int_to_field(b) for b in X_bytes]
Y_bytes = [int_to_field(b) for b in Y_bytes]
Z_bytes = [int_to_field(b) for b in Z_bytes]

# xor "^" replaced with "+", which will act as an xor in the binary finite field
def xor(a,b):
    return [x + y for x,y in zip(a,b)]

def scramble_block(block):
    for _ in range(40):
        block = xor(W_bytes, block)
        block = rotate_left(block, 6)
        block = xor(X_bytes, block)
        block = rotate_right(block, 17)
    return block

# the padding is removed: we use the full block size
# and we return the state in the finite field
def cryptohash(msg):
    initial_state = xor(Y_bytes, Z_bytes)
    #msg_padded = pad(msg)
    msg_blocks = blocks(msg)
    for i,b in enumerate(msg_blocks):
        mix_in = scramble_block(b)
        for _ in range(i):
            mix_in = rotate_right(mix_in, i+11)
            mix_in = xor(mix_in, X_bytes)
            mix_in = rotate_left(mix_in, i+6)
        initial_state = xor(initial_state,mix_in)
    # return initial_state.hex()
    return initial_state

# Generate 64 variables
block = K.gens()
print(cryptohash(block))

The output is something like:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# [x8 + x35 + (z8^5 + z8^4 + z8),
#  x9 + x36 + (z8^4 + z8),
#  x10 + x37 + (z8^5 + z8^2 + 1),
#  x11 + x38 + (z8^7 + z8^5 + z8^4 + z8^3 + 1),
#  x12 + x39 + (z8^7 + z8^4 + z8^2 + z8),
#  x13 + x40 + (z8^7 + z8^6 + z8^5 + z8^4 + z8^3 + z8^2),
#  x14 + x41 + (z8^5 + z8^4 + z8 + 1),
#  x15 + x42 + (z8^5 + z8^4 + z8^2 + 1),
#  x16 + x43 + (z8^5 + z8^4 + z8^2 + 1),
#  x17 + x44 + (z8^7 + z8^6 + z8^5 + z8^4 + z8),
#  x18 + x45 + (z8^7 + z8^6 + z8^5 + 1),
#  x19 + x46 + (z8^4 + 1),
#  x20 + x47 + (z8^7 + z8^4 + z8^3 + z8^2),
#  x21 + x48 + (z8^7 + z8^4 + z8^3 + z8^2 + z8),
#  x22 + x49 + (z8^7 + z8^5 + z8^3 + z8),
#  x23 + x50 + (z8^7 + z8^6 + z8^5 + z8^2 + z8 + 1),
#  x24 + x51 + (z8^7 + z8^6 + z8^2 + 1),
#  x25 + x52 + (z8^5 + z8^2 + z8 + 1),
#  x26 + x53 + (z8^7 + z8^5 + z8^3 + z8^2 + z8 + 1),
#  x27 + x54 + (z8^7 + z8^2 + z8 + 1),
#  x28 + x55 + (z8^7 + z8^6 + z8^5 + z8^4 + z8),
#  x29 + x56 + (z8^7 + z8^4 + z8^3 + z8^2 + z8 + 1),
#  x30 + x57 + (z8^3 + z8^2 + z8),
#  x31 + x58 + (z8^3 + z8 + 1),
#  x0 + x59 + (z8^7 + z8^6 + z8^5 + z8^4 + z8^2),
#  x1 + x60 + (z8^4 + z8^2 + z8),
#  x2 + x61 + (z8^6 + z8 + 1),
#  x3 + x62 + (z8^7 + z8^6 + z8^5 + z8^4 + z8^3 + z8^2 + 1),
#  x4 + x63 + (z8^6 + z8^2 + z8),
#  x5 + x32 + (z8^7 + z8^6 + z8^4),
#  x6 + x33 + (z8^3 + z8^2 + z8),
#  x7 + x34 + (z8^6 + z8^2 + z8)]

The $z$ are the constants, hence our task is to find values such that $x_a + x_b$ is the same. A trivial solution is for $a, b \in {0, 63}$, $x_a = x_b = 0$ and $x_a = x_b = 1$

This leads to the solution of:

1
2
m1 = '00'*64
m2 = '01'*64