Solidity Assembly Gas Optimized Keccak256

How to get a 42% gas saving when calling keccak256

·

20 min read

Solidity smart contracts often use keccak256 to hash a number of input parameters; the standard way of calling this function looks like this:

function getKeccak256(uint256 a, uint256 b, uint256 c) external pure returns(bytes32 result) {
    result = keccak256(abi.encode(a,b,c));
}

However it is possible to reduce the gas cost of computing the hash by ~42% using the following assembly:

function getKeccak256(uint256 a, uint256 b, uint256 c) external pure returns(bytes32 result) {
    assembly {
        let mPtr := mload(0x40)
        mstore(mPtr, a)
        mstore(add(mPtr, 0x20), b)
        mstore(add(mPtr, 0x40), c)

        result := keccak256(mPtr, 0x60)
    }
}

Let's examine how and why this gas saving occurs!

Prerequisite Knowledge

In order to understand the next section you'll need to have completed at least Section 1 of Updraft's Assembly & Formal Verification course or already have equivalent knowledge of:

Foundry Testing Code

To examine the execution of both functions we'll use the following stand-alone Foundry test contract:

// SPDX-License-Identifier: MIT
pragma solidity 0.8.25;

import "forge-std/Test.sol";

// separate contracts for each implementation then accessing
// implementations via an interface in the test contract
// prevents the optimizer from being "too smart", helping
// to better approximate real-world execution
interface IGasImpl {
    function getKeccak256(uint256 a, uint256 b, uint256 c) external pure returns(bytes32 result);
}

contract GasImplNormal is IGasImpl {
    function getKeccak256(uint256 a, uint256 b, uint256 c) external pure returns(bytes32 result) {
        result = keccak256(abi.encode(a,b,c));
    }
}

contract GasImplAssembly is IGasImpl {
    function getKeccak256(uint256 a, uint256 b, uint256 c) external pure returns(bytes32 result) {
        assembly {
            let mPtr := mload(0x40)
            mstore(mPtr, a)
            mstore(add(mPtr, 0x20), b)
            mstore(add(mPtr, 0x40), c)

            result := keccak256(mPtr, 0x60)
        }
    }
}

// actual testing contract
contract GasDebugTest is Test {
    IGasImpl gasImplNormal   = new GasImplNormal();
    IGasImpl gasImplAssembly = new GasImplAssembly();
    uint256 a = 1;
    uint256 b = 2;
    uint256 c = 3;

    // forge test --match-contract GasDebugTest --debug test_GasImplNormal
    function test_GasImplNormal() external {
        bytes32 result = gasImplNormal.getKeccak256(a,b,c);
        assertEq(result, 0x6e0c627900b24bd432fe7b1f713f1b0744091a646a9fe4a65a18dfed21f2949c);
    }

    // forge test --match-contract GasDebugTest --debug test_GasImplAssembly
    function test_GasImplAssembly() external {
        bytes32 result = gasImplAssembly.getKeccak256(a,b,c);
        assertEq(result, 0x6e0c627900b24bd432fe7b1f713f1b0744091a646a9fe4a65a18dfed21f2949c);
    }
}

Execution Trace - Assembly Version

Next we'll step through the assembly version using Foundry's debugger by executing this command: forge test --match-contract GasDebugTest --debug test_GasImplAssembly

We are concerned with the execution inside the GasImplAssembly contract:

  • from the first PUSH1(0x40) after calldata has been loaded onto the stack and JUMPDEST has been called

  • until keccak256 has been called and the calculated hash put on the stack

The above execution started at PC 0x39 (57) until 0x4f (79) and used 341-224 = 117 gas. Some useful acronyms are:

  • Free Memory Pointer Address (FMPA)

  • Starting Next Free Memory Address (SNFMA)

  • Next Free Memory Address (NFMA)

Let's step through the execution examining how the assembly version works:

// push Free Memory Pointer Address (FMPA) onto stack
PUSH1(0x40) [Stack : 0x40, 0x03, 0x02, 0x01, 0x52, 0x05536b19]    
            [Memory: 0x40 = 0x80                             ]

// duplicate FMPA
DUP1        [Stack : 0x40, 0x40, 0x03, 0x02, 0x01, 0x52, 0x05536b19]
            [Memory: 0x40 = 0x80                                   ]

// load Starting Next Free Memory Address (SNFMA) by reading FMPA
MLOAD       [Stack : 0x80, 0x40, 0x03, 0x02, 0x01, 0x52, 0x05536b19]
            [Memory: 0x40 = 0x80                                   ]

// exchange 5th and 1st stack elements
// note: a common pattern follows to store input parameters in memory
SWAP4       [Stack : 0x01, 0x40, 0x03, 0x02, 0x80, 0x52, 0x05536b19]
            [Memory: 0x40 = 0x80                                   ]

// duplicate SNFMA
DUP5        [Stack : 0x80, 0x01, 0x40, 0x03, 0x02, 0x80, 0x52, 0x05536b19]
            [Memory: 0x40 = 0x80                                         ]

// store value of `a` into memory at SNFMA
MSTORE      [Stack : 0x40, 0x03, 0x02, 0x80, 0x52, 0x05536b19]
            [Memory: 0x40 = 0x80, 0x80 = 0x01                ]

// push 0x20 onto stack (2nd param of first `add` call)
// this is an offset to calculate Next Free Memory Address (NFMA)
PUSH1(0x20) [Stack : 0x20, 0x40, 0x03, 0x02, 0x80, 0x52, 0x05536b19]
            [Memory: 0x40 = 0x80, 0x80 = 0x01                      ]

// duplicate SNFMA
DUP5        [Stack : 0x80, 0x20, 0x40, 0x03, 0x02, 0x80, 0x52, 0x05536b19]
            [Memory: 0x40 = 0x80, 0x80 = 0x01                            ]

// calculate NFMA by SNFMA + offset (0x80 + 0x20)
ADD         [Stack : 0xa0, 0x40, 0x03, 0x02, 0x80, 0x52, 0x05536b19]
            [Memory: 0x40 = 0x80, 0x80 = 0x01                      ]

// exchange 4th and 1st stack elements
SWAP3       [Stack : 0x02, 0x40, 0x03, 0xa0, 0x80, 0x52, 0x05536b19]
            [Memory: 0x40 = 0x80, 0x80 = 0x01                      ]

// exchange 2nd and 1st stack elements
SWAP1       [Stack : 0x40, 0x02, 0x03, 0xa0, 0x80, 0x52, 0x05536b19]
            [Memory: 0x40 = 0x80, 0x80 = 0x01                      ]

// exchange 4th and 1st stack elements
SWAP3       [Stack : 0xa0, 0x02, 0x03, 0x40, 0x80, 0x52, 0x05536b19]
            [Memory: 0x40 = 0x80, 0x80 = 0x01                      ]

// store value of `b` into memory at NFMA
MSTORE      [Stack : 0x03, 0x40, 0x80, 0x52, 0x05536b19   ]
            [Memory: 0x40 = 0x80, 0x80 = 0x01, 0xa0 = 0x02]

// exchange 2nd and 1st stack elements
SWAP1       [Stack : 0x40, 0x03, 0x80, 0x52, 0x05536b19   ]
            [Memory: 0x40 = 0x80, 0x80 = 0x01, 0xa0 = 0x02]

// duplicate SNFMA
DUP3        [Stack : 0x80, 0x40, 0x03, 0x80, 0x52, 0x05536b19]
            [Memory: 0x40 = 0x80, 0x80 = 0x01, 0xa0 = 0x02   ]

// calculate NFMA by SNFMA + offset (0x80 + 0x40)
ADD         [Stack : 0xc0, 0x03, 0x80, 0x52, 0x05536b19   ]
            [Memory: 0x40 = 0x80, 0x80 = 0x01, 0xa0 = 0x02]

// store value of `c` into memory at NFMA
MSTORE      [Stack : 0x80, 0x52, 0x05536b19                            ]
            [Memory: 0x40 = 0x80, 0x80 = 0x01, 0xa0 = 0x02, 0xc0 = 0x03]

// note: all input parameters are now stored in memory

// push `size` parameter for call to keccak onto stack
PUSH1(0x60) [Stack : 0x60, 0x80, 0x52, 0x05536b19                      ]
            [Memory: 0x40 = 0x80, 0x80 = 0x01, 0xa0 = 0x02, 0xc0 = 0x03]

// exchange 2nd and 1st stack elements
SWAP1       [Stack : 0x80, 0x60, 0x52, 0x05536b19                      ]
            [Memory: 0x40 = 0x80, 0x80 = 0x01, 0xa0 = 0x02, 0xc0 = 0x03]

// call keccak256(offset, size)
KECCAK256   [Stack : result, 0x52, 0x05536b19                          ]
            [Memory: 0x40 = 0x80, 0x80 = 0x01, 0xa0 = 0x02, 0xc0 = 0x03]

The assembly version:

  • stored the first input parameter a at the Starting Next Free Memory Address (SNFMA)

  • calculated 2 additional Next Free Memory Addresses (NFMA) which it used to store input parameters b, c

  • once this was completed there were only 3 more opcodes; 2 to prepare the offset, size input parameters and the final to call keccak256

  • 20 opcodes in total were executed including 1 MLOAD and 3 MSTORE

Execution Trace - Solidity Version

Having examined the assembly version we'll now turn to the solidity version using Foundry's debugger by executing this command: forge test --match-contract GasDebugTest --debug test_GasImplNormal

We are concerned with the execution inside the GasImplNormal contract:

  • from the first PUSH1(0x40) after calldata has been loaded onto the stack and JUMPDEST has been called

  • until keccak256 has been called and the calculated hash put on the stack

The above execution started at PC 0x39 (57) until 0x6c (108) and used 428-224 = 204 gas; 74% more than the assembly version!

// push Free Memory Pointer Address (FMPA) onto stack
PUSH1(0x40) [Stack : 0x40, 0x03, 0x02, 0x01, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80                             ]

// duplicate FMPA
DUP1        [Stack : 0x40, 0x40, 0x03, 0x02, 0x01, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80                                   ]

// load Starting Next Free Memory Address (SNFMA) by reading FMPA
MLOAD       [Stack : 0x80, 0x40, 0x03, 0x02, 0x01, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80                                   ]

// push 0x20 onto stack
// this is an offset to calculate Next Free Memory Address (NFMA)
PUSH1(0x20) [Stack : 0x20, 0x80, 0x40, 0x03, 0x02, 0x01, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80                                         ]

// duplicate offset
DUP1        [Stack : 0x20, 0x20, 0x80, 0x40, 0x03, 0x02, 0x01, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80                                               ]

// duplicate SNFMA
DUP3        [Stack : 0x80, 0x20, 0x20, 0x80, 0x40, 0x03, 0x02, 0x01, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80                                                     ]

// calculate NFMA by SNFMA + offset (0x80 + 0x20)
ADD         [Stack : 0xa0, 0x20, 0x80, 0x40, 0x03, 0x02, 0x01, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80                                               ]

// exchange 7th and 1st stack elements
// note: a common pattern follows to store input parameters in memory
SWAP6       [Stack : 0x01, 0x20, 0x80, 0x40, 0x03, 0x02, 0xa0, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80                                               ]

// exchange 2nd and 1st stack elements
SWAP1       [Stack : 0x20, 0x01, 0x80, 0x40, 0x03, 0x02, 0xa0, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80                                               ]

// exchange 7th and 1st stack elements
SWAP6       [Stack : 0xa0, 0x01, 0x80, 0x40, 0x03, 0x02, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80                                               ]

// store value of `a` into memory at NFMA
MSTORE      [Stack : 0x80, 0x40, 0x03, 0x02, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01                      ]

// note: normal version doesn't store first input at SNFMA so will have 
// to calculate one extra NFMA to store all inputs into memory compared 
// to assembly version

// duplicate SNFMA
DUP1        [Stack : 0x80, 0x80, 0x40, 0x03, 0x02, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01                            ]

// duplicate FMPA
DUP3        [Stack : 0x40, 0x80, 0x80, 0x40, 0x03, 0x02, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01                                  ]

// calculate NFMA by FMPA + offset (0x40 + 0x80)
ADD         [Stack : 0xc0, 0x80, 0x40, 0x03, 0x02, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01                            ]

// exchange 5th and 1st stack elements
SWAP4       [Stack : 0x02, 0x80, 0x40, 0x03, 0xc0, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01                            ]

// exchange 2nd and 1st stack elements
SWAP1       [Stack : 0x80, 0x02, 0x40, 0x03, 0xc0, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01                            ]

// exchange 5th and 1st stack elements
SWAP4       [Stack : 0xc0, 0x02, 0x40, 0x03, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01                            ]

// store value of `b` into memory at NFMA
MSTORE      [Stack : 0x40, 0x03, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02   ]

// another offset used calculate NFMA
PUSH1(0x60) [Stack : 0x60, 0x40, 0x03, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02         ]

// duplicate the offset
DUP1        [Stack : 0x60, 0x60, 0x40, 0x03, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02               ]

// duplicate SNFMA
DUP5        [Stack : 0x80, 0x60, 0x60, 0x40, 0x03, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02                     ]

// calculate NFMA by SNFMA + offset (0x80 + 0x60)
ADD         [Stack : 0xe0, 0x60, 0x40, 0x03, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02               ]

// exchange 4th and 1st stack elements
SWAP3       [Stack : 0x03, 0x60, 0x40, 0xe0, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02               ]

// exchange 2nd and 1st stack elements
SWAP1       [Stack : 0x60, 0x03, 0x40, 0xe0, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02               ]

// exchange 4th and 1st stack elements
SWAP3       [Stack : 0xe0, 0x03, 0x40, 0x60, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02               ]

// store value of `c` into memory at NFMA
MSTORE      [Stack : 0x40, 0x60, 0x80, 0x20, 0x6f, 0x05536b19          ]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// note: all input parameters are now stored in memory

// duplicate FMPA
DUP1        [Stack : 0x40, 0x40, 0x60, 0x80, 0x20, 0x6f, 0x05536b19    ]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// load Starting Next Free Memory Address (SNFMA) by reading FMPA
MLOAD       [Stack : 0x80, 0x40, 0x60, 0x80, 0x20, 0x6f, 0x05536b19    ]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// duplicate SNFMA
// note: subsequent opcodes related to `abi.encode`
DUP1        [Stack : 0x80, 0x80, 0x40, 0x60, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03  ]

// duplicate SNFMA
DUP5        [Stack : 0x80, 0x80, 0x80, 0x40, 0x60, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03        ]

// subtract 2nd element from 1st
SUB         [Stack : 0x00, 0x80, 0x40, 0x60, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03  ]

// exchange 2nd and 1st stack elements
SWAP1       [Stack : 0x80, 0x00, 0x40, 0x60, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03  ]

// exchange 4th and 1st stack elements
SWAP3       [Stack : 0x60, 0x00, 0x40, 0x80, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03  ]

// calculate the `size` parameter required for
// the later keccak256 call
ADD         [Stack : 0x60, 0x40, 0x80, 0x80, 0x20, 0x6f, 0x05536b19    ]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// duplicate SNFMA
DUP3        [Stack : 0x80, 0x60, 0x40, 0x80, 0x80, 0x20, 0x6f, 0x05536b19]
            [Memory: 0x40 = 0x80, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03  ]

// stores the `size` parameter (0x60) at SNFMA
// which will be used later for the keccak256 call
MSTORE      [Stack : 0x40, 0x80, 0x80, 0x20, 0x6f, 0x05536b19                       ]
            [Memory: 0x40 = 0x80, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// push SNFMA onto stack
PUSH1(0x80) [Stack : 0x80, 0x40, 0x80, 0x80, 0x20, 0x6f, 0x05536b19                 ]
            [Memory: 0x40 = 0x80, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// exchange 2nd and 1st stack elements
SWAP1       [Stack : 0x40, 0x80, 0x80, 0x80, 0x20, 0x6f, 0x05536b19                 ]
            [Memory: 0x40 = 0x80, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// exchange 4th and 1st stack elements
SWAP3       [Stack : 0x80, 0x80, 0x80, 0x40, 0x20, 0x6f, 0x05536b19                 ]
            [Memory: 0x40 = 0x80, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// calculate NFMA
ADD         [Stack : 0x100, 0x80, 0x40, 0x20, 0x6f, 0x05536b19                      ]
            [Memory: 0x40 = 0x80, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// exchange 2nd and 1st stack elements
SWAP1       [Stack : 0x80, 0x100, 0x40, 0x20, 0x6f, 0x05536b19                      ]
            [Memory: 0x40 = 0x80, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// exchange 3rd and 1st stack elements
SWAP2       [Stack : 0x40, 0x100, 0x80, 0x20, 0x6f, 0x05536b19                      ]
            [Memory: 0x40 = 0x80, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// over-write value of FMPA with next NFMA
MSTORE      [Stack : 0x80, 0x20, 0x6f, 0x05536b19                                    ]
            [Memory: 0x40 = 0x100, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// note: normal version had to calculate a second additional NFMA
// and update memory at FMPA; optimized version didn't do any of this

// duplicate SNFMA
DUP1        [Stack : 0x80, 0x80, 0x20, 0x6f, 0x05536b19                              ]
            [Memory: 0x40 = 0x100, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// put value stored at SNFMA on stack
// this will be the `size` parameter for the keccak256
// call previously calculated
MLOAD       [Stack : 0x60, 0x80, 0x20, 0x6f, 0x05536b19                              ]
            [Memory: 0x40 = 0x100, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// exchange 3rd and 1st stack elements
SWAP2       [Stack : 0x20, 0x80, 0x60, 0x6f, 0x05536b19                              ]
            [Memory: 0x40 = 0x100, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// calculate memory address of first parameter; used
// as `offset` for keccak256
ADD         [Stack : 0xa0, 0x60, 0x6f, 0x05536b19                                    ]
            [Memory: 0x40 = 0x100, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// call keccak256(offset, size)
KECCAK256   [result, 0x6f, 0x05536b19                                                ]
            [Memory: 0x40 = 0x100, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

Gas Usage Comparison

In contrast to the assembly version, the solidity version:

  • did not store the first input parameter a at the Starting Next Free Memory Address (SNFMA)

  • instead it calculated 3 additional Next Free Memory Addresses (NFMA) which it used to store input parameters a, b, c

  • once this was completed there were 22 more opcodes in contrast to the assembly version's 3 more opcodes

  • calculated the offset parameter for keccak256 which the assembly version hard-coded

  • updated the Free Memory Pointer Address (FMPA) which the assembly version didn't need to, even though it didn't end up using the updated address (0x100)

  • executed 48 opcodes in total compared to the 20 opcodes of the assembly version, including 3 MLOAD and 5 MSTORE

  • used 204 gas instead of the assembly version's 117 resulting in a 74% increase to gas usage (204-117=87, (87/117)*100 = 74)

  • hence the assembly version provided a 42% gas saving compared to the solidity version (204-117=87, (87/204)*100 = 42)

Does Compiling With --via-ir Help?

Compiling with --via-ir offers modest gas improvements to the relevant code:

  • assembly version uses 108 gas, down from 117 gas

  • solidity version uses 195 gas, down from 204 gas

The relevant execution traces follow.

Execution Trace - Assembly --via-ir Version

Execute this command: forge test --match-contract GasDebugTest --debug test_GasImplAssembly --via-ir

We are concerned with the execution inside the GasImplAssembly contract:

  • from the first CALLDATALOAD of the first input parameter a

  • until keccak256 has been called and the calculated hash put on the stack

The above execution started at PC 0x32 (50) until 0x46 (70) and used 213-105 = 108 gas:

// push value of `a` onto stack from calldata
CALLDATALOAD [Stack : 0x01]    
             [Memory:     ]

// push Starting Next Free Memory Address (SNFMA) onto stack
PUSH1(0x80)  [Stack : 0x80, 0x01]
             [Memory:           ]

// store value of `a` into memory at SNFMA
MSTORE       [Stack :            ]
             [Memory: 0x80 = 0x01]

// push offset on stack to read next input variable
PUSH1(0x24)  [Stack : 0x24       ]
             [Memory: 0x80 = 0x01]

// push value of `b` onto stack from calldata
CALLDATALOAD [Stack : 0x02       ]
             [Memory: 0x80 = 0x01]

// push Next Free Memory Address (NFMA) onto stack 
PUSH1(0xa0)  [Stack : 0xa0, 0x02 ]
             [Memory: 0x80 = 0x01]

// note: --via-ir was able to pre-compute the free
// memory addresses such that it doesn't have to calculate
// them at runtime but can just push them onto the stack

// store value of `b` into memory at Next Free Memory Address (NFMA)
MSTORE       [Stack :                         ]
             [Memory: 0x80 = 0x01, 0xa0 = 0x02]

// push offset on stack to read next input variable
PUSH1(0x44)  [Stack : 0x44                    ]
             [Memory: 0x80 = 0x01, 0xa0 = 0x02]

// push value of `c` onto stack from calldata
CALLDATALOAD [Stack : 0x03                    ]
             [Memory: 0x80 = 0x01, 0xa0 = 0x02]

// push Next Free Memory Address (NFMA) onto stack 
PUSH1(0xc0)  [Stack : 0xc0, 0x03              ]
             [Memory: 0x80 = 0x01, 0xa0 = 0x02]

// store value of `c` into memory at Next Free Memory Address (NFMA)
MSTORE       [Stack :                                      ]
             [Memory: 0x80 = 0x01, 0xa0 = 0x02, 0xc0 = 0x03]

// note: all input parameters are now stored in memory

// push `size` parameter for call to keccak onto stack
PUSH1(0x60)  [Stack : 0x60                                 ]
             [Memory: 0x80 = 0x01, 0xa0 = 0x02, 0xc0 = 0x03]

// push `offset` parameter for call to keccack onto stack
PUSH1(0x80)  [Stack : 0x80, 0x60                           ]
             [Memory: 0x80 = 0x01, 0xa0 = 0x02, 0xc0 = 0x03]

// call keccak256(offset, size)
KECCAK256   [Stack : result                               ]
            [Memory: 0x80 = 0x01, 0xa0 = 0x02, 0xc0 = 0x03]

Execution Trace - Solidity --via-ir Version

Execute this command: forge test --match-contract GasDebugTest --debug test_GasImplNormal --via-ir

We are concerned with the execution inside the GasImplNormal contract:

  • from the first CALLDATALOAD of the first input parameter a

  • until keccak256 has been called and the calculated hash put on the stack

The above execution started at PC 0x3a (58) until 0x72 (114) and used 318-123 = 195 gas:

// push value of `a` onto stack from calldata
CALLDATALOAD [Stack : 0x01, 0xa0, 0x80, 0x00]    
             [Memory:                       ]

// duplicate Next Free Memory Address (NFMA) onto stack
DUP2         [Stack : 0x0a, 0x01, 0xa0, 0x80, 0x00]    
             [Memory:                             ]

// store value of `a` into memory at NFMA
MSTORE       [Stack : 0xa0, 0x80, 0x00]    
             [Memory: 0xa0 = 0x01     ]

// push offset on stack to read next input variable
PUSH1(0x24)  [Stack : 0x24, 0xa0, 0x80, 0x00]
             [Memory: 0xa0 = 0x01           ]

// push value of `b` onto stack from calldata
CALLDATALOAD [Stack : 0x02, 0xa0, 0x80, 0x00]
             [Memory: 0xa0 = 0x01           ]

// push Free Memory Pointer Address (FMPA) onto stack 
PUSH1(0x40)  [Stack : 0x40, 0x02, 0xa0, 0x80, 0x00]
             [Memory: 0xa0 = 0x01                 ]

// duplicate 0x80
DUP4         [Stack : 0x80, 0x40, 0x02, 0xa0, 0x80, 0x00]
             [Memory: 0xa0 = 0x01                       ]

// calculate Next Free Memory Address (NFMA)
ADD          [Stack : 0xc0, 0x02, 0xa0, 0x80, 0x00]
             [Memory: 0xa0 = 0x01                 ]

// store value of `b` into memory at NFMA
MSTORE       [Stack : 0xa0, 0x80, 0x00        ]    
             [Memory: 0xa0 = 0x01, 0xc0 = 0x02] 

// push offset on stack to read next input variable
PUSH1(0x44)  [Stack : 0x44, 0xa0, 0x80, 0x00  ]
             [Memory: 0xa0 = 0x01, 0xc0 = 0x02]

// push value of `c` onto stack from calldata
CALLDATALOAD [Stack : 0x03, 0xa0, 0x80, 0x00  ]
             [Memory: 0xa0 = 0x01, 0xc0 = 0x02]

// push offset to calculate NFMA
PUSH1(0x60)  [Stack : 0x60, 0x03, 0xa0, 0x80, 0x00]
             [Memory: 0xa0 = 0x01, 0xc0 = 0x02    ]

// duplicate 0x80
DUP4         [Stack : 0x80, 0x60, 0x03, 0xa0, 0x80, 0x00]
             [Memory: 0xa0 = 0x01, 0xc0 = 0x02          ]

// calculate NFMA
ADD          [Stack : 0xe0, 0x03, 0xa0, 0x80, 0x00]
             [Memory: 0xa0 = 0x01, 0xc0 = 0x02    ]

// store value of `c` into memory at NFMA
MSTORE       [Stack : 0xa0, 0x80, 0x00                     ]
             [Memory: 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// note: all input parameters are now stored in memory

// push 0x60 which will be saved to memory then later
// used as the `size` parameter for keccak256 call
PUSH1(0x60)  [Stack : 0x60, 0xa0, 0x80, 0x00               ]
             [Memory: 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// note: subsequent opcodes related to `abi.encode`

// duplicate memory address to save previously pushed `size` parameter
DUP3         [Stack : 0x80, 0x60, 0xa0, 0x80, 0x00         ]
             [Memory: 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// save the `size` parameter into memory
MSTORE       [Stack : 0xa0, 0x80, 0x00                                  ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// used to calculate NFMA
PUSH1(0x80)  [Stack : 0x80, 0xa0, 0x80, 0x00                            ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// used to calculate NFMA
DUP3         [Stack : 0x80, 0x80, 0xa0, 0x80, 0x00                      ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// calculates NFMA; value used in LT and GT comparison
// then saved later on to FMPA
ADD          [Stack : 0x100, 0xa0, 0x80, 0x00                           ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// exchange 3rd and 1st stack elements
SWAP2        [Stack : 0x80, 0xa0, 0x100, 0x00                           ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// used for LT comparison
DUP1         [Stack : 0x80, 0x80, 0xa0, 0x100, 0x00                     ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// used for LT comparison
DUP4         [Stack : 0x100, 0x80, 0x80, 0xa0, 0x100, 0x00              ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// 0x100 < 0x80 ? false
LT           [Stack : 0x00, 0x80, 0xa0, 0x100, 0x00                     ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// used for GT comparison
PUSH8(0xffffffffffffffff)
             [Stack : 0xffffffffffffffff, 0x00, 0x80, 0xa0, 0x100, 0x00 ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// duplicate
DUP5         [Stack : 0x100, 0xffffffffffffffff, 0x00, 0x80, 0xa0, 0x100, 0x00]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03      ]

// 0x100 > 0xffffffffffffffff ? false
GT           [Stack : 0x00, 0x00, 0x80, 0xa0, 0x100, 0x00               ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// 0x00 or 0x00 = 0x00
OR           [Stack : 0x00, 0x80, 0xa0, 0x100, 0x00                     ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// not sure why this gets pushed here
PUSH1(0x76)  [Stack : 0x76, 0x00, 0x80, 0xa0, 0x100, 0x00               ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// jump ? false
JUMPI        [Stack : 0x80, 0xa0, 0x100, 0x00                           ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// not sure why this gets pushed here
PUSH1(0x20)  [Stack : 0x20, 0x80, 0xa0, 0x100, 0x00                     ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// exchange 5th and 1st stack elements
SWAP4        [Stack : 0x00, 0x80, 0xa0, 0x100, 0x20                     ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// remove top element 0x00
POP          [Stack : 0x80, 0xa0, 0x100, 0x20                           ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// duplicate 
DUP3         [Stack : 0x100, 0x80, 0xa0, 0x100, 0x20                    ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// prepare to over-write FMPA with previously
// calculated NFMA
PUSH1(0x40)  [Stack : 0x40, 0x100, 0x80, 0xa0, 0x100, 0x20              ]
             [Memory: 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// over-write value of FMPA with next NFMA
// FMPA was not being used though?
MSTORE       [Stack : 0x80, 0xa0, 0x100, 0x20                                         ]
             [Memory: 0x40 = 0x100, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// load `size` parameter for keccak256 call
MLOAD        [Stack : 0x60, 0xa0, 0x100, 0x20                                         ]
             [Memory: 0x40 = 0x100, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// exchange 2nd and 1st stack elements
SWAP1        [Stack : 0xa0, 0x60, 0x100, 0x20                                         ]
             [Memory: 0x40 = 0x100, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

// call keccak256(offset, size)
KECCAK256    [Stack : result, 0x100, 0x20                                             ]
             [Memory: 0x40 = 0x100, 0x80 = 0x60, 0xa0 = 0x01, 0xc0 = 0x02, 0xe0 = 0x03]

Formal Verification Using Halmos

The following function can be added to the existing GasDebugTest contract in order to formally verify using Halmos that both the assembly and solidity versions produce the same output:

// halmos --match-contract GasDebugTest
function check_GasImplEquivalent(uint256 a1, uint256 b1, uint256 c1) external {
    bytes32 resultNormal   = gasImplNormal.getKeccak256(a1,b1,c1);
    bytes32 resultAssembly = gasImplAssembly.getKeccak256(a1,b1,c1);

    assertEq(resultNormal, resultAssembly);
}

Execute the formal verification using: halmos --match-contract GasDebugTest