SHA-3 Hash Functions

Overview

SHA-3 (Secure Hash Algorithm 3) is the latest member of the Secure Hash Algorithm family of standards, released by NIST in 2015. Unlike SHA-1 and SHA-2, which are based on the Merkle-Damgård construction, SHA-3 is based on the Keccak sponge construction, providing a fundamentally different approach to cryptographic hashing.

Key Features

Common Use Cases

Algorithm Details

SHA-3 Family

Algorithm Output Size Security Level (collision resistance) Capacity Rate
SHA3-224 224 bits (28 bytes) 112 bits 448 bits 1152 bits
SHA3-256 256 bits (32 bytes) 128 bits 512 bits 1088 bits
SHA3-384 384 bits (48 bytes) 192 bits 768 bits 832 bits
SHA3-512 512 bits (64 bytes) 256 bits 1024 bits 576 bits

Security Properties

Implementation

Python Example

import hmac
import json
import os
import time

from metamui_crypto import SHA3_256, SHA3_512

# Basic hashing with SHA3-256
# One-shot API: hash() is given the complete message; the result exposes .hex().
sha3_256 = SHA3_256()
hash_value = sha3_256.hash(b"Hello, SHA-3!")
print(f"SHA3-256: {hash_value.hex()}")

# Basic hashing with SHA3-512
# Same call pattern as above, using the SHA3_512 class on the same message.
sha3_512 = SHA3_512()
hash_value_512 = sha3_512.hash(b"Hello, SHA-3!")
print(f"SHA3-512: {hash_value_512.hex()}")

# Incremental hashing
# Streaming API: feed the message piece by piece with update(), then call
# finalize() once to obtain the result (kept in final_hash, not printed here).
hasher = SHA3_256()
hasher.update(b"First part ")
hasher.update(b"Second part")
final_hash = hasher.finalize()

# File hashing
def hash_file_sha3(filepath, chunk_size=8192):
    """Hash a file using SHA3-256.

    The file is opened in binary mode and fed to the hasher one chunk at a
    time, so memory use is bounded by ``chunk_size`` rather than file size.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration (default 8192).

    Returns:
        The value returned by ``SHA3_256.finalize()`` after all chunks have
        been passed to ``update()``.

    Raises:
        ValueError: If ``chunk_size`` is not a positive number. A size of 0
            would make ``read()`` return nothing and silently produce the
            hash of an empty input.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    hasher = SHA3_256()
    with open(filepath, 'rb') as f:
        while chunk := f.read(chunk_size):
            hasher.update(chunk)
    return hasher.finalize()

# Compare with SHA-2
from metamui_crypto import SHA256
# Hash one message with both classes through the same one-shot hash() call,
# then print the two hex strings for side-by-side comparison.
message = b"Compare SHA-2 vs SHA-3"
sha2_hash = SHA256().hash(message)
sha3_hash = SHA3_256().hash(message)
print(f"SHA-256: {sha2_hash.hex()}")
print(f"SHA3-256: {sha3_hash.hex()}")

Advanced Usage

# Merkle tree with SHA3
class SHA3MerkleTree:
    """Binary Merkle tree whose nodes are hashed with SHA3-256 or SHA3-512.

    Leaves and internal nodes are hashed under different one-byte prefixes
    (0x00 and 0x01) so a leaf can never be confused with an internal node.
    """

    def __init__(self, use_sha3_512=False):
        """Select the hash: SHA3-512 when ``use_sha3_512`` is truthy, else SHA3-256."""
        if use_sha3_512:
            self.hasher = SHA3_512()
        else:
            self.hasher = SHA3_256()

    def hash_leaf(self, data: bytes) -> bytes:
        """Return the hash of ``data`` prefixed with the leaf tag 0x00."""
        tagged = b"\x00" + data
        return self.hasher.hash(tagged)

    def hash_internal(self, left: bytes, right: bytes) -> bytes:
        """Return the hash of ``left + right`` prefixed with the node tag 0x01."""
        tagged = b"\x01" + left + right
        return self.hasher.hash(tagged)

    def compute_root(self, leaves: list) -> bytes:
        """Return the Merkle root of ``leaves``.

        An empty list yields the hash of the empty byte string. When a level
        has an odd number of nodes, the last node is carried up unchanged
        instead of being paired with itself.
        """
        if not leaves:
            return self.hasher.hash(b"")

        level = [self.hash_leaf(leaf) for leaf in leaves]

        # Collapse the tree one level at a time until a single node remains.
        while len(level) > 1:
            parents = []
            for start in range(0, len(level), 2):
                pair = level[start:start + 2]
                if len(pair) == 2:
                    parents.append(self.hash_internal(pair[0], pair[1]))
                else:
                    # Unpaired trailing node is promoted as-is.
                    parents.append(pair[0])
            level = parents

        return level[0]

# Digital signature with SHA3
class SHA3DigitalSignature:
    """Hash-then-sign helper that digests messages with SHA3-256.

    This class only handles hashing and the layout of the signature record.
    The signing primitive itself is not defined here: ``sign_hash(digest)``
    and ``verify_hash_signature(digest, signature)`` are called on ``self``
    and must be provided by a subclass or mixin.
    """

    def __init__(self, private_key: bytes):
        """Store the private key and create the SHA3-256 hasher."""
        self.private_key = private_key
        self.hasher = SHA3_256()

    def sign_message(self, message: bytes) -> dict:
        """Sign ``message`` by signing its SHA3-256 digest.

        Args:
            message: Raw message bytes.

        Returns:
            A dict with keys ``'signature'`` (the value returned by
            ``self.sign_hash``), ``'hash_algorithm'`` (always ``'SHA3-256'``)
            and ``'message_hash'`` (hex string of the digest).
        """
        # Hash message with SHA3
        message_hash = self.hasher.hash(message)

        # In practice, use actual signature algorithm
        # This is simplified for demonstration
        signature = self.sign_hash(message_hash)

        return {
            'signature': signature,
            'hash_algorithm': 'SHA3-256',
            'message_hash': message_hash.hex()
        }

    def verify_signature(self, message: bytes, signature_data: dict) -> bool:
        """Check ``signature_data`` (as produced by ``sign_message``) against ``message``.

        Returns False as soon as the recomputed digest differs from the
        recorded ``'message_hash'``; otherwise returns whatever
        ``self.verify_hash_signature`` reports for the recorded signature.
        """
        # Recompute hash
        computed_hash = self.hasher.hash(message)
        expected_hash = bytes.fromhex(signature_data['message_hash'])

        # Verify hash matches before spending time on the signature check
        if computed_hash != expected_hash:
            return False

        # Verify signature (simplified)
        return self.verify_hash_signature(
            computed_hash,
            signature_data['signature']
        )

# Blockchain block hashing
class SHA3Blockchain:
    """Toy blockchain whose blocks are hashed with SHA3-256.

    Blocks are plain dicts serialized with ``json.dumps(..., sort_keys=True)``
    so the same content always produces the same bytes to hash.
    """

    def __init__(self):
        """Create the SHA3-256 hasher and an empty chain list."""
        self.hasher = SHA3_256()
        self.chain = []

    def create_block(self, transactions: list, previous_hash: bytes) -> dict:
        """Create and mine a new block.

        Args:
            transactions: JSON-serializable transaction objects.
            previous_hash: Hash of the preceding block.

        Returns:
            The block dict, including the winning ``'nonce'`` and the hex
            ``'hash'`` whose first two bytes are zero.
        """
        # Compute transaction root
        merkle_tree = SHA3MerkleTree()
        tx_root = merkle_tree.compute_root([
            json.dumps(tx, sort_keys=True).encode()
            for tx in transactions
        ])

        # Create block header
        block = {
            'version': 1,
            'previous_hash': previous_hash.hex(),
            'merkle_root': tx_root.hex(),
            'timestamp': int(time.time()),
            'transactions': transactions,
            'nonce': 0
        }

        # Mine block (simplified proof of work): the hash is taken over the
        # block *without* its 'hash' field, which is only added on success.
        while True:
            block_data = json.dumps(block, sort_keys=True).encode()
            block_hash = self.hasher.hash(block_data)

            if block_hash[:2] == b'\x00\x00':  # 2 leading zero bytes
                block['hash'] = block_hash.hex()
                break

            block['nonce'] += 1

        return block

    def verify_block(self, block: dict) -> bool:
        """Verify a block's Merkle root and hash.

        Returns:
            True when the recomputed Merkle root and block hash both match
            the stored values. False on any mismatch, and also when the block
            lacks a ``'hash'``, ``'merkle_root'`` or ``'transactions'`` field
            (an incomplete block cannot be valid).
        """
        required_fields = ('hash', 'merkle_root', 'transactions')
        if any(field not in block for field in required_fields):
            return False

        # Verify transaction root
        merkle_tree = SHA3MerkleTree()
        computed_root = merkle_tree.compute_root([
            json.dumps(tx, sort_keys=True).encode()
            for tx in block['transactions']
        ])

        if computed_root.hex() != block['merkle_root']:
            return False

        # Verify block hash over everything except the 'hash' field itself,
        # mirroring how create_block computed it.
        block_copy = block.copy()
        del block_copy['hash']
        block_data = json.dumps(block_copy, sort_keys=True).encode()
        computed_hash = self.hasher.hash(block_data)

        return computed_hash.hex() == block['hash']

# Content-addressed storage
class SHA3ContentStore:
    """In-memory content-addressed store keyed by hex SHA3-256 digests."""

    def __init__(self):
        """Create the hasher and an empty address -> content mapping."""
        self.hasher = SHA3_256()
        self.storage = {}

    def _address_of(self, content: bytes) -> str:
        """Return the hex digest used as the address of ``content``."""
        return self.hasher.hash(content).hex()

    def store(self, content: bytes) -> str:
        """Save ``content`` under its own digest and return that address."""
        key = self._address_of(content)
        self.storage[key] = content
        return key

    def retrieve(self, address: str) -> bytes:
        """Return the content stored at ``address`` after re-checking its digest.

        Raises:
            KeyError: If nothing is stored at ``address``.
            ValueError: If the stored bytes no longer hash to ``address``.
        """
        if address not in self.storage:
            raise KeyError(f"Content not found: {address}")

        blob = self.storage[address]
        # Re-hash on every read so silent corruption is never returned.
        if self._address_of(blob) != address:
            raise ValueError("Content integrity check failed")

        return blob

    def verify_integrity(self) -> dict:
        """Re-hash every entry and report which ones no longer match.

        Returns:
            A dict with ``'valid'`` (count of matching entries),
            ``'corrupted'`` (list of ``{'address', 'computed'}`` dicts) and
            ``'total'`` (number of stored entries).
        """
        total = len(self.storage)
        intact = 0
        mismatches = []

        for address, content in self.storage.items():
            actual = self._address_of(content)
            if actual != address:
                mismatches.append({
                    'address': address,
                    'computed': actual
                })
                continue
            intact += 1

        return {'valid': intact, 'corrupted': mismatches, 'total': total}

# Password verification with SHA3
class SHA3PasswordManager:
    """Demonstration of salted, iterated SHA3-256 password hashing.

    Note: this is an illustration only; use a dedicated password hashing
    function such as Argon2 in production.
    """

    def __init__(self):
        """Create the SHA3-256 hasher used for every round."""
        self.hasher = SHA3_256()

    def hash_password(self, password: str, salt: bytes = None) -> tuple:
        """Hash password with SHA3 (note: use Argon2 in production).

        Args:
            password: Password text; encoded as UTF-8 before hashing.
            salt: Salt bytes. When None, 32 random bytes are drawn from
                ``os.urandom``.

        Returns:
            ``(digest, salt)`` where ``digest`` is the result of hashing
            ``salt + password`` and then re-hashing the output, 10000 hash
            calls in total.
        """
        if salt is None:
            salt = os.urandom(32)

        # Combine password and salt
        current = salt + password.encode('utf-8')

        # Multiple rounds for stretching
        for _ in range(10000):  # Simple iteration
            current = self.hasher.hash(current)

        return current, salt

    def verify_password(self, password: str, stored_hash: bytes,
                       salt: bytes) -> bool:
        """Verify password against stored hash.

        The comparison uses ``hmac.compare_digest`` so the time taken does not
        depend on how many leading bytes of the two digests agree.

        Returns:
            True when the recomputed digest equals ``stored_hash``; False
            otherwise, including when ``stored_hash`` is not bytes-like.
        """
        computed_hash, _ = self.hash_password(password, salt)
        # compare_digest raises on mismatched types; keep the old behavior of
        # simply reporting "no match" for a non-bytes stored value.
        if not isinstance(stored_hash, (bytes, bytearray, memoryview)):
            return False
        return hmac.compare_digest(computed_hash, stored_hash)

Implementation Details

# SHA-3 core implementation (simplified)
class SHA3Core:
    """Pure-Python SHA-3 sponge built on the Keccak-f[1600] permutation.

    The 1600-bit state is held as 25 64-bit lanes, lane ``(x, y)`` living at
    index ``5*y + x``. Input is XORed into the first ``rate_bytes`` of the
    state one block at a time (``absorb``), and the digest is read back from
    the same region (``finalize``).

    Typical use::

        core = SHA3Core(256)
        core.absorb(b"abc")
        digest = core.finalize()
    """

    # Rotation amount applied at step t of the rho/pi walk:
    # (t + 1)(t + 2) / 2 mod 64.
    rho_offsets = (
        1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
        27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
    )

    # Destination lane index (5*y + x) for step t of the rho/pi walk, which
    # starts at (x, y) = (1, 0) and repeatedly maps (x, y) -> (y, 2x + 3y mod 5).
    _PI_LANES = (
        10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
        15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
    )

    def _build_round_constants():
        """Derive the 24 iota round constants from the Keccak 8-bit LFSR.

        Generating them avoids transcription errors in 24 64-bit literals;
        the first values are 0x1, 0x8082, 0x800000000000808a, ...
        """
        constants = []
        lfsr = 1
        for _ in range(24):
            constant = 0
            for j in range(7):
                lfsr = ((lfsr << 1) ^ ((lfsr >> 7) * 0x71)) % 256
                if lfsr & 2:
                    constant ^= 1 << ((1 << j) - 1)
            constants.append(constant)
        return tuple(constants)

    ROUND_CONSTANTS = _build_round_constants()
    del _build_round_constants  # builder is only needed at class creation

    def __init__(self, output_bits):
        """Set up an empty sponge for a digest of ``output_bits`` bits.

        Args:
            output_bits: Digest length in bits (224, 256, 384 or 512 for the
                standard SHA-3 variants). Capacity is twice this value.

        Raises:
            ValueError: If ``output_bits`` is not a positive multiple of 8
                or leaves no room for a positive rate (``>= 800``).
        """
        if output_bits <= 0 or output_bits % 8 != 0 or output_bits >= 800:
            raise ValueError(
                "output_bits must be a positive multiple of 8 below 800"
            )
        self.output_bits = output_bits
        self.rate_bits = 1600 - 2 * output_bits  # Capacity = 2 * output_bits
        self.rate_bytes = self.rate_bits // 8
        self.state = [0] * 25  # 5x5 array of 64-bit words
        self.buffer = bytearray()
        self.finalized = False

    @staticmethod
    def rotl64(value: int, shift: int) -> int:
        """Rotate the 64-bit integer ``value`` left by ``shift`` bits."""
        shift %= 64
        mask = 0xFFFFFFFFFFFFFFFF
        value &= mask
        return ((value << shift) | (value >> (64 - shift))) & mask

    def rho_pi_coords(self, t: int) -> tuple:
        """Return the ``(x, y)`` lane written at step ``t`` of the rho/pi walk."""
        lane = self._PI_LANES[t]
        return lane % 5, lane // 5

    def keccak_f(self):
        """Apply the 24-round Keccak-f[1600] permutation to ``self.state`` in place."""
        state = self.state
        rotl64 = self.rotl64

        for round_constant in self.ROUND_CONSTANTS:
            # Theta step: mix each lane with the parities of two neighbouring columns
            C = [
                state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ state[x + 20]
                for x in range(5)
            ]
            D = [C[(x + 4) % 5] ^ rotl64(C[(x + 1) % 5], 1) for x in range(5)]
            for lane in range(25):
                state[lane] ^= D[lane % 5]

            # Rho and Pi steps: rotate each lane and move it to its new position
            current = state[1]
            for t in range(24):
                x, y = self.rho_pi_coords(t)
                lane = 5 * y + x
                current, state[lane] = state[lane], rotl64(current, self.rho_offsets[t])

            # Chi step: non-linear mix within each row. ``~a & b`` is safe with
            # Python ints because ``b`` is non-negative and at most 64 bits.
            for row in range(0, 25, 5):
                lanes = state[row:row + 5]
                for x in range(5):
                    state[row + x] = lanes[x] ^ (~lanes[(x + 1) % 5] & lanes[(x + 2) % 5])

            # Iota step: break symmetry between rounds
            state[0] ^= round_constant

    def absorb(self, data: bytes):
        """Absorb data into sponge.

        Bytes are buffered; every complete ``rate_bytes`` block is XORed into
        the state and followed by one permutation. Leftover bytes stay in
        ``self.buffer`` for the next call or for ``finalize``.

        Raises:
            ValueError: If the sponge has already been finalized.
        """
        if self.finalized:
            raise ValueError("Cannot absorb after finalization")

        self.buffer.extend(data)

        rate = self.rate_bytes
        consumed = 0
        # Walk the buffer with an offset and trim once at the end, instead of
        # re-slicing the whole buffer after every block.
        while len(self.buffer) - consumed >= rate:
            block = self.buffer[consumed:consumed + rate]

            # XOR block into state, one little-endian 64-bit lane at a time
            # (a short trailing slice is read as if zero-padded).
            for i in range(0, rate, 8):
                self.state[i // 8] ^= int.from_bytes(block[i:i + 8], 'little')

            # Apply permutation
            self.keccak_f()
            consumed += rate

        if consumed:
            del self.buffer[:consumed]

    def finalize(self) -> bytes:
        """Finalize and squeeze output.

        Pads the buffered tail with the SHA-3 domain suffix and pad10*1 rule
        (0x06 ... 0x80, collapsing to a single 0x86 byte when only one byte of
        the block is free), absorbs that last block and returns
        ``output_bits // 8`` digest bytes.

        Raises:
            ValueError: If called more than once.
        """
        if self.finalized:
            raise ValueError("Already finalized")

        # Take the tail out of the buffer *before* absorbing the padded block;
        # absorb() appends to self.buffer, so leaving the tail in place would
        # feed it to the sponge twice.
        final_block = bytearray(self.buffer)
        self.buffer = bytearray()

        final_block.append(0x06)  # SHA-3 domain separator + first padding bit
        final_block.extend(b'\x00' * (self.rate_bytes - len(final_block)))
        final_block[-1] |= 0x80  # last padding bit (merges into 0x86 if needed)

        # Absorb final block
        self.absorb(bytes(final_block))

        # Squeeze output
        output = bytearray()
        output_bytes = self.output_bits // 8

        while True:
            take = min(self.rate_bytes, output_bytes - len(output))
            # Extract bytes from state, little-endian within each lane
            for i in range(take):
                output.append((self.state[i // 8] >> (8 * (i % 8))) & 0xFF)

            if len(output) >= output_bytes:
                break
            self.keccak_f()

        self.finalized = True
        return bytes(output)

Security Considerations

Best Practices

  1. Algorithm Selection
  2. Implementation Security
  3. Protocol Design
  4. Performance Considerations

Common Pitfalls

# NOTE: `data`, `sensitive_data`, `need_speed` and `need_sha3_features` are not
# defined in this snippet; they stand in for values supplied by the reader.

# DON'T: Mix SHA-2 and SHA-3 without clear separation
# sha2_hash = SHA256().hash(data)
# sha3_hash = SHA3_256().hash(sha2_hash)  # Unclear why mixing

# DO: Use consistent hash function
sha3_hash = SHA3_256().hash(data)

# DON'T: Use short output for security-critical applications
# sha3_224 = SHA3_224()  # Only 112-bit security
# weak_hash = sha3_224.hash(sensitive_data)

# DO: Use adequate security level
sha3_256 = SHA3_256()  # 128-bit security
secure_hash = sha3_256.hash(sensitive_data)

# DON'T: Assume SHA-3 is always better than SHA-2
# Both are secure, choose based on requirements

# DO: Consider performance and compatibility
# (there is no else branch: `hasher` is not assigned when neither flag is truthy)
if need_speed:
    hasher = SHA256()  # Faster on most platforms
elif need_sha3_features:
    hasher = SHA3_256()  # Length extension resistance

Performance Characteristics

Benchmarks

Algorithm Input Size Throughput Time Relative to SHA-256
SHA3-256 1 KB 398 MB/s 2.51 μs 0.45x
SHA3-256 64 KB 445 MB/s 144 μs 0.48x
SHA3-512 1 KB 312 MB/s 3.21 μs 0.35x
SHA3-512 64 KB 356 MB/s 180 μs 0.38x

Performance vs Other Hash Functions

Algorithm 1KB Throughput Security Level Use Case
SHA3-256 398 MB/s 128-bit Standards compliance
SHA-256 892 MB/s 128-bit General purpose
BLAKE3 2.91 GB/s 128-bit High performance
BLAKE2b 1.82 GB/s 256-bit Fast hashing

Optimization Strategies

# Batch processing for multiple inputs
class BatchSHA3:
    """Hash a list of independent inputs with one chosen SHA-3 variant."""

    def __init__(self, algorithm='SHA3-256'):
        """Select the hasher class by name.

        Args:
            algorithm: ``'SHA3-256'`` (default) or ``'SHA3-512'``.

        Raises:
            ValueError: For any other name. Previously an unrecognized or
                misspelled name silently selected SHA3-512.
        """
        if algorithm not in ('SHA3-256', 'SHA3-512'):
            raise ValueError(f"Unsupported algorithm: {algorithm}")
        self.algorithm = algorithm
        self.hasher_class = SHA3_256 if algorithm == 'SHA3-256' else SHA3_512

    def hash_batch(self, inputs: list) -> list:
        """Hash multiple inputs efficiently.

        A fresh hasher is created for every input so no state is shared
        between them.

        Returns:
            One digest per input, in the same order as ``inputs``.
        """
        return [self.hasher_class().hash(data) for data in inputs]

# Streaming for large files
def hash_large_file_sha3(filepath: str, chunk_size: int = 1024*1024):
    """Generator that hashes a file with SHA3-256 while reporting progress.

    After each chunk is fed to the hasher it yields the percentage of the
    file consumed so far (bytes read divided by the size reported by
    ``os.path.getsize``, times 100). The last value yielded is the result of
    ``finalize()``, not a percentage.
    """
    digest = SHA3_256()
    total = os.path.getsize(filepath)
    consumed = 0

    with open(filepath, 'rb') as stream:
        while True:
            piece = stream.read(chunk_size)
            if not piece:
                # Empty read marks end of file.
                break
            digest.update(piece)
            consumed += len(piece)
            yield (consumed / total) * 100

    yield digest.finalize()

# Memory-efficient processing
class StreamingSHA3:
    """Feeds an iterable of chunks to SHA3-256 while counting bytes seen."""

    def __init__(self):
        """Create the hasher and reset the running byte count."""
        self.hasher = SHA3_256()
        self.total_bytes = 0

    def process_stream(self, data_stream):
        """Generator that hashes ``data_stream`` one chunk at a time.

        Yields the running byte total after each chunk, then yields the
        result of ``finalize()`` as the last item once the stream ends.
        """
        for piece in data_stream:
            self.hasher.update(piece)
            self.total_bytes = self.total_bytes + len(piece)
            yield self.total_bytes

        yield self.hasher.finalize()

Use Cases

1. Blockchain Implementation

class SHA3Blockchain:
    """Minimal proof-of-work miner and verifier using SHA3-256.

    Blocks are dicts serialized with ``json.dumps(..., sort_keys=True)``; a
    block is mined when its hash starts with ``difficulty`` zero bytes.
    """

    def __init__(self, difficulty=4):
        """Create the hasher and set the proof-of-work target.

        Args:
            difficulty: Number of leading zero bytes required (default 4).
                Each extra byte multiplies the expected work by 256.
        """
        self.hasher = SHA3_256()
        self.difficulty = difficulty  # Number of leading zero bytes

    def mine_block(self, block_data: dict) -> dict:
        """Mine block using SHA3 proof of work.

        Tries nonces 0, 1, 2, ... until the hash of the serialized block
        meets the target. ``block_data`` is modified in place: its
        ``'nonce'`` is overwritten on every attempt and ``'hash'`` is added
        on success.

        Returns:
            The same ``block_data`` dict, now holding the winning nonce and
            the hex hash.
        """
        nonce = 0
        target = b'\x00' * self.difficulty

        while True:
            block_data['nonce'] = nonce
            block_bytes = json.dumps(block_data, sort_keys=True).encode()
            block_hash = self.hasher.hash(block_bytes)

            if block_hash[:self.difficulty] == target:
                block_data['hash'] = block_hash.hex()
                return block_data

            nonce += 1

            # Progress reporting
            if nonce % 100000 == 0:
                print(f"Mining... nonce: {nonce}")

    def verify_block(self, block: dict) -> bool:
        """Verify block hash.

        Recomputes the hash over every field except ``'hash'`` and compares
        it with the stored value. ``block`` itself is not modified.

        Returns:
            True when the hashes match; False when they differ or when the
            block has no ``'hash'`` field (e.g. it was never mined).
        """
        if 'hash' not in block:
            return False

        block_copy = block.copy()
        stored_hash = block_copy.pop('hash')

        block_bytes = json.dumps(block_copy, sort_keys=True).encode()
        computed_hash = self.hasher.hash(block_bytes)

        return computed_hash.hex() == stored_hash

2. Digital Certificate System

class SHA3CertificateAuthority:
    """Issues and checks simple dict-based certificates hashed with SHA3-256.

    The signing primitive is not defined here: ``sign_hash(digest)`` and
    ``verify_signature(digest, signature)`` are called on ``self`` and must
    be provided by a subclass or mixin.
    """

    def __init__(self, ca_private_key: bytes):
        """Store the CA private key and create the SHA3-256 hasher."""
        self.ca_private_key = ca_private_key
        self.hasher = SHA3_256()

    def create_certificate(self, subject_info: dict,
                          public_key: bytes) -> dict:
        """Create digital certificate with SHA3 hash.

        Args:
            subject_info: JSON-serializable description of the subject.
            public_key: Subject public key bytes (stored as hex).

        Returns:
            The certificate dict, valid for exactly one year from issuance,
            with ``'signature'`` (hex) and ``'hash_algorithm'`` added after
            signing.
        """
        # Read the clock once so valid_until is always exactly one year after
        # valid_from, even if a second boundary passes during construction.
        issued_at = int(time.time())
        certificate = {
            'version': 3,
            'subject': subject_info,
            'public_key': public_key.hex(),
            'issuer': 'SHA3 Certificate Authority',
            'valid_from': issued_at,
            'valid_until': issued_at + (365 * 24 * 3600),  # 1 year
            'serial_number': os.urandom(16).hex()
        }

        # Hash certificate data (before signature fields are attached)
        cert_data = json.dumps(certificate, sort_keys=True).encode()
        cert_hash = self.hasher.hash(cert_data)

        # Sign hash (simplified)
        signature = self.sign_hash(cert_hash)

        certificate['signature'] = signature.hex()
        certificate['hash_algorithm'] = 'SHA3-256'

        return certificate

    def verify_certificate(self, certificate: dict) -> bool:
        """Verify certificate signature.

        ``certificate`` is not modified. Only the signature over the
        certificate body is checked; the validity period is not examined.

        Returns:
            The result of ``self.verify_signature`` for the recomputed hash,
            or False when the signature/hash-algorithm fields are missing,
            the signature is not valid hex, or the algorithm is not
            ``'SHA3-256'``.
        """
        if 'signature' not in certificate or 'hash_algorithm' not in certificate:
            return False

        cert_copy = certificate.copy()
        try:
            signature = bytes.fromhex(cert_copy.pop('signature'))
        except (TypeError, ValueError):
            # Malformed signature field: treat as an invalid certificate.
            return False
        hash_alg = cert_copy.pop('hash_algorithm')

        if hash_alg != 'SHA3-256':
            return False

        # Recompute hash over the body exactly as create_certificate did
        cert_data = json.dumps(cert_copy, sort_keys=True).encode()
        cert_hash = self.hasher.hash(cert_data)

        # Verify signature
        return self.verify_signature(cert_hash, signature)

3. Secure File Storage

class SHA3SecureStorage:
    """In-memory file store that records a SHA3-512 digest for every file."""

    def __init__(self, storage_key: bytes):
        """Keep the storage key, create the SHA3-512 hasher and an empty file table."""
        self.storage_key = storage_key
        self.hasher = SHA3_512()  # Use SHA3-512 for file integrity
        self.files = {}

    def store_file(self, filename: str, content: bytes) -> dict:
        """Store ``content`` under ``filename`` together with its digest.

        The file id is the first 16 hex characters of the hash of the
        encoded filename followed by the content digest.

        Returns:
            A dict with ``'file_id'``, ``'hash'`` (hex digest) and ``'size'``.
        """
        content_digest = self.hasher.hash(content)
        digest_hex = content_digest.hex()
        size = len(content)

        # Metadata kept alongside the bytes for later verification.
        metadata = {
            'filename': filename,
            'size': size,
            'hash': digest_hex,
            'algorithm': 'SHA3-512',
            'timestamp': time.time()
        }

        id_digest = self.hasher.hash(filename.encode() + content_digest)
        file_id = id_digest.hex()[:16]

        self.files[file_id] = {'content': content, 'metadata': metadata}

        return {'file_id': file_id, 'hash': digest_hex, 'size': size}

    def retrieve_file(self, file_id: str) -> tuple:
        """Return ``(content, metadata)`` for ``file_id`` after re-checking the digest.

        Raises:
            KeyError: If ``file_id`` is unknown.
            ValueError: If the content no longer matches the recorded hash.
        """
        if file_id not in self.files:
            raise KeyError(f"File not found: {file_id}")

        entry = self.files[file_id]
        content, metadata = entry['content'], entry['metadata']

        # Compare the fresh digest with the one recorded at store time.
        recorded = bytes.fromhex(metadata['hash'])
        if self.hasher.hash(content) != recorded:
            raise ValueError("File integrity check failed")

        return content, metadata

    def verify_all_files(self) -> dict:
        """Run the integrity check on every stored file.

        Returns:
            A dict with ``'total'``, ``'valid'`` (count passing the check)
            and ``'corrupted'`` (list of file ids that failed it).
        """
        report = {'total': len(self.files), 'valid': 0, 'corrupted': []}

        for file_id in self.files:
            try:
                self.retrieve_file(file_id)
            except ValueError:
                report['corrupted'].append(file_id)
            else:
                report['valid'] += 1

        return report

4. Cryptographic Protocol

class SHA3Protocol:
    """Small protocol building blocks, each a single SHA3-256 hash of
    concatenated inputs followed by a fixed purpose label."""

    def __init__(self):
        """Create the SHA3-256 hasher shared by all operations."""
        self.hasher = SHA3_256()

    def key_derivation(self, shared_secret: bytes,
                      context: bytes) -> bytes:
        """Return the hash of ``shared_secret + context + b"key_derivation"``.

        Simple key derivation (use HKDF in production).
        """
        material = shared_secret + context + b"key_derivation"
        derived = self.hasher.hash(material)
        return derived

    def challenge_response(self, challenge: bytes,
                          secret: bytes) -> bytes:
        """Return the hash of ``challenge + secret + b"challenge_response"``."""
        material = challenge + secret + b"challenge_response"
        answer = self.hasher.hash(material)
        return answer

    def commitment_scheme(self, value: bytes,
                         randomness: bytes = None) -> tuple:
        """Commit to ``value``.

        When ``randomness`` is None, 32 bytes are drawn from ``os.urandom``.

        Returns:
            ``(commitment, randomness)`` where the commitment is the hash of
            ``randomness + value + b"commitment"``.
        """
        blinding = os.urandom(32) if randomness is None else randomness
        sealed = self.hasher.hash(blinding + value + b"commitment")
        return sealed, blinding

    def verify_commitment(self, commitment: bytes, value: bytes,
                         randomness: bytes) -> bool:
        """Return True when ``commitment`` opens to ``value`` under ``randomness``."""
        recomputed = self.commitment_scheme(value, randomness)[0]
        return recomputed == commitment

Comparison with Other Hash Functions

SHA-3 vs SHA-2

Feature SHA-3 SHA-2
Construction Sponge (Keccak) Merkle-Damgård
Length Extension Immune Vulnerable
Performance Slower Faster
Security Margin Large Adequate
Hardware Support Limited Extensive
Standardization FIPS 202 FIPS 180-4

SHA-3 vs BLAKE3

Feature SHA-3 BLAKE3
Speed Moderate Very Fast
Parallelization Limited Excellent
Output Length Fixed Unlimited
Security Analysis Extensive Growing
Adoption Moderate Emerging

When to Use SHA-3

# NOTE: the condition names below (protocol_needs_length_extension_resistance,
# requires_nist_approved_alternative_to_sha2, building_post_quantum_system)
# are not defined in this snippet; they stand in for the reader's own
# requirements. BLAKE3 is likewise not imported anywhere in this document.

# Use SHA-3 for length extension resistance
if protocol_needs_length_extension_resistance:
    hasher = SHA3_256()

# Use SHA-3 for regulatory compliance
if requires_nist_approved_alternative_to_sha2:
    hasher = SHA3_256()

# Use SHA-3 for algorithm diversity
# (index 0, SHA256, is the primary; index 1, SHA3_256, is the backup)
hash_functions = [SHA256(), SHA3_256(), BLAKE3()]
primary_hash = hash_functions[0]
backup_hash = hash_functions[1]

# Use SHA-3 for post-quantum applications
if building_post_quantum_system:
    hasher = SHA3_256()  # Good foundation for PQC

Migration Guide

From SHA-2 to SHA-3

# NOTE: `data` is a placeholder for the bytes being hashed.

# Before: SHA-256
# (standard-library hashlib: construct with the data, then call digest())
import hashlib
sha2_hash = hashlib.sha256(data).digest()

# After: SHA3-256
# (metamui_crypto: construct the hasher, then pass the data to hash())
from metamui_crypto import SHA3_256
sha3_hash = SHA3_256().hash(data)

# Migration wrapper
class HashMigrator:
    """Switchable hasher for migrating from SHA-256 to SHA3-256.

    A single flag chosen at construction decides which algorithm every
    ``hash`` call uses.
    """

    def __init__(self, use_sha3=False):
        """Remember whether SHA3-256 (truthy) or SHA-256 (default) is used."""
        self.use_sha3 = use_sha3

    def hash(self, data: bytes) -> bytes:
        """Hash ``data`` with the algorithm selected at construction."""
        if not self.use_sha3:
            # Legacy path: standard-library SHA-256.
            return hashlib.sha256(data).digest()
        return SHA3_256().hash(data)

# Gradual migration
def compute_hash(data: bytes, algorithm='sha256'):
    """Hash ``data`` with the algorithm named by ``algorithm``.

    Args:
        data: Bytes to hash.
        algorithm: ``'sha256'`` (default, via hashlib) or ``'sha3-256'``.

    Raises:
        ValueError: If ``algorithm`` is any other value.
    """
    if algorithm == 'sha256':
        return hashlib.sha256(data).digest()
    if algorithm == 'sha3-256':
        return SHA3_256().hash(data)
    raise ValueError(f"Unsupported algorithm: {algorithm}")

From MD5/SHA-1 to SHA-3

# NOTE: `data` is a placeholder for the bytes being hashed.

# Before: MD5 (insecure)
import hashlib
md5_hash = hashlib.md5(data).digest()

# After: SHA3-256 (secure)
from metamui_crypto import SHA3_256
sha3_hash = SHA3_256().hash(data)

# Note: Output sizes are different
# MD5: 16 bytes, SHA3-256: 32 bytes
# Update any code that depends on hash length
# (for example fixed-width storage columns or length checks on stored digests)

Test Vectors

SHA3-256 Test Vectors

# Known-answer checks: each assert compares the hex digest of a fixed input
# against a fixed expected string. The same hasher object is reused for all
# three one-shot hash() calls.

# Test Vector 1: Empty string
sha3_256 = SHA3_256()
assert sha3_256.hash(b"").hex() == "a7ffc6f8bf1ed76651c14756a061d662f580ff4de43b49fa82d80a4b80f8434a"

# Test Vector 2: "abc"
assert sha3_256.hash(b"abc").hex() == "3a985da74fe225b2045c172d6bd390bd855f086e3e9d525b46bfe24511431532"

# Test Vector 3: Long message
# (the byte "a" repeated 1,000,000 times)
message = b"a" * 1000000
assert sha3_256.hash(message).hex() == "5c8875ae474a3634ba4fd55ec85bffd661f32aca75c6d699d0cdcb6c115891c1"

SHA3-512 Test Vectors

# Known-answer checks for SHA3-512: the expected strings are 128 hex
# characters (64 bytes). One hasher object serves both one-shot calls.

# Test Vector 4: Empty string
sha3_512 = SHA3_512()
assert sha3_512.hash(b"").hex() == "a69f73cca23a9ac5c8b567dc185a756e97c982164fe25859e0d1dcc1475c80a615b2123af1f5f94c11e3e9402c3ac558f500199d95b6d3e301758586281dcd26"

# Test Vector 5: "abc"
assert sha3_512.hash(b"abc").hex() == "b751850b1a57168a5693cd924b6b096e08f621827444f70d884f5d0240d2712e10e116e9192af3c91a7ec57647e3934057340b4cf408d5a56592f8274eec53f0"

Incremental Hashing Test

# Test Vector 6: Incremental vs one-shot
# Checks that feeding a message in pieces gives the same result as hashing
# it in a single call.
message = b"The quick brown fox jumps over the lazy dog"

# One-shot
sha3_256 = SHA3_256()
one_shot = sha3_256.hash(message)

# Incremental
# (the three pieces concatenate to exactly `message`)
hasher = SHA3_256()
hasher.update(b"The quick brown ")
hasher.update(b"fox jumps over ")
hasher.update(b"the lazy dog")
incremental = hasher.finalize()

assert one_shot == incremental

References