diff --git a/Makefile.in b/Makefile.in
index 87d193b63483b4d926901c148f075ec52b34b200..5e0ffdd77601312b565cd093c5a514bb03dce2a3 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -620,7 +620,7 @@ distdir: $(DISTFILES)
 		arm arm/neon arm/v6 arm/fat \
 		arm64 arm64/crypto arm64/fat \
 		powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/fat \
-		s390x s390x/msa_x1 s390x/msa_x2 ; do \
+		s390x s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 ; do \
 	  mkdir "$(distdir)/$$d" ; \
 	  find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' -o -name README ')' \
 	    -exec cp '{}' "$(distdir)/$$d" ';' ; \
diff --git a/configure.ac b/configure.ac
index c0650ec23df6ffed893bbd100a59430649d4900e..64242bfd6c03e759616de60961dc1e8d5aee50dc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -531,7 +531,7 @@ if test "x$enable_assembler" = xyes ; then
       if test "$ABI" = 64 ; then
 	asm_path="s390x"
         if test "$enable_s390x_msa" = yes ; then
-          asm_path="s390x/msa_x1 s390x/msa_x2 $asm_path"
+          asm_path="s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 $asm_path"
 	fi
       fi
       ;;
diff --git a/s390x/machine.m4 b/s390x/machine.m4
index acd5e26c41e6ca13ad2d72d08c2f6af2ef8c35fd..8626cec15ae42bbe401035579b6248fb98f6d42e 100644
--- a/s390x/machine.m4
+++ b/s390x/machine.m4
@@ -1,2 +1,21 @@
 C Register usage:
-define(`RA', `%r14')
+define(`RA', `%r14') C Return address
+define(`SP', `%r15') C Stack pointer
+
+define(`STANDARD_STACK_FRAME',`160') C Bytes ALLOC_STACK reserves below the allocated space
+
+C ALLOC_STACK(AP, SPACE_LEN): lower SP by STANDARD_STACK_FRAME+SPACE_LEN bytes
+C AP is a general register; it first carries the old SP, which is stored at 0(SP),
+C and is finally set to the allocated space at STANDARD_STACK_FRAME above the new SP
+C SPACE_LEN is the length of space, must be a multiple of 8
+C FREE_STACK macro with the same SPACE_LEN can be used to free the allocated space
+define(`ALLOC_STACK',
+`lgr            $1,SP
+    aghi           SP,-(STANDARD_STACK_FRAME+$2)
+    stg            $1,0(SP)
+    la             $1,STANDARD_STACK_FRAME (SP)')
+
+C FREE_STACK(SPACE_LEN): free stack space allocated by ALLOC_STACK
+C Raises SP by STANDARD_STACK_FRAME+SPACE_LEN, so SPACE_LEN must match the allocation
+define(`FREE_STACK',
+`aghi           SP,STANDARD_STACK_FRAME+$1')
diff --git a/s390x/msa_x4/gcm-hash.asm b/s390x/msa_x4/gcm-hash.asm
new file mode 100644
index 0000000000000000000000000000000000000000..50d8b7c014add2c781126ed40d0a844da8122749
--- /dev/null
+++ b/s390x/msa_x4/gcm-hash.asm
@@ -0,0 +1,99 @@
+C s390x/msa_x4/gcm-hash.asm
+
+ifelse(`
+   Copyright (C) 2020 Mamone Tarsha
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+C KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) is specified in
+C "z/Architecture Principles of Operation SA22-7832-12" as follows:
+C A function specified by the function code in general register 0 is performed.
+C General register 1 contains the logical address of the leftmost byte of the parameter block in storage.
+C The second operand is processed as specified by the function code using an initial chaining value in
+C the parameter block, and the result replaces the chaining value.
+
+C This implementation uses KIMD-GHASH function.
+C The parameter block used for the KIMD-GHASH function has the following format:
+C *----------------------------------------------*
+C |       Initial Chaining Value (16 bytes)      |
+C |----------------------------------------------|
+C |            Hash Subkey (16 bytes)            |
+C *----------------------------------------------*
+
+C Size of parameter block in bytes: 16-byte chaining value followed by 16-byte hash subkey
+define(`PB_SIZE', `32')
+
+C Byte offset of H in the key table: gcm_set_key() assigns H value in the middle element of the table
+define(`H_idx', `128*16')
+
+.file "gcm-hash.asm"
+
+.text
+
+C void gcm_init_key (union gcm_block *table)
+
+PROLOGUE(_nettle_gcm_init_key)
+    C Except for Hash Subkey (H), KIMD-GHASH does not need any pre-computed values so just return to the caller.
+    br             RA                            C return immediately, the table is left untouched
+EPILOGUE(_nettle_gcm_init_key)
+
+C void gcm_hash (const struct gcm_key *key, union gcm_block *x,
+C                size_t length, const uint8_t *data)
+
+PROLOGUE(_nettle_gcm_hash)
+    ldgr           %f0,%r6                       C save non-volatile general register 6 in volatile floating-point register 0
+    C --- allocate a stack space for parameter block in addition to 16-byte buffer to handle leftover bytes ---
+    ALLOC_STACK(%r1,PB_SIZE+16)                  C parameter block (must be general register 1)
+    lgr            %r6,%r3                       C keep x pointer in %r6, %r3 is reused below for the byte count
+    mvc            0(16,%r1),0(%r3)              C copy x Initial Chaining Value field
+    mvc            16(16,%r1),H_idx (%r2)        C copy H to Hash Subkey field
+    lghi           %r0,65                        C GHASH function code (must be general register 0)
+    lgr            %r2,%r5                       C location of leftmost byte of data (must not be odd-numbered general register nor be general register 0)
+    C number of bytes (must be general register of data + 1). length must be a multiple of the data block size (16).
+    risbg          %r3,%r4,0,187,0               C Insert bit offsets 0-59, bit offset 0 of the fourth operand is set to clear the remaining bits.
+1:  .long   0xb93e0002                           C kimd %r0,%r2
+    brc            1,1b                          C safely branch back in case of partial completion
+    C --- handle leftovers ---
+    risbg          %r5,%r4,60,191,0              C Insert bit offsets 60-63 and clear the remaining bits.
+    jz             4f                            C skip the leftover handling when the leftover count is zero
+    lgr            %r4,%r2                       C %r2 is used as the address of the leftover bytes, source pair is %r4 (address) and %r5 (length)
+    C --- copy the leftovers to allocated stack buffer and pad the remaining bytes with zero ---
+    la             %r2,PB_SIZE (%r1)             C destination is the 16-byte buffer right after the parameter block
+    lghi           %r3,16                        C destination length
+2:  mvcle          %r2,%r4,0                     C copy the leftovers, padding byte is 0
+    brc            1,2b                          C repeat until the copy is complete
+    aghi           %r2,-16                       C rewind %r2 to the start of the stack buffer
+    aghi           %r3,16                        C length of the padded block (assumes mvcle left %r3 at 0)
+3:  .long   0xb93e0002                           C kimd %r0,%r2
+    brc            1,3b                          C safely branch back in case of partial completion
+4:
+    mvc            0(16,%r6),0(%r1)              C store x
+    xc             0(PB_SIZE+16,%r1),0(%r1)      C wipe parameter block content and leftover bytes of data from stack
+    FREE_STACK(PB_SIZE+16)                       C release the stack space allocated above
+    lgdr           %r6,%f0                       C restore general register 6
+    br             RA                            C return to the caller
+EPILOGUE(_nettle_gcm_hash)