diff --git a/Makefile.in b/Makefile.in
index 65911e2a05bb399f2279d387d20485cae19112bb..11c8811415cd331925621260f72f97673628f077 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -607,7 +607,7 @@ distdir: $(DISTFILES)
 		x86_64 x86_64/aesni x86_64/sha_ni x86_64/pclmul x86_64/fat \
 		arm arm/neon arm/v6 arm/fat \
 		arm64 arm64/crypto arm64/fat \
-		powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/fat \
+		powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/p9 powerpc64/fat \
 		s390x s390x/vf s390x/msa s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 s390x/fat ; do \
 	  mkdir "$(distdir)/$$d" ; \
 	  find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' -o -name README ')' \
diff --git a/configure.ac b/configure.ac
index 73c6fc21ec430cf4be9c745da50eceee3b935080..b68b9e23dace30d9e7d80321f650ba410c72d0c3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -105,6 +105,10 @@ AC_ARG_ENABLE(power-altivec,
   AC_HELP_STRING([--enable-power-altivec], [Enable POWER altivec and vsx extensions. (default=no)]),,
   [enable_altivec=no])
 
+AC_ARG_ENABLE(power9,
+  AC_HELP_STRING([--enable-power9], [Enable POWER ISA v3.0. (default=no)]),,
+  [enable_power9=no])
+
 AC_ARG_ENABLE(s390x-vf,
   AC_HELP_STRING([--enable-s390x-vf], [Enable vector facility on z/Architecture. (default=no)]),,
   [enable_s390x_vf=no])
@@ -539,9 +543,12 @@ if test "x$enable_assembler" = xyes ; then
 	if test "x$enable_fat" = xyes ; then
 	  asm_path="powerpc64/fat $asm_path"
 	  OPT_NETTLE_SOURCES="fat-ppc.c $OPT_NETTLE_SOURCES"
-	  FAT_TEST_LIST="none crypto_ext altivec"
+	  FAT_TEST_LIST="none crypto_ext altivec power9"
 	else
-	  if test "$enable_power_crypto_ext" = yes ; then
+	  if test "$enable_power9" = yes ; then
+	    asm_path="powerpc64/p9 $asm_path"
+	  fi
+    if test "$enable_power_crypto_ext" = yes ; then
             asm_path="powerpc64/p8 $asm_path"
 	  fi
 	  if test "$enable_power_altivec" = yes ; then
@@ -605,6 +612,7 @@ asm_nettle_optional_list="cpuid.asm cpu-facility.asm \
   aes256-encrypt-2.asm aes256-decrypt-2.asm \
   cbc-aes128-encrypt-2.asm cbc-aes192-encrypt-2.asm cbc-aes256-encrypt-2.asm \
   chacha-2core.asm chacha-3core.asm chacha-4core.asm chacha-core-internal-2.asm \
+  poly1305-internal-2.asm \
   ghash-set-key-2.asm ghash-update-2.asm \
   salsa20-2core.asm salsa20-core-internal-2.asm \
   sha1-compress-2.asm sha256-compress-2.asm \
@@ -751,6 +759,9 @@ AH_VERBATIM([HAVE_NATIVE],
 #undef HAVE_NATIVE_ecc_secp384r1_redc
 #undef HAVE_NATIVE_ecc_secp521r1_modp
 #undef HAVE_NATIVE_ecc_secp521r1_redc
+#undef HAVE_NATIVE_poly1305_set_key
+#undef HAVE_NATIVE_poly1305_block
+#undef HAVE_NATIVE_poly1305_digest
 #undef HAVE_NATIVE_ghash_set_key
 #undef HAVE_NATIVE_ghash_update
 #undef HAVE_NATIVE_salsa20_core
diff --git a/fat-ppc.c b/fat-ppc.c
index bf622cf50a44016e9029b6b64c26fe1303491edb..7569e44d6cbf817e9af2c7b3519b045f32200057 100644
--- a/fat-ppc.c
+++ b/fat-ppc.c
@@ -65,6 +65,7 @@
 #include "aes-internal.h"
 #include "chacha-internal.h"
 #include "ghash-internal.h"
+#include "poly1305.h"
 #include "fat-setup.h"
 
 /* Defines from arch/powerpc/include/uapi/asm/cputable.h in Linux kernel */
@@ -77,11 +78,15 @@
 #ifndef PPC_FEATURE2_VEC_CRYPTO
 #define PPC_FEATURE2_VEC_CRYPTO 0x02000000
 #endif
+#ifndef PPC_FEATURE2_ARCH_3_00
+#define PPC_FEATURE2_ARCH_3_00 0x00800000
+#endif
 
 struct ppc_features
 {
   int have_crypto_ext;
   int have_altivec;
+  int have_power9;
 };
 
 #define MATCH(s, slen, literal, llen) \
@@ -93,6 +98,7 @@ get_ppc_features (struct ppc_features *features)
   const char *s;
   features->have_crypto_ext = 0;
   features->have_altivec = 0;
+  features->have_power9 = 0;
 
   s = secure_getenv (ENV_OVERRIDE);
   if (s)
@@ -105,6 +111,8 @@ get_ppc_features (struct ppc_features *features)
 	  features->have_crypto_ext = 1;
 	else if (MATCH(s, length, "altivec", 7))
 	  features->have_altivec = 1;
+  else if (MATCH(s, length, "power9", 6))
+	  features->have_power9 = 1;
 	if (!sep)
 	  break;
 	s = sep + 1;
@@ -136,6 +144,9 @@ get_ppc_features (struct ppc_features *features)
       features->have_crypto_ext
 	= ((hwcap2 & PPC_FEATURE2_VEC_CRYPTO) == PPC_FEATURE2_VEC_CRYPTO);
 
+      features->have_power9
+	= ((hwcap2 & PPC_FEATURE2_ARCH_3_00) == PPC_FEATURE2_ARCH_3_00);
+
       /* We also need VSX instructions, mainly for load and store. */
       features->have_altivec
 	= ((hwcap & (PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_VSX))
@@ -172,6 +183,18 @@ DECLARE_FAT_FUNC(nettle_chacha_crypt32, chacha_crypt_func)
 DECLARE_FAT_FUNC_VAR(chacha_crypt32, chacha_crypt_func, 1core)
 DECLARE_FAT_FUNC_VAR(chacha_crypt32, chacha_crypt_func, 3core)
 
+DECLARE_FAT_FUNC(_nettle_poly1305_set_key, poly1305_set_key_func)
+DECLARE_FAT_FUNC_VAR(poly1305_set_key, poly1305_set_key_func, c)
+DECLARE_FAT_FUNC_VAR(poly1305_set_key, poly1305_set_key_func, ppc64)
+
+DECLARE_FAT_FUNC(_nettle_poly1305_block, poly1305_block_func)
+DECLARE_FAT_FUNC_VAR(poly1305_block, poly1305_block_func, c)
+DECLARE_FAT_FUNC_VAR(poly1305_block, poly1305_block_func, ppc64)
+
+DECLARE_FAT_FUNC(_nettle_poly1305_digest, poly1305_digest_func)
+DECLARE_FAT_FUNC_VAR(poly1305_digest, poly1305_digest_func, c)
+DECLARE_FAT_FUNC_VAR(poly1305_digest, poly1305_digest_func, ppc64)
+
 static void CONSTRUCTOR
 fat_init (void)
 {
@@ -220,6 +243,21 @@ fat_init (void)
       nettle_chacha_crypt_vec = _nettle_chacha_crypt_1core;
       nettle_chacha_crypt32_vec = _nettle_chacha_crypt32_1core;
     }
+
+  if (features.have_power9)
+    {
+      if (verbose)
+	fprintf (stderr, "libnettle: enabling arch 3.00 code.\n");
+      _nettle_poly1305_set_key_vec = _nettle_poly1305_set_key_ppc64;
+    _nettle_poly1305_block_vec = _nettle_poly1305_block_ppc64;
+    _nettle_poly1305_digest_vec = _nettle_poly1305_digest_ppc64;
+    }
+  else
+    {
+      _nettle_poly1305_set_key_vec = _nettle_poly1305_set_key_c;
+    _nettle_poly1305_block_vec = _nettle_poly1305_block_c;
+    _nettle_poly1305_digest_vec = _nettle_poly1305_digest_c;
+    }
 }
 
 DEFINE_FAT_FUNC(_nettle_aes_encrypt, void,
@@ -261,3 +299,19 @@ DEFINE_FAT_FUNC(nettle_chacha_crypt32, void,
 		 uint8_t *dst,
 		 const uint8_t *src),
 		(ctx, length, dst, src))
+
+DEFINE_FAT_FUNC(_nettle_poly1305_set_key, void,
+		(struct poly1305_ctx *ctx,
+     const uint8_t *key),
+		(ctx, key))
+
+DEFINE_FAT_FUNC(_nettle_poly1305_block, void,
+		(struct poly1305_ctx *ctx,
+     const uint8_t *m,
+     unsigned high),
+		(ctx, m, high))
+
+DEFINE_FAT_FUNC(_nettle_poly1305_digest, void,
+		(struct poly1305_ctx *ctx,
+     union nettle_block16 *s),
+		(ctx, s))
diff --git a/fat-setup.h b/fat-setup.h
index e77cce0288a0e5cff1671e17913b2f5b0b06a757..ad3c10f06fe4c7bbf64b179bbe0ee3adcf515467 100644
--- a/fat-setup.h
+++ b/fat-setup.h
@@ -196,6 +196,12 @@ typedef void chacha_crypt_func(struct chacha_ctx *ctx,
 			       uint8_t *dst,
 			       const uint8_t *src);
 
+struct poly1305_ctx;
+typedef void poly1305_set_key_func(struct poly1305_ctx *ctx, const uint8_t *key);
+typedef void poly1305_digest_func(struct poly1305_ctx *ctx, union nettle_block16 *s);
+typedef void poly1305_block_func(struct poly1305_ctx *ctx, const uint8_t *m,
+			     unsigned high);
+
 struct aes128_ctx;
 typedef void aes128_set_key_func (struct aes128_ctx *ctx, const uint8_t *key);
 typedef void aes128_invert_key_func (struct aes128_ctx *dst, const struct aes128_ctx *src);
diff --git a/poly1305-internal.c b/poly1305-internal.c
index 490fdf714aa7f253aeadcbb1abb24f1987ce95d6..380b934eed72d512283dd7394df8fa62dca1f5cf 100644
--- a/poly1305-internal.c
+++ b/poly1305-internal.c
@@ -85,6 +85,28 @@
 #define h3 h.h32[3]
 #define h4 hh
 
+/* For fat builds: when a native version exists, the C functions defined below are renamed with a _c suffix. */
+#if HAVE_NATIVE_poly1305_set_key
+void
+_nettle_poly1305_set_key_c(struct poly1305_ctx *ctx,
+	       const uint8_t key[16]);
+# define _nettle_poly1305_set_key _nettle_poly1305_set_key_c
+#endif
+
+#if HAVE_NATIVE_poly1305_block
+void
+_nettle_poly1305_block_c(struct poly1305_ctx *ctx, const uint8_t *m,
+	       unsigned t4);
+# define _nettle_poly1305_block _nettle_poly1305_block_c
+#endif
+
+#if HAVE_NATIVE_poly1305_digest
+void
+_nettle_poly1305_digest_c(struct poly1305_ctx *ctx,
+	       union nettle_block16 *s);
+# define _nettle_poly1305_digest _nettle_poly1305_digest_c
+#endif
+
+
 void
 _nettle_poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
 {
diff --git a/powerpc64/fat/poly1305-internal-2.asm b/powerpc64/fat/poly1305-internal-2.asm
new file mode 100644
index 0000000000000000000000000000000000000000..177a45636969fcb15a54d9d347ba181c4d24d9c8
--- /dev/null
+++ b/powerpc64/fat/poly1305-internal-2.asm
@@ -0,0 +1,39 @@
+C powerpc64/fat/poly1305-internal-2.asm
+
+ifelse(`
+   Copyright (C) 2022 Mamone Tarsha
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+dnl picked up by configure
+dnl PROLOGUE(_nettle_poly1305_set_key)
+dnl PROLOGUE(_nettle_poly1305_block)
+dnl PROLOGUE(_nettle_poly1305_digest)
+
+define(`fat_transform', `$1_ppc64')
+include_src(`powerpc64/p9/poly1305-internal.asm')
diff --git a/powerpc64/p9/poly1305-internal.asm b/powerpc64/p9/poly1305-internal.asm
new file mode 100644
index 0000000000000000000000000000000000000000..238d639739e6078a4173e450e3e6ff5e348edd7d
--- /dev/null
+++ b/powerpc64/p9/poly1305-internal.asm
@@ -0,0 +1,238 @@
+C powerpc64/p9/poly1305-internal.asm
+
+ifelse(`
+   Copyright (C) 2013, 2022 Niels Möller
+   Copyright (C) 2022 Mamone Tarsha
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+C Register usage:
+
+define(`SP', `r1')
+define(`TOCP', `r2')
+
+C Arguments
+define(`CTX', `r3')
+define(`M', `r4')
+define(`M128', `r5')
+
+C Working state
+define(`H0', `r6')
+define(`H1', `r7')
+define(`H2', `r8')
+define(`T0', `r9')
+define(`T1', `r10')
+define(`T2', `r8')
+define(`T2A', `r9')
+define(`T2S', `r10')
+define(`IDX', `r6')
+define(`RZ', `r7')
+
+define(`ZERO', `v0')
+define(`F0', `v1')
+define(`F1', `v2')
+define(`F0S', `v3')
+define(`T', `v4')
+
+define(`R', `v5')
+define(`S', `v6')
+
+define(`T00', `v7')
+define(`T10', `v8')
+define(`T11', `v9')
+define(`MU0', `v10')
+define(`MU1', `v11')
+define(`TMP', `v12')
+
+.text
+
+C void _nettle_poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_set_key)
+	li			r9, 0             C r9 = 0, stored below to clear the state
+	addis		r5, TOCP, .key_mask@got@ha
+	ld			r5, .key_mask@got@l(r5)   C r5 = address of .key_mask, fetched through the GOT
+	ld			r8, 0(r5)         C r8 = 0x0FFFFFFC0FFFFFFC, mask for R_1
+	ori			r7, r8, 3         C r7 = 0x0FFFFFFC0FFFFFFF, mask for R_0
+
+	C Load R_0 and R_1 as little-endian doublewords (byte-reversed loads on BE)
+IF_LE(`
+	ld			r5, 0(r4)
+	ld			r6, 8(r4)
+')
+IF_BE(`
+	ldbrx		r5, 0, r4
+	addi		r4, r4, 8
+	ldbrx		r6, 0, r4
+')
+	and			r5, r5, r7        C R_0 &= 0x0FFFFFFC0FFFFFFF
+	and			r6, r6, r8        C R_1 &= 0x0FFFFFFC0FFFFFFC
+
+	srdi		r10, r6, 2        C r10 = R_1 >> 2
+	sldi		r7, r5, 2         C r7 = 4 * R_0
+	sldi		r8, r10, 2        C r8 = 4 * (R_1 >> 2)
+	add			r7, r7, r5        C r7 = 5 * R_0
+	add			r8, r8, r10       C r8 = 5 * (R_1 >> 2)
+
+	C Store key: R_0, R_1, 5 * R_0, 5 * (R_1 >> 2) at offsets 0, 8, 16, 24
+	std			r5, 0(r3)
+	std			r6, 8(r3)
+	std			r7, 16(r3)
+	std			r8, 24(r3)
+	C Reset state: zero the three doublewords at offsets 32, 40, 48
+	std			r9, 32(r3)
+	std			r9, 40(r3)
+	std			r9, 48(r3)
+
+	blr
+EPILOGUE(_nettle_poly1305_set_key)
+
+C void _nettle_poly1305_block(struct poly1305_ctx *ctx, const uint8_t *m, unsigned m128)
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_block)
+	ld			H0, 32(CTX)       C Load state H0, H1, H2 from offsets 32, 40, 48
+	ld			H1, 40(CTX)
+	ld			H2, 48(CTX)
+IF_LE(`
+	ld			T0, 0(M)
+	ld			T1, 8(M)
+')
+IF_BE(`
+	ldbrx		T0, 0, M
+	addi		M, M, 8
+	ldbrx		T1, 0, M
+')
+
+	addc		T0, T0, H0        C T0 = message bytes 0..7 (little-endian) + H0
+	adde		T1, T1, H1        C T1 = message bytes 8..15 (little-endian) + H1 + carry
+	adde		T2, M128, H2      C T2 = M128 + H2 + carry; T2 shares r8 with H2
+
+	mtvsrdd		VSR(T), T0, T1    C T = { T0, T1 }
+
+	li			IDX, 16
+	lxvd2x		VSR(R), 0, CTX    C R = 16 bytes at CTX + 0, stored by set_key
+	lxvd2x		VSR(S), IDX, CTX  C S = 16 bytes at CTX + 16, stored by set_key
+
+	andi.		T2A, T2, 3        C T2A = T2 & 3 (reuses r9; T0 is already copied into T)
+	srdi		T2S, T2, 2        C T2S = T2 >> 2 (reuses r10; T1 is already copied into T)
+
+	li			RZ, 0
+	vxor		ZERO, ZERO, ZERO
+
+	xxpermdi	VSR(MU0), VSR(R), VSR(S), 0b01   C MU0 = { R[0], S[1] }
+	xxswapd		VSR(MU1), VSR(R)                 C MU1 = { R[1], R[0] }
+
+	mtvsrdd		VSR(T11), 0, T2A
+	mtvsrdd		VSR(T00), T2S, RZ
+	mtvsrdd		VSR(T10), 0, T2
+
+	vmsumudm	F0, T, MU0, ZERO      C F0 = T[0] * MU0[0] + T[1] * MU0[1], a 128-bit sum
+	vmsumudm	F1, T, MU1, ZERO      C F1 = T[0] * MU1[0] + T[1] * MU1[1]
+	vmsumudm	TMP, T11, MU1, ZERO
+
+	vmsumudm	F0, T00, S, F0        C Accumulate the T2 >> 2 term into F0
+	vmsumudm	F1, T10, MU0, F1      C Accumulate the T2 term into F1
+
+	xxmrgld		VSR(TMP), VSR(TMP), VSR(ZERO)
+	xxswapd		VSR(F0S), VSR(F0)
+	vadduqm		F1, F1, TMP
+	stxsd		F0S, 32(CTX)          C Store one doubleword of the result as new H0
+
+	li			IDX, 40
+	xxmrghd		VSR(F0), VSR(ZERO), VSR(F0)
+	vadduqm		F1, F1, F0
+	xxswapd		VSR(F1), VSR(F1)
+	stxvd2x		VSR(F1), IDX, CTX     C Store 16 bytes at CTX + 40: new H1 and H2
+
+	blr
+EPILOGUE(_nettle_poly1305_block)
+
+C void _nettle_poly1305_digest(struct poly1305_ctx *ctx, union nettle_block16 *s)
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_digest)
+	C Load current state H0, H1, H2 into r5, r6, r7
+	ld			r5, 32(r3)
+	ld			r6, 40(r3)
+	ld			r7, 48(r3)
+
+	C Fold high part of H2: add 5 * (H2 >> 2) into the low words, keep H2 & 3
+	li			r10, 0
+	srdi		r9, r7, 2         C r9 = H2 >> 2
+	sldi		r8, r9, 2
+	add			r8, r8, r9        C r8 = 5 * (H2 >> 2)
+	andi.		r7, r7, 3         C r7 = H2 & 3
+	addc		r5, r5, r8
+	adde		r6, r6, r10       C r10 = 0 here, so this only propagates the carry
+	adde		r7, r7, r10
+
+	C Add 5 to state, save result if it carries (EQ is set when r7 + carry - 4 == 0)
+	li			r8, 5
+	li			r9, 0
+	li			r10, -4
+	addc		r8, r8, r5
+	adde		r9, r9, r6
+	adde.		r10, r10, r7
+	iseleq		r5, r8, r5        C r5 = EQ ? r8 : r5
+	iseleq		r6, r9, r6        C r6 = EQ ? r9 : r6
+
+	C Load the 16 bytes at s as two little-endian doublewords (r7, r8)
+IF_LE(`
+	ld			r7, 0(r4)
+	ld			r8, 8(r4)
+')
+IF_BE(`
+	li			r10, 8
+	ldbrx		r7, 0, r4
+	ldbrx		r8, r10, r4
+')
+
+	C Add hash to digest; the carry out of the high doubleword is dropped
+	addc		r5, r5, r7
+	adde		r6, r6, r8
+
+	C Store digest back to s, little-endian (r10 is still 8 on the BE path)
+IF_LE(`
+	std			r5, 0(r4)
+	std			r6, 8(r4)
+')
+IF_BE(`
+	stdbrx		r5, 0, r4
+	stdbrx		r6, r10, r4
+')
+	C Reset hash: zero the state doublewords at offsets 32, 40, 48
+	li			r9, 0
+	std			r9, 32(r3)
+	std			r9, 40(r3)
+	std			r9, 48(r3)
+
+	blr
+EPILOGUE(_nettle_poly1305_digest)
+
+.data
+.align 3
+.key_mask:
+.quad 0x0FFFFFFC0FFFFFFC