diff --git a/arm/fat/chacha-3core.asm b/arm/fat/chacha-3core.asm
new file mode 100644
index 0000000000000000000000000000000000000000..7938ee89bc87db59f64b0b39b41bc096a0799263
--- /dev/null
+++ b/arm/fat/chacha-3core.asm
@@ -0,0 +1,36 @@
+C arm/fat/chacha-3core.asm
+
+
+ifelse(<
+   Copyright (C) 2020 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+dnl PROLOGUE(_nettle_fat_chacha_3core) picked up by configure
+
+include_src(<arm/neon/chacha-3core.asm>)
diff --git a/chacha-crypt.c b/chacha-crypt.c
index c612ea4a6cb92a2470db5552c053337637c4db22..098b53e65c54923669db9dfee7496ddc49280337 100644
--- a/chacha-crypt.c
+++ b/chacha-crypt.c
@@ -55,11 +55,23 @@
 #define CHACHA_ROUNDS 20
 
 #if HAVE_NATIVE_chacha_3core
+#undef _chacha_crypt_3core /* Native 3core build: the 3core functions in this file are compiled under the public names. */
+#undef _chacha_crypt32_3core
+#define _chacha_crypt_3core chacha_crypt
+#define _chacha_crypt32_3core chacha_crypt32
+#elif !HAVE_NATIVE_fat_chacha_3core
+#undef _chacha_crypt_1core /* No 3core support at all: the 1core functions in this file are compiled under the public names. */
+#undef _chacha_crypt32_1core
+#define _chacha_crypt_1core chacha_crypt
+#define _chacha_crypt32_1core chacha_crypt32
+#endif /* Remaining case (fat 3core): both variants keep their internal names; the public names are presumably supplied by the fat dispatch code. */
+
+#if HAVE_NATIVE_chacha_3core || HAVE_NATIVE_fat_chacha_3core
 void
-chacha_crypt(struct chacha_ctx *ctx,
-	      size_t length,
-	      uint8_t *dst,
-	      const uint8_t *src)
+_chacha_crypt_3core(struct chacha_ctx *ctx,
+		    size_t length,
+		    uint8_t *dst,
+		    const uint8_t *src)
 {
   uint32_t x[3*_CHACHA_STATE_LENGTH];
 
@@ -95,12 +107,14 @@ chacha_crypt(struct chacha_ctx *ctx,
     }
   memxor3 (dst, src, x, length);
 }
-#else
+#endif
+
+#if !HAVE_NATIVE_chacha_3core
 void
-chacha_crypt(struct chacha_ctx *ctx,
-	      size_t length,
-	      uint8_t *c,
-	      const uint8_t *m)
+_chacha_crypt_1core(struct chacha_ctx *ctx,
+		    size_t length,
+		    uint8_t *dst,
+		    const uint8_t *src)
 {
   if (!length)
     return;
@@ -117,23 +131,67 @@ chacha_crypt(struct chacha_ctx *ctx,
       
       if (length <= CHACHA_BLOCK_SIZE)
 	{
-	  memxor3 (c, m, x, length);
+	  memxor3 (dst, src, x, length);
 	  return;
 	}
-      memxor3 (c, m, x, CHACHA_BLOCK_SIZE);
+      memxor3 (dst, src, x, CHACHA_BLOCK_SIZE);
 
       length -= CHACHA_BLOCK_SIZE;
-      c += CHACHA_BLOCK_SIZE;
-      m += CHACHA_BLOCK_SIZE;
+      dst += CHACHA_BLOCK_SIZE;
+      src += CHACHA_BLOCK_SIZE;
   }
 }
 #endif
 
+#if HAVE_NATIVE_chacha_3core || HAVE_NATIVE_fat_chacha_3core
 void
-chacha_crypt32(struct chacha_ctx *ctx,
-	       size_t length,
-	       uint8_t *c,
-	       const uint8_t *m)
+_chacha_crypt32_3core(struct chacha_ctx *ctx,
+		      size_t length,
+		      uint8_t *dst,
+		      const uint8_t *src)
+{
+  uint32_t x[3*_CHACHA_STATE_LENGTH];	/* Room for three output blocks. */
+  /* 32-bit counter variant: combine LENGTH bytes of SRC with core output into DST (memxor3), up to three blocks per core call. */
+  if (!length)
+    return;
+  /* Only state[12] is advanced below; with a 32-bit counter (RFC 8439 layout) state[13] is nonce material and must never receive a carry. */
+  while (length > 2*CHACHA_BLOCK_SIZE)
+    {
+      _chacha_3core32 (x, ctx->state, CHACHA_ROUNDS);
+      ctx->state[12] += 3;
+      /* The 32-bit counter simply wraps around; no carry into state[13]. */
+      if (length <= 3*CHACHA_BLOCK_SIZE)
+	{
+	  memxor3 (dst, src, x, length);
+	  return;
+	}
+      memxor3 (dst, src, x, 3*CHACHA_BLOCK_SIZE);
+
+      length -= 3*CHACHA_BLOCK_SIZE;
+      dst += 3*CHACHA_BLOCK_SIZE;
+      src += 3*CHACHA_BLOCK_SIZE;
+    }
+  if (length <= CHACHA_BLOCK_SIZE)
+    {
+      _chacha_core (x, ctx->state, CHACHA_ROUNDS);
+      ++ctx->state[12];
+    }
+  else
+    {
+      _chacha_3core32 (x, ctx->state, CHACHA_ROUNDS);
+      ctx->state[12] += 2;
+      /* Only two of the generated blocks are consumed here; again no carry. */
+    }
+  memxor3 (dst, src, x, length);
+}
+#endif
+
+#if !HAVE_NATIVE_chacha_3core
+void
+_chacha_crypt32_1core(struct chacha_ctx *ctx,
+		      size_t length,
+		      uint8_t *dst,
+		      const uint8_t *src)
 {
   if (!length)
     return;
@@ -150,13 +208,14 @@ chacha_crypt32(struct chacha_ctx *ctx,
 
       if (length <= CHACHA_BLOCK_SIZE)
 	{
-	  memxor3 (c, m, x, length);
+	  memxor3 (dst, src, x, length);
 	  return;
 	}
-      memxor3 (c, m, x, CHACHA_BLOCK_SIZE);
+      memxor3 (dst, src, x, CHACHA_BLOCK_SIZE);
 
       length -= CHACHA_BLOCK_SIZE;
-      c += CHACHA_BLOCK_SIZE;
-      m += CHACHA_BLOCK_SIZE;
+      dst += CHACHA_BLOCK_SIZE;
+      src += CHACHA_BLOCK_SIZE;
   }
 }
+#endif
diff --git a/chacha-internal.h b/chacha-internal.h
index cc90b132adc3cde3b2ec5fd6edbca59c52c29a59..ef6a64a3153c0a18f25a988b2e00134fba9e2ade 100644
--- a/chacha-internal.h
+++ b/chacha-internal.h
@@ -37,9 +37,15 @@
 #define NETTLE_CHACHA_INTERNAL_H_INCLUDED
 
 #include "nettle-types.h"
+#include "chacha.h"
 
 #define _chacha_core _nettle_chacha_core /* Short internal names map to _nettle_-prefixed symbols. */
 #define _chacha_3core _nettle_chacha_3core
+#define _chacha_3core32 _nettle_chacha_3core32
+#define _chacha_crypt_1core _nettle_chacha_crypt_1core /* chacha-crypt.c may re-#define the *_1core / *_3core crypt names to the public names. */
+#define _chacha_crypt_3core _nettle_chacha_crypt_3core
+#define _chacha_crypt32_1core _nettle_chacha_crypt32_1core
+#define _chacha_crypt32_3core _nettle_chacha_crypt32_3core
 
 void
 _chacha_core(uint32_t *dst, const uint32_t *src, unsigned rounds);
@@ -48,4 +54,31 @@ _chacha_core(uint32_t *dst, const uint32_t *src, unsigned rounds);
 void
 _chacha_3core(uint32_t *dst, const uint32_t *src, unsigned rounds);
 
+void
+_chacha_3core32(uint32_t *dst, const uint32_t *src, unsigned rounds); /* Called by _chacha_crypt32_3core with a dst buffer sized for three blocks. */
+/* Variant of chacha_crypt that consumes one block per loop iteration. */
+void
+_chacha_crypt_1core(struct chacha_ctx *ctx,
+		    size_t length,
+		    uint8_t *dst,
+		    const uint8_t *src);
+/* Variant of chacha_crypt working from a three-block buffer; only compiled with native or fat 3core support. */
+void
+_chacha_crypt_3core(struct chacha_ctx *ctx,
+		    size_t length,
+		    uint8_t *dst,
+		    const uint8_t *src);
+/* Variant of chacha_crypt32 that consumes one block per loop iteration. */
+void
+_chacha_crypt32_1core(struct chacha_ctx *ctx,
+		      size_t length,
+		      uint8_t *dst,
+		      const uint8_t *src);
+/* Variant of chacha_crypt32 built on _chacha_3core32, up to three blocks per core call. */
+void
+_chacha_crypt32_3core(struct chacha_ctx *ctx,
+		      size_t length,
+		      uint8_t *dst,
+		      const uint8_t *src);
+
 #endif /* NETTLE_CHACHA_INTERNAL_H_INCLUDED */
diff --git a/configure.ac b/configure.ac
index 3136c1a3666760549f35c9a7d6b6426b672d1030..f6d5fdd81a020f9b3d16b8c7fe4295546c76227e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -560,6 +560,7 @@ AH_VERBATIM([HAVE_NATIVE],
     implementation of the corresponding routine exists.  */
 #undef HAVE_NATIVE_chacha_core
 #undef HAVE_NATIVE_chacha_3core
+#undef HAVE_NATIVE_fat_chacha_3core
 #undef HAVE_NATIVE_ecc_curve25519_modp
 #undef HAVE_NATIVE_ecc_curve448_modp
 #undef HAVE_NATIVE_ecc_secp192r1_modp
diff --git a/fat-arm.c b/fat-arm.c
index a3f0f86059e2d9ae66228371cb263e0747656622..edc7de1c4416b308a2e87156b5590def97221d19 100644
--- a/fat-arm.c
+++ b/fat-arm.c
@@ -43,6 +43,7 @@
 #include "nettle-types.h"
 
 #include "aes-internal.h"
+#include "chacha-internal.h"
 #include "salsa20-internal.h"
 #include "fat-setup.h"
 
@@ -180,6 +181,14 @@ DECLARE_FAT_FUNC(_nettle_chacha_core, chacha_core_func)
 DECLARE_FAT_FUNC_VAR(chacha_core, chacha_core_func, c);
 DECLARE_FAT_FUNC_VAR(chacha_core, chacha_core_func, neon);
 
+DECLARE_FAT_FUNC(nettle_chacha_crypt, chacha_crypt_func)
+DECLARE_FAT_FUNC_VAR(chacha_crypt, chacha_crypt_func, 1core) /* fat_init installs this in the branch that picks the _c implementations. */
+DECLARE_FAT_FUNC_VAR(chacha_crypt, chacha_crypt_func, 3core) /* fat_init installs this in the branch that picks the _neon implementations. */
+
+DECLARE_FAT_FUNC(nettle_chacha_crypt32, chacha_crypt_func)
+DECLARE_FAT_FUNC_VAR(chacha_crypt32, chacha_crypt_func, 1core) /* Same selection rule as for chacha_crypt above. */
+DECLARE_FAT_FUNC_VAR(chacha_crypt32, chacha_crypt_func, 3core)
+
 static void CONSTRUCTOR
 fat_init (void)
 {
@@ -223,6 +232,8 @@ fat_init (void)
       _nettle_umac_nh_vec = _nettle_umac_nh_neon;
       _nettle_umac_nh_n_vec = _nettle_umac_nh_n_neon;
       _nettle_chacha_core_vec = _nettle_chacha_core_neon;
+      nettle_chacha_crypt_vec = _nettle_chacha_crypt_3core;
+      nettle_chacha_crypt32_vec = _nettle_chacha_crypt32_3core;
     }
   else
     {
@@ -235,6 +246,8 @@ fat_init (void)
       _nettle_umac_nh_vec = _nettle_umac_nh_c;
       _nettle_umac_nh_n_vec = _nettle_umac_nh_n_c;
       _nettle_chacha_core_vec = _nettle_chacha_core_c;
+      nettle_chacha_crypt_vec = _nettle_chacha_crypt_1core;
+      nettle_chacha_crypt32_vec = _nettle_chacha_crypt32_1core;
     }
 }
   
@@ -290,3 +303,16 @@ DEFINE_FAT_FUNC(_nettle_chacha_core, void,
 		(uint32_t *dst, const uint32_t *src, unsigned rounds),
 		(dst, src, rounds))
 
+DEFINE_FAT_FUNC(nettle_chacha_crypt, void,
+		(struct chacha_ctx *ctx,
+		 size_t length,
+		 uint8_t *dst,
+		 const uint8_t *src),
+		(ctx, length, dst, src)) /* Public entry point; presumably forwards to nettle_chacha_crypt_vec, which fat_init assigns. */
+
+DEFINE_FAT_FUNC(nettle_chacha_crypt32, void,
+		(struct chacha_ctx *ctx,
+		 size_t length,
+		 uint8_t *dst,
+		 const uint8_t *src),
+		(ctx, length, dst, src)) /* Public entry point; presumably forwards to nettle_chacha_crypt32_vec, which fat_init assigns. */
diff --git a/fat-setup.h b/fat-setup.h
index 58b687fd5c483ca0a8c616500c9efe08f6006ab5..99f1ea678abdcd092096648d713d51cf4b7edda7 100644
--- a/fat-setup.h
+++ b/fat-setup.h
@@ -93,6 +93,7 @@
 #define ENV_VERBOSE "NETTLE_FAT_VERBOSE"
 #define ENV_OVERRIDE "NETTLE_FAT_OVERRIDE"
 
+struct chacha_ctx;
 struct salsa20_ctx;
 
 /* DECLARE_FAT_FUNC(name, ftype)
@@ -181,3 +182,8 @@ typedef void umac_nh_n_func (uint64_t *out, unsigned n, const uint32_t *key,
 			     unsigned length, const uint8_t *msg);
 
 typedef void chacha_core_func(uint32_t *dst, const uint32_t *src, unsigned rounds);
+
+typedef void chacha_crypt_func(struct chacha_ctx *ctx,
+			       size_t length,
+			       uint8_t *dst,
+			       const uint8_t *src); /* Function type used in fat-arm.c for both nettle_chacha_crypt and nettle_chacha_crypt32 and their 1core/3core variants. */