From da920218183ba067a47067baa6ee55bce42b44a9 Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Thu, 12 Mar 2020 11:44:14 -0700 Subject: [PATCH] ircd: Add intrinsic typedef header; update stringops unit. --- include/ircd/simd.h | 137 ++++++++++++++++++++++++++++++++++++++++++++ ircd/stringops.cc | 30 +++++----- 2 files changed, 152 insertions(+), 15 deletions(-) create mode 100644 include/ircd/simd.h diff --git a/include/ircd/simd.h b/include/ircd/simd.h new file mode 100644 index 000000000..20a7482e8 --- /dev/null +++ b/include/ircd/simd.h @@ -0,0 +1,137 @@ +// The Construct +// +// Copyright (C) The Construct Developers, Authors & Contributors +// Copyright (C) 2016-2020 Jason Volk +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice is present in all copies. The +// full license for this software is available in the LICENSE file. + +// +// This header is not included with the standard include group (ircd.h). +// Include this header in specific units as necessary. +// + +#pragma once +#define HAVE_IRCD_INTRIN_H +#include ircd::string_view ircd::toupper(const mutable_buffer &out, @@ -47,27 +47,27 @@ noexcept std::next(begin(in), std::min(size(in), size(out))) }; - const __m128i *src_ + const u128x1 *src_ { - reinterpret_cast(begin(in)) + reinterpret_cast(begin(in)) }; - __m128i *dst + u128x1 *dst { - reinterpret_cast<__m128i *>(begin(out)) + reinterpret_cast(begin(out)) }; - while(intptr_t(src_) < intptr_t(stop) - ssize_t(sizeof(__m128i))) + while(intptr_t(src_) < intptr_t(stop) - ssize_t(sizeof(u128x1))) { - const __m128i lit_A1 { _mm_set1_epi8('A' - 1) }; - const __m128i lit_Z1 { _mm_set1_epi8('Z' + 1) }; - const __m128i addend { _mm_set1_epi8('a' - 'A') }; - const __m128i src { _mm_loadu_si128(src_++) }; - const __m128i gte_A { _mm_cmpgt_epi8(src, lit_A1) }; - const __m128i lte_Z { _mm_cmplt_epi8(src, lit_Z1) }; - const __m128i mask { _mm_and_si128(gte_A, lte_Z) }; - const __m128i ctrl_mask { _mm_and_si128(mask, addend) }; - const __m128i result { _mm_add_epi8(src, ctrl_mask) }; + const u128x1 lit_A1 { _mm_set1_epi8('A' - 1) }; + const u128x1 lit_Z1 { _mm_set1_epi8('Z' + 1) }; + const u128x1 addend { _mm_set1_epi8('a' - 'A') }; + const u128x1 src { _mm_loadu_si128(src_++) }; + const u128x1 gte_A { _mm_cmpgt_epi8(src, lit_A1) }; + const u128x1 lte_Z { _mm_cmplt_epi8(src, lit_Z1) }; + const u128x1 mask { _mm_and_si128(gte_A, lte_Z) }; + const u128x1 ctrl_mask { _mm_and_si128(mask, addend) }; + const u128x1 result { _mm_add_epi8(src, ctrl_mask) }; _mm_storeu_si128(dst++, result); }