Replacement of internal RegEx with PCRE2

The pattern and replacement matching behaviour has been changed purely
due to the nature of switching to a standards-compliant library.

One mistake in the previous behaviour was that named groups didn't have
a number. This has been corrected.

As names are actually just aliases of numbered groups,
RegExMatch::get_name_dict() is now get_names() and returns a dictionary
mapping each name to the group number it represents.

Duplicate names are enabled, with the first matching instance
used.

Due to the lack of a suitable equivalent in PCRE2, RegExMatch::expand() was
removed.
This commit is contained in:
Zher Huei Lee 2017-08-07 23:13:15 +08:00
parent bf1f83ed29
commit e3e2f06324
63 changed files with 88352 additions and 1411 deletions

View file

@ -167,6 +167,7 @@ opts.Add('builtin_libvpx', "Use the builtin libvpx library (yes/no)", 'yes')
opts.Add('builtin_libwebp', "Use the builtin libwebp library (yes/no)", 'yes')
opts.Add('builtin_openssl', "Use the builtin openssl library (yes/no)", 'yes')
opts.Add('builtin_opus', "Use the builtin opus library (yes/no)", 'yes')
opts.Add('builtin_pcre2', "Use the builtin pcre2 library (yes/no)", 'yes')
opts.Add('builtin_squish', "Use the builtin squish library (yes/no)", 'yes')
opts.Add('builtin_zlib', "Use the builtin zlib library (yes/no)", 'yes')

View file

@ -1,7 +1,50 @@
#!/usr/bin/env python
Import('env')
Import('env_modules')
env.add_source_files(env.modules_sources, "*.cpp")
env_regex = env_modules.Clone()
env_regex.Append(CPPFLAGS=["-DPCRE2_CODE_UNIT_WIDTH=0"])
env_regex.add_source_files(env.modules_sources, "*.cpp")
Export('env')
if (env['builtin_pcre2'] != 'no'):
thirdparty_dir = "#thirdparty/pcre2/src/"
thirdparty_flags = ["-DPCRE2_STATIC", "-DHAVE_CONFIG_H", "-DSUPPORT_JIT"]
thirdparty_sources = [
"pcre2_auto_possess.c",
"pcre2_chartables.c",
"pcre2_compile.c",
"pcre2_config.c",
"pcre2_context.c",
"pcre2_dfa_match.c",
"pcre2_error.c",
"pcre2_find_bracket.c",
"pcre2_jit_compile.c",
"pcre2_maketables.c",
"pcre2_match.c",
"pcre2_match_data.c",
"pcre2_newline.c",
"pcre2_ord2utf.c",
"pcre2_pattern_info.c",
"pcre2_serialize.c",
"pcre2_string_utils.c",
"pcre2_study.c",
"pcre2_substitute.c",
"pcre2_substring.c",
"pcre2_tables.c",
"pcre2_ucd.c",
"pcre2_valid_utf.c",
"pcre2_xclass.c",
]
thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
env_regex.Append(CPPPATH=[thirdparty_dir])
env_regex.Append(CPPFLAGS=thirdparty_flags)
def pcre2_builtin(width):
env_pcre2 = env_modules.Clone()
env_pcre2["OBJSUFFIX"] = "_" + width + env_pcre2["OBJSUFFIX"]
env_pcre2.Append(CPPPATH=[thirdparty_dir])
env_pcre2.add_source_files(env.modules_sources, thirdparty_sources)
env_pcre2.Append(CPPFLAGS=thirdparty_flags)
env_pcre2.Append(CPPFLAGS=["-DPCRE2_CODE_UNIT_WIDTH=" + width])
pcre2_builtin("16")
pcre2_builtin("32")

File diff suppressed because it is too large Load diff

View file

@ -31,59 +31,53 @@
#ifndef REGEX_H
#define REGEX_H
#include "core/array.h"
#include "core/dictionary.h"
#include "core/map.h"
#include "core/reference.h"
#include "core/resource.h"
#include "core/ustring.h"
#include "core/vector.h"
class RegExNode;
class RegExMatch : public Reference {
GDCLASS(RegExMatch, Reference);
struct Group {
Variant name;
struct Range {
int start;
int length;
int end;
};
Vector<Group> captures;
String string;
String subject;
Vector<Range> data;
Map<String, int> names;
friend class RegEx;
friend class RegExSearch;
friend class RegExNodeCapturing;
friend class RegExNodeBackReference;
protected:
static void _bind_methods();
int _find(const Variant &p_name) const;
public:
String expand(const String &p_template) const;
String get_subject() const;
int get_group_count() const;
Array get_group_array() const;
Array get_names() const;
Dictionary get_name_dict() const;
Dictionary get_names() const;
Array get_strings() const;
String get_string(const Variant &p_name) const;
int get_start(const Variant &p_name) const;
int get_end(const Variant &p_name) const;
RegExMatch();
};
class RegEx : public Resource {
class RegEx : public Reference {
GDCLASS(RegEx, Resource);
GDCLASS(RegEx, Reference);
RegExNode *root;
Vector<Variant> group_names;
void *general_ctx;
void *code;
String pattern;
int lookahead_depth;
void _pattern_info(uint32_t what, void *where) const;
protected:
static void _bind_methods();
@ -91,9 +85,10 @@ protected:
public:
void clear();
Error compile(const String &p_pattern);
void _init(const String &p_pattern = "");
Ref<RegExMatch> search(const String &p_text, int p_start = 0, int p_end = -1) const;
String sub(const String &p_text, const String &p_replacement, bool p_all = false, int p_start = 0, int p_end = -1) const;
Ref<RegExMatch> search(const String &p_subject, int offset = 0, int end = -1) const;
String sub(const String &p_subject, const String &p_replacement, bool p_all = false, int p_start = 0, int p_end = -1) const;
bool is_valid() const;
String get_pattern() const;

View file

@ -188,6 +188,11 @@ def configure(env):
if any(platform.machine() in s for s in list_of_x86):
env["x86_libtheora_opt_gcc"] = True
# On Linux wchar_t should be 32-bits
# 16-bit library shouldn't be required due to compiler optimisations
if (env['builtin_pcre2'] == 'no'):
env.ParseConfig('pkg-config libpcre2-32 --cflags --libs')
## Flags
if (os.system("pkg-config --exists alsa") == 0): # 0 means found

12
thirdparty/README.md vendored
View file

@ -295,6 +295,18 @@ Files extracted from upstream source:
- celt/ and silk/ subfolders
- COPYING
## pcre2
- Upstream: http://www.pcre.org/
- Version: 10.23
- License: BSD-3-Clause
Files extracted from upstream source:
- Files listed in NON-AUTOTOOLS-BUILD steps 1-4
- All .h files in src/
- src/pcre2_jit_*.c and src/sljit/*
- AUTHORS and COPYING
## pvrtccompressor

36
thirdparty/pcre2/AUTHORS vendored Normal file
View file

@ -0,0 +1,36 @@
THE MAIN PCRE2 LIBRARY CODE
---------------------------
Written by: Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2017 University of Cambridge
All rights reserved
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
--------------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2010-2017 Zoltan Herczeg
All rights reserved.
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2009-2017 Zoltan Herczeg
All rights reserved.
####

83
thirdparty/pcre2/LICENCE vendored Normal file
View file

@ -0,0 +1,83 @@
PCRE2 LICENCE
-------------
PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as
specified below. The documentation for PCRE2, supplied in the "doc"
directory, is distributed under the same terms as the software itself. The data
in the testdata directory is not copyrighted and is in the public domain.
The basic library functions are written in C and are freestanding. Also
included in the distribution is a just-in-time compiler that can be used to
optimize pattern matching. This is an optional feature that can be omitted when
the library is built.
THE BASIC LIBRARY FUNCTIONS
---------------------------
Written by: Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2017 University of Cambridge
All rights reserved.
PCRE2 JUST-IN-TIME COMPILATION SUPPORT
--------------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2010-2017 Zoltan Herczeg
All rights reserved.
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Copyright(c) 2009-2017 Zoltan Herczeg
All rights reserved.
THE "BSD" LICENCE
-----------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of any
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
End

359
thirdparty/pcre2/src/config.h vendored Normal file
View file

@ -0,0 +1,359 @@
/* src/config.h. Generated from config.h.in by configure. */
/* src/config.h.in. Generated from configure.ac by autoheader. */
/* PCRE2 is written in Standard C, but there are a few non-standard things it
can cope with, allowing it to run on SunOS4 and other "close to standard"
systems.
In environments that support the GNU autotools, config.h.in is converted into
config.h by the "configure" script. In environments that use CMake,
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
hand" without using "configure" or CMake, you should copy the distributed
config.h.generic to config.h, and edit the macro definitions to be the way you
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
so that config.h is included at the start of every source.
Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
but if you do, default values will be taken from config.h for non-boolean
macros that are not defined on the command line.
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
macros are listed as a commented #undef in config.h.generic. Macros such as
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
sure both macros are undefined; an emulation function will then be used. */
/* By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined (to any
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
The build-time default can be overridden by the user of PCRE2 at runtime.
*/
/* #undef BSR_ANYCRLF */
/* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro to any value. When EBCDIC is set, PCRE2
assumes that all input strings are in EBCDIC. If you do not define this
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
is not possible to build a version of PCRE2 that supports both EBCDIC and
UTF-8/16/32. */
/* #undef EBCDIC */
/* In an EBCDIC environment, define this macro to any value to arrange for the
NL character to be 0x25 instead of the default 0x15. NL plays the role that
LF does in an ASCII/Unicode environment. */
/* #undef EBCDIC_NL25 */
/* Define to 1 if you have the `bcopy' function. */
/* #undef HAVE_BCOPY */
/* Define to 1 if you have the <bzlib.h> header file. */
/* #undef HAVE_BZLIB_H */
/* Define to 1 if you have the <dirent.h> header file. */
/* #undef HAVE_DIRENT_H */
/* Define to 1 if you have the <dlfcn.h> header file. */
/* #undef HAVE_DLFCN_H */
/* Define to 1 if you have the <editline/readline.h> header file. */
/* #undef HAVE_EDITLINE_READLINE_H */
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
/* #undef HAVE_EDIT_READLINE_READLINE_H */
/* Define to 1 if you have the <inttypes.h> header file. */
/* #undef HAVE_INTTYPES_H */
/* Define to 1 if you have the <limits.h> header file. */
/* #undef HAVE_LIMITS_H */
/* Define to 1 if you have the `memmove' function. */
/* #undef HAVE_MEMMOVE */
/* Define to 1 if you have the <memory.h> header file. */
/* #undef HAVE_MEMORY_H */
/* Define to 1 if you have the `mkostemp' function. */
/* #undef HAVE_MKOSTEMP */
/* Define if you have POSIX threads libraries and header files. */
/* #undef HAVE_PTHREAD */
/* Have PTHREAD_PRIO_INHERIT. */
/* #undef HAVE_PTHREAD_PRIO_INHERIT */
/* Define to 1 if you have the <readline/history.h> header file. */
/* #undef HAVE_READLINE_HISTORY_H */
/* Define to 1 if you have the <readline/readline.h> header file. */
/* #undef HAVE_READLINE_READLINE_H */
/* Define to 1 if you have the `secure_getenv' function. */
/* #undef HAVE_SECURE_GETENV */
/* Define to 1 if you have the <stdint.h> header file. */
/* #undef HAVE_STDINT_H */
/* Define to 1 if you have the <stdlib.h> header file. */
/* #undef HAVE_STDLIB_H */
/* Define to 1 if you have the `strerror' function. */
/* #undef HAVE_STRERROR */
/* Define to 1 if you have the <strings.h> header file. */
/* #undef HAVE_STRINGS_H */
/* Define to 1 if you have the <string.h> header file. */
/* #undef HAVE_STRING_H */
/* Define to 1 if you have the <sys/stat.h> header file. */
/* #undef HAVE_SYS_STAT_H */
/* Define to 1 if you have the <sys/types.h> header file. */
/* #undef HAVE_SYS_TYPES_H */
/* Define to 1 if you have the <sys/wait.h> header file. */
/* #undef HAVE_SYS_WAIT_H */
/* Define to 1 if you have the <unistd.h> header file. */
/* #undef HAVE_UNISTD_H */
/* Define to 1 if the compiler supports simple visibility declarations. */
/* #undef HAVE_VISIBILITY */
/* Define to 1 if you have the <windows.h> header file. */
/* #undef HAVE_WINDOWS_H */
/* Define to 1 if you have the <zlib.h> header file. */
/* #undef HAVE_ZLIB_H */
/* PCRE2 uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited
size. Define HEAP_MATCH_RECURSE to any value to get a version that doesn't
use recursion in the match() function; instead it creates its own stack by
steam using memory from the heap. For more detail, see the comments and
other stuff just above the match() function. */
/* #undef HEAP_MATCH_RECURSE */
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
allows for longer patterns in extreme cases. */
#ifndef LINK_SIZE
#define LINK_SIZE 2
#endif
/* Define to the sub-directory where libtool stores uninstalled libraries. */
/* This is ignored unless you are using libtool. */
#ifndef LT_OBJDIR
#define LT_OBJDIR ".libs/"
#endif
/* The value of MATCH_LIMIT determines the default number of times the
internal match() function can be called during a single execution of
pcre2_match(). There is a runtime interface for setting a different limit.
The limit exists in order to catch runaway regular expressions that take
for ever to determine that they do not match. The default is set very large
so that it does not accidentally catch legitimate cases. */
#ifndef MATCH_LIMIT
#define MATCH_LIMIT 10000000
#endif
/* The above limit applies to all calls of match(), whether or not they
increase the recursion depth. In some environments it is desirable to limit
the depth of recursive calls of match() more strictly, in order to restrict
the maximum amount of stack (or heap, if HEAP_MATCH_RECURSE is defined)
that is used. The value of MATCH_LIMIT_RECURSION applies only to recursive
calls of match(). To have any useful effect, it must be less than the value
of MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There
is a runtime method for setting a different limit. */
#ifndef MATCH_LIMIT_RECURSION
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
#endif
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#ifndef MAX_NAME_COUNT
#define MAX_NAME_COUNT 10000
#endif
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#ifndef MAX_NAME_SIZE
#define MAX_NAME_SIZE 32
#endif
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
/* #undef NEVER_BACKSLASH_C */
/* The value of NEWLINE_DEFAULT determines the default newline character
sequence. PCRE2 client programs can override this by selecting other values
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
(ANYCRLF). */
#ifndef NEWLINE_DEFAULT
#define NEWLINE_DEFAULT 2
#endif
/* Name of package */
#define PACKAGE "pcre2"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME "PCRE2"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE2 10.23"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre2"
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "10.23"
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
stack that is used while compiling a pattern. */
#ifndef PARENS_NEST_LIMIT
#define PARENS_NEST_LIMIT 250
#endif
/* The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
pcre2grep to hold parts of the file it is searching. The buffer will be
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing
very long lines. The actual amount of memory used by pcre2grep is three
times this number, because it allows for the buffering of "before" and
"after" lines. */
#ifndef PCRE2GREP_BUFSIZE
#define PCRE2GREP_BUFSIZE 20480
#endif
/* The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer
used by pcre2grep to hold parts of the file it is searching. The actual
amount of memory used by pcre2grep is three times this number, because it
allows for the buffering of "before" and "after" lines. */
#ifndef PCRE2GREP_MAX_BUFSIZE
#define PCRE2GREP_MAX_BUFSIZE 1048576
#endif
/* Define to any value to include debugging code. */
/* #undef PCRE2_DEBUG */
/* If you are compiling for a system other than a Unix-like system or
Win32, and it needs some magic to be inserted before the definition
of a function that is exported by the library, define this macro to
contain the relevant magic. If you do not define this macro, a suitable
__declspec value is used for Windows systems; in other environments
"extern" is used for a C compiler and "extern C" for a C++ compiler.
This macro appears at the start of every exported function that is part
of the external API. It does not appear on functions that are "external"
in the C sense, but which are internal to the library. */
/* #undef PCRE2_EXP_DEFN */
/* Define to any value if linking statically (TODO: make nice with Libtool) */
/* #undef PCRE2_STATIC */
/* Define to necessary symbol if this constant uses a non-standard name on
your system. */
/* #undef PTHREAD_CREATE_JOINABLE */
/* Define to 1 if you have the ANSI C header files. */
/* #undef STDC_HEADERS */
/* Define to any value to enable support for Just-In-Time compiling. */
/* #undef SUPPORT_JIT */
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
is able to handle .bz2 files. */
/* #undef SUPPORT_LIBBZ2 */
/* Define to any value to allow pcre2test to be linked with libedit. */
/* #undef SUPPORT_LIBEDIT */
/* Define to any value to allow pcre2test to be linked with libreadline. */
/* #undef SUPPORT_LIBREADLINE */
/* Define to any value to allow pcre2grep to be linked with libz, so that it
is able to handle .gz files. */
/* #undef SUPPORT_LIBZ */
/* Define to any value to enable callout script support in pcre2grep. */
/* #undef SUPPORT_PCRE2GREP_CALLOUT */
/* Define to any value to enable JIT support in pcre2grep. Note that this will
have no effect unless SUPPORT_JIT is also defined. */
/* #undef SUPPORT_PCRE2GREP_JIT */
/* Define to any value to enable the 16 bit PCRE2 library. */
/* #undef SUPPORT_PCRE2_16 */
/* Define to any value to enable the 32 bit PCRE2 library. */
/* #undef SUPPORT_PCRE2_32 */
/* Define to any value to enable the 8 bit PCRE2 library. */
/* #undef SUPPORT_PCRE2_8 */
/* Define to any value to enable support for Unicode and UTF encoding. This
will work even in an EBCDIC environment, but it is incompatible with the
EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
ASCII/Unicode, but not both at once. */
/* #undef SUPPORT_UNICODE */
/* Define to any value for valgrind support to find invalid memory reads. */
/* #undef SUPPORT_VALGRIND */
/* Enable extensions on AIX 3, Interix. */
#ifndef _ALL_SOURCE
# define _ALL_SOURCE 1
#endif
/* Enable GNU extensions on systems that have them. */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1
#endif
/* Enable threading extensions on Solaris. */
#ifndef _POSIX_PTHREAD_SEMANTICS
# define _POSIX_PTHREAD_SEMANTICS 1
#endif
/* Enable extensions on HP NonStop. */
#ifndef _TANDEM_SOURCE
# define _TANDEM_SOURCE 1
#endif
/* Enable general extensions on Solaris. */
#ifndef __EXTENSIONS__
# define __EXTENSIONS__ 1
#endif
/* Version number of package */
#define VERSION "10.23"
/* Define to 1 if on MINIX. */
/* #undef _MINIX */
/* Define to 2 if the system does not provide POSIX.1 features except with
this defined. */
/* #undef _POSIX_1_SOURCE */
/* Define to 1 if you need to in order for `stat' and other things to work. */
/* #undef _POSIX_SOURCE */
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */
/* Define to the type of a signed integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
/* #undef int64_t */
/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */

771
thirdparty/pcre2/src/pcre2.h vendored Normal file
View file

@ -0,0 +1,771 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions.
Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef PCRE2_H_IDEMPOTENT_GUARD
#define PCRE2_H_IDEMPOTENT_GUARD
/* The current PCRE version information. */
#define PCRE2_MAJOR 10
#define PCRE2_MINOR 23
#define PCRE2_PRERELEASE
#define PCRE2_DATE 2017-02-14
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
export setting is defined in pcre2_internal.h, which includes this file. So we
don't change existing definitions of PCRE2_EXP_DECL. */
#if defined(_WIN32) && !defined(PCRE2_STATIC)
# ifndef PCRE2_EXP_DECL
# define PCRE2_EXP_DECL extern __declspec(dllimport)
# endif
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCRE2_EXP_DECL
# ifdef __cplusplus
# define PCRE2_EXP_DECL extern "C"
# else
# define PCRE2_EXP_DECL extern
# endif
#endif
/* When compiling with the MSVC compiler, it is sometimes necessary to include
a "calling convention" before exported function names. (This is secondhand
information; I know nothing about MSVC myself). For example, something like
void __cdecl function(....)
might be needed. In order to make this easy, all the exported functions have
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
set, we ensure here that it has no effect. */
#ifndef PCRE2_CALL_CONVENTION
#define PCRE2_CALL_CONVENTION
#endif
/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and
uint8_t, UCHAR_MAX, etc are defined. */
#include <limits.h>
#include <stdlib.h>
#include <stdint.h>
/* Allow for C++ users compiling this directly. */
#ifdef __cplusplus
extern "C" {
#endif
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
is passed. Put these bits at the most significant end of the options word so
others can be added next to them */
#define PCRE2_ANCHORED 0x80000000u
#define PCRE2_NO_UTF_CHECK 0x40000000u
/* The following option bits can be passed only to pcre2_compile(). However,
they may affect compilation, JIT compilation, and/or interpretive execution.
The following tags indicate which:
C alters what is compiled by pcre2_compile()
J alters what is compiled by pcre2_jit_compile()
M is inspected during pcre2_match() execution
D is inspected during pcre2_dfa_match() execution
*/
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
#define PCRE2_ALT_BSUX 0x00000002u /* C */
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
#define PCRE2_CASELESS 0x00000008u /* C */
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
#define PCRE2_DOTALL 0x00000020u /* C */
#define PCRE2_DUPNAMES 0x00000040u /* C */
#define PCRE2_EXTENDED 0x00000080u /* C */
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
#define PCRE2_MULTILINE 0x00000400u /* C */
#define PCRE2_NEVER_UCP 0x00000800u /* C */
#define PCRE2_NEVER_UTF 0x00001000u /* C */
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
#define PCRE2_UCP 0x00020000u /* C J M D */
#define PCRE2_UNGREEDY 0x00040000u /* C */
#define PCRE2_UTF 0x00080000u /* C J M D */
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
/* These are for pcre2_jit_compile(). */
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these
functions (though pcre2_jit_match() ignores the latter since it bypasses all
sanity checks). */
#define PCRE2_NOTBOL 0x00000001u
#define PCRE2_NOTEOL 0x00000002u
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
#define PCRE2_PARTIAL_SOFT 0x00000010u
#define PCRE2_PARTIAL_HARD 0x00000020u
/* These are additional options for pcre2_dfa_match(). */
#define PCRE2_DFA_RESTART 0x00000040u
#define PCRE2_DFA_SHORTEST 0x00000080u
/* These are additional options for pcre2_substitute(), which passes any others
through to pcre2_match(). */
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u
/* A further option for pcre2_match(), not allowed for pcre2_dfa_match(),
ignored for pcre2_jit_match(). */
#define PCRE2_NO_JIT 0x00002000u
/* Newline and \R settings, for use in compile contexts. The newline values
must be kept in step with values set in config.h and both sets must all be
greater than zero. */
#define PCRE2_NEWLINE_CR 1
#define PCRE2_NEWLINE_LF 2
#define PCRE2_NEWLINE_CRLF 3
#define PCRE2_NEWLINE_ANY 4
#define PCRE2_NEWLINE_ANYCRLF 5
#define PCRE2_BSR_UNICODE 1
#define PCRE2_BSR_ANYCRLF 2
/* Error codes: no match and partial match are "expected" errors. */
#define PCRE2_ERROR_NOMATCH (-1)
#define PCRE2_ERROR_PARTIAL (-2)
/* Error codes for UTF-8 validity checks */
#define PCRE2_ERROR_UTF8_ERR1 (-3)
#define PCRE2_ERROR_UTF8_ERR2 (-4)
#define PCRE2_ERROR_UTF8_ERR3 (-5)
#define PCRE2_ERROR_UTF8_ERR4 (-6)
#define PCRE2_ERROR_UTF8_ERR5 (-7)
#define PCRE2_ERROR_UTF8_ERR6 (-8)
#define PCRE2_ERROR_UTF8_ERR7 (-9)
#define PCRE2_ERROR_UTF8_ERR8 (-10)
#define PCRE2_ERROR_UTF8_ERR9 (-11)
#define PCRE2_ERROR_UTF8_ERR10 (-12)
#define PCRE2_ERROR_UTF8_ERR11 (-13)
#define PCRE2_ERROR_UTF8_ERR12 (-14)
#define PCRE2_ERROR_UTF8_ERR13 (-15)
#define PCRE2_ERROR_UTF8_ERR14 (-16)
#define PCRE2_ERROR_UTF8_ERR15 (-17)
#define PCRE2_ERROR_UTF8_ERR16 (-18)
#define PCRE2_ERROR_UTF8_ERR17 (-19)
#define PCRE2_ERROR_UTF8_ERR18 (-20)
#define PCRE2_ERROR_UTF8_ERR19 (-21)
#define PCRE2_ERROR_UTF8_ERR20 (-22)
#define PCRE2_ERROR_UTF8_ERR21 (-23)
/* Error codes for UTF-16 validity checks */
#define PCRE2_ERROR_UTF16_ERR1 (-24)
#define PCRE2_ERROR_UTF16_ERR2 (-25)
#define PCRE2_ERROR_UTF16_ERR3 (-26)
/* Error codes for UTF-32 validity checks */
#define PCRE2_ERROR_UTF32_ERR1 (-27)
#define PCRE2_ERROR_UTF32_ERR2 (-28)
/* Error codes for pcre2[_dfa]_match(), substring extraction functions, context
functions, and serializing functions. They are in numerical order. Originally
they were in alphabetical order too, but now that PCRE2 is released, the
numbers must not be changed. */
#define PCRE2_ERROR_BADDATA (-29)
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
#define PCRE2_ERROR_BADMAGIC (-31)
#define PCRE2_ERROR_BADMODE (-32)
#define PCRE2_ERROR_BADOFFSET (-33)
#define PCRE2_ERROR_BADOPTION (-34)
#define PCRE2_ERROR_BADREPLACEMENT (-35)
#define PCRE2_ERROR_BADUTFOFFSET (-36)
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
#define PCRE2_ERROR_DFA_BADRESTART (-38)
#define PCRE2_ERROR_DFA_RECURSE (-39)
#define PCRE2_ERROR_DFA_UCOND (-40)
#define PCRE2_ERROR_DFA_UFUNC (-41)
#define PCRE2_ERROR_DFA_UITEM (-42)
#define PCRE2_ERROR_DFA_WSSIZE (-43)
#define PCRE2_ERROR_INTERNAL (-44)
#define PCRE2_ERROR_JIT_BADOPTION (-45)
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
#define PCRE2_ERROR_MATCHLIMIT (-47)
#define PCRE2_ERROR_NOMEMORY (-48)
#define PCRE2_ERROR_NOSUBSTRING (-49)
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
#define PCRE2_ERROR_NULL (-51)
#define PCRE2_ERROR_RECURSELOOP (-52)
#define PCRE2_ERROR_RECURSIONLIMIT (-53)
#define PCRE2_ERROR_UNAVAILABLE (-54)
#define PCRE2_ERROR_UNSET (-55)
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
#define PCRE2_ERROR_BADREPESCAPE (-57)
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
/* Request types for pcre2_pattern_info() */
#define PCRE2_INFO_ALLOPTIONS 0
#define PCRE2_INFO_ARGOPTIONS 1
#define PCRE2_INFO_BACKREFMAX 2
#define PCRE2_INFO_BSR 3
#define PCRE2_INFO_CAPTURECOUNT 4
#define PCRE2_INFO_FIRSTCODEUNIT 5
#define PCRE2_INFO_FIRSTCODETYPE 6
#define PCRE2_INFO_FIRSTBITMAP 7
#define PCRE2_INFO_HASCRORLF 8
#define PCRE2_INFO_JCHANGED 9
#define PCRE2_INFO_JITSIZE 10
#define PCRE2_INFO_LASTCODEUNIT 11
#define PCRE2_INFO_LASTCODETYPE 12
#define PCRE2_INFO_MATCHEMPTY 13
#define PCRE2_INFO_MATCHLIMIT 14
#define PCRE2_INFO_MAXLOOKBEHIND 15
#define PCRE2_INFO_MINLENGTH 16
#define PCRE2_INFO_NAMECOUNT 17
#define PCRE2_INFO_NAMEENTRYSIZE 18
#define PCRE2_INFO_NAMETABLE 19
#define PCRE2_INFO_NEWLINE 20
#define PCRE2_INFO_RECURSIONLIMIT 21
#define PCRE2_INFO_SIZE 22
#define PCRE2_INFO_HASBACKSLASHC 23
/* Request types for pcre2_config(). */
#define PCRE2_CONFIG_BSR 0
#define PCRE2_CONFIG_JIT 1
#define PCRE2_CONFIG_JITTARGET 2
#define PCRE2_CONFIG_LINKSIZE 3
#define PCRE2_CONFIG_MATCHLIMIT 4
#define PCRE2_CONFIG_NEWLINE 5
#define PCRE2_CONFIG_PARENSLIMIT 6
#define PCRE2_CONFIG_RECURSIONLIMIT 7
#define PCRE2_CONFIG_STACKRECURSE 8
#define PCRE2_CONFIG_UNICODE 9
#define PCRE2_CONFIG_UNICODE_VERSION 10
#define PCRE2_CONFIG_VERSION 11
/* Types for code units in patterns and subject strings. */
typedef uint8_t PCRE2_UCHAR8;
typedef uint16_t PCRE2_UCHAR16;
typedef uint32_t PCRE2_UCHAR32;
typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
including pattern offsets for errors and subject offsets after a match. It is
an alias for size_t, and PCRE2_SIZE_MAX is the corresponding SIZE_MAX.

We define special values to indicate zero-terminated strings and unset offsets
in the offset vector (ovector). Note that PCRE2_ZERO_TERMINATED and PCRE2_UNSET
expand to the same all-bits-set PCRE2_SIZE value; they are told apart only by
where they are used (a string length versus an ovector entry). */
#define PCRE2_SIZE size_t
#define PCRE2_SIZE_MAX SIZE_MAX
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
/* Generic types for opaque structures and JIT callback functions. These
declarations are defined in a macro that is expanded for each width later.

Each pcre2_real_xxx struct is only forward-declared here, so its members are
not visible through this header; the public pcre2_xxx name is a typedef for
it. pcre2_jit_callback is a pointer to a function that takes a void * and
returns a pointer to a pcre2_jit_stack; it is the callback type taken by
pcre2_jit_stack_assign(). */
#define PCRE2_TYPES_LIST \
struct pcre2_real_general_context; \
typedef struct pcre2_real_general_context pcre2_general_context; \
\
struct pcre2_real_compile_context; \
typedef struct pcre2_real_compile_context pcre2_compile_context; \
\
struct pcre2_real_match_context; \
typedef struct pcre2_real_match_context pcre2_match_context; \
\
struct pcre2_real_code; \
typedef struct pcre2_real_code pcre2_code; \
\
struct pcre2_real_match_data; \
typedef struct pcre2_real_match_data pcre2_match_data; \
\
struct pcre2_real_jit_stack; \
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
\
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
/* The structures for passing out data to callout-related callback functions.
We use structures so that new fields can be added on the end in future
versions, without changing the API of the function, thereby allowing old
clients to work without modification. The "version" field at the start of each
structure identifies the version of the block.

pcre2_callout_block is the argument type of the function that is registered
with pcre2_set_callout(); pcre2_callout_enumerate_block is the argument type
of the function that is passed to pcre2_callout_enumerate().

Define the generic versions in a macro; the width-specific versions are
generated from this macro below. */
#define PCRE2_STRUCTURE_LIST \
typedef struct pcre2_callout_block { \
uint32_t version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \
uint32_t callout_number; /* Number compiled into pattern */ \
uint32_t capture_top; /* Max current capture */ \
uint32_t capture_last; /* Most recently closed capture */ \
PCRE2_SIZE *offset_vector; /* The offset vector */ \
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
PCRE2_SPTR subject; /* The subject being matched */ \
PCRE2_SIZE subject_length; /* The length of the subject */ \
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
/* ------------------- Added for Version 1 -------------------------- */ \
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
/* ------------------------------------------------------------------ */ \
} pcre2_callout_block; \
\
typedef struct pcre2_callout_enumerate_block { \
uint32_t version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
uint32_t callout_number; /* Number compiled into pattern */ \
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
/* ------------------------------------------------------------------ */ \
} pcre2_callout_enumerate_block;
/* List the generic forms of all other functions in macros, which will be
expanded for each width below. Start with functions that give general
information. pcre2_config() takes one of the PCRE2_CONFIG_xxx request types
and a pointer to where the information is to be put. */
#define PCRE2_GENERAL_INFO_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
/* Functions for manipulating contexts. First the general context, which can
be copied, created, and freed. pcre2_general_context_create() takes two
function pointers, of types void *(*)(PCRE2_SIZE, void *) and
void (*)(void *, void *), followed by a void * (presumably a custom allocator,
the matching deallocator, and a user-data pointer handed to both; the
parameter names are not given here, so confirm against the API reference). */
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
*pcre2_general_context_copy(pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
*pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \
void (*)(void *, void *), void *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_general_context_free(pcre2_general_context *);
/* Functions for the compile context: copy, create (from a general context)
and free, followed by the pcre2_set_xxx() functions that adjust a compile
context. Every one of these setters takes the compile context as its first
argument and returns an int. The uint32_t values given to pcre2_set_newline()
and pcre2_set_bsr() are presumably the PCRE2_NEWLINE_xxx and PCRE2_BSR_xxx
settings defined earlier in this header "for use in compile contexts". */
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
*pcre2_compile_context_copy(pcre2_compile_context *); \
PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
*pcre2_compile_context_create(pcre2_general_context *);\
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_compile_context_free(pcre2_compile_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_newline(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
int (*)(uint32_t, void *), void *);
/* Functions for the match context: copy, create (from a general context) and
free, followed by the pcre2_set_xxx() functions that adjust a match context.
Every one of these setters takes the match context as its first argument and
returns an int. The function given to pcre2_set_callout() receives a pointer
to a pcre2_callout_block and a void *, and returns an int. */
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
*pcre2_match_context_copy(pcre2_match_context *); \
PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
*pcre2_match_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_match_context_free(pcre2_match_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_recursion_memory_management(pcre2_match_context *, \
void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *);
/* Functions concerned with compiling a pattern to PCRE internal code.
pcre2_compile() takes the pattern as a PCRE2_SPTR with a PCRE2_SIZE length, a
uint32_t of options, an int * and a PCRE2_SIZE * (presumably outputs for an
error code and the pattern offset of the error; confirm against the API
reference), and an optional compile context; it returns a pointer to a
pcre2_code. The remaining functions free a pcre2_code or take a const
pcre2_code * and return a pcre2_code *. */
#define PCRE2_COMPILE_FUNCTIONS \
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
*pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
pcre2_compile_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_code_free(pcre2_code *); \
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
*pcre2_code_copy(const pcre2_code *); \
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
*pcre2_code_copy_with_tables(const pcre2_code *);
/* Functions that give information about a compiled pattern.
pcre2_pattern_info() takes one of the PCRE2_INFO_xxx request types and a
void * for the result. pcre2_callout_enumerate() takes a function that
receives a pointer to a pcre2_callout_enumerate_block and a void *, plus a
void * argument of its own. */
#define PCRE2_PATTERN_INFO_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_callout_enumerate(const pcre2_code *, \
int (*)(pcre2_callout_enumerate_block *, void *), void *);
/* Functions for running a match and inspecting the result. A pcre2_match_data
block is created either from a uint32_t (presumably the number of ovector
pairs; confirm against the API reference) or from a compiled pattern, is
passed to pcre2_match() or pcre2_dfa_match(), and is released with
pcre2_match_data_free(). pcre2_dfa_match() has the same first seven parameter
types as pcre2_match(), followed by an int * and a PCRE2_SIZE. The
pcre2_get_xxx() functions all take the match data block and return,
respectively, a PCRE2_SPTR mark, the uint32_t ovector count, a pointer to the
PCRE2_SIZE ovector, and the PCRE2_SIZE start character offset. */
#define PCRE2_MATCH_FUNCTIONS \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create(uint32_t, pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
*pcre2_match_data_create_from_pattern(const pcre2_code *, \
pcre2_general_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
uint32_t, pcre2_match_data *, pcre2_match_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_match_data_free(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
pcre2_get_mark(pcre2_match_data *); \
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
pcre2_get_ovector_count(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
*pcre2_get_ovector_pointer(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
pcre2_get_startchar(pcre2_match_data *);
/* Convenience functions for handling matched substrings. The copy, get and
length functions come in _byname and _bynumber forms, which identify the
substring by a PCRE2_SPTR name or by a uint32_t number respectively; all of
them operate on a pcre2_match_data block. pcre2_substring_nametable_scan() and
pcre2_substring_number_from_name() take the compiled pattern (a const
pcre2_code *) instead. Presumably memory obtained from the _get_ functions is
released with pcre2_substring_free(), and a list from
pcre2_substring_list_get() with pcre2_substring_list_free(); confirm against
the API reference. */
#define PCRE2_SUBSTRING_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
PCRE2_SIZE *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_substring_free(PCRE2_UCHAR *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
PCRE2_SPTR *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_substring_list_free(PCRE2_SPTR *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
/* Functions for serializing / deserializing compiled patterns. The encode and
decode functions work on an array of pcre2_code pointers accompanied by an
int32_t (presumably the number of entries in the array; confirm against the
API reference) and on a uint8_t byte stream; all but pcre2_serialize_free()
return an int32_t. */
#define PCRE2_SERIALIZE_FUNCTIONS \
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
PCRE2_SIZE *, pcre2_general_context *); \
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
pcre2_general_context *); \
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
pcre2_serialize_get_number_of_codes(const uint8_t *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_serialize_free(uint8_t *);
/* Convenience function for match + substitute. The first seven parameters
have the same types, in the same order, as those of pcre2_match(). They are
followed by a PCRE2_SPTR and a PCRE2_SIZE (presumably the replacement string
and its length) and by a PCRE2_UCHAR * and a PCRE2_SIZE * (presumably the
output buffer and its length, updated on return); confirm against the API
reference. */
#define PCRE2_SUBSTITUTE_FUNCTION \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
/* Functions for JIT processing. pcre2_jit_compile() takes a (non-const)
compiled pattern and a uint32_t of options. pcre2_jit_match() has exactly the
same parameter list as pcre2_match(). A pcre2_jit_stack is created from two
PCRE2_SIZE values and an optional general context, and is released with
pcre2_jit_stack_free(); pcre2_jit_stack_assign() takes a match context, a
pcre2_jit_callback and a void *. */
#define PCRE2_JIT_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_jit_compile(pcre2_code *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
uint32_t, pcre2_match_data *, pcre2_match_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_jit_free_unused_memory(pcre2_general_context *); \
PCRE2_EXP_DECL pcre2_jit_stack PCRE2_CALL_CONVENTION \
*pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_jit_stack_free(pcre2_jit_stack *);
/* Other miscellaneous functions. pcre2_get_error_message() takes an int error
code, a PCRE2_UCHAR buffer and the PCRE2_SIZE length of that buffer, and
returns an int. pcre2_maketables() takes an optional general context and
returns a pointer to const uint8_t character tables.

The last declaration deliberately has no trailing backslash: the macro
definition must end on that line, so that it cannot absorb whatever line
happens to follow it in the file. */
#define PCRE2_OTHER_FUNCTIONS \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \
*pcre2_maketables(pcre2_general_context *);
/* Define macros that generate width-specific names from generic versions. The
three-level macro scheme is necessary to get the macros expanded when we want
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
generating three versions of everything below. After that, PCRE2_SUFFIX will be
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
pcre2_compile are called by application code. */
#define PCRE2_JOIN(a,b) a ## b
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
/* Data types */
#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)
#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
/* Data blocks */
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
/* Functions: the complete list in alphabetical order */
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
/* Now generate all three sets of width-specific structures and function
prototypes. PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS simply strings together every
list macro defined above, types and structures first so that the function
prototypes that follow can refer to them. */
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
PCRE2_TYPES_LIST \
PCRE2_STRUCTURE_LIST \
PCRE2_GENERAL_INFO_FUNCTIONS \
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
PCRE2_MATCH_CONTEXT_FUNCTIONS \
PCRE2_COMPILE_FUNCTIONS \
PCRE2_PATTERN_INFO_FUNCTIONS \
PCRE2_MATCH_FUNCTIONS \
PCRE2_SUBSTRING_FUNCTIONS \
PCRE2_SERIALIZE_FUNCTIONS \
PCRE2_SUBSTITUTE_FUNCTION \
PCRE2_JIT_FUNCTIONS \
PCRE2_OTHER_FUNCTIONS
/* Expand the combined list once per code unit width. At this point
PCRE2_SUFFIX pastes PCRE2_LOCAL_WIDTH onto each generic name, so redefining
PCRE2_LOCAL_WIDTH between expansions yields the _8, _16 and _32 names in
turn. */
#define PCRE2_LOCAL_WIDTH 8
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_LOCAL_WIDTH
#define PCRE2_LOCAL_WIDTH 16
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_LOCAL_WIDTH
#define PCRE2_LOCAL_WIDTH 32
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_LOCAL_WIDTH
/* Undefine the list macros; they are no longer needed. */
#undef PCRE2_TYPES_LIST
#undef PCRE2_STRUCTURE_LIST
#undef PCRE2_GENERAL_INFO_FUNCTIONS
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
#undef PCRE2_COMPILE_FUNCTIONS
#undef PCRE2_PATTERN_INFO_FUNCTIONS
#undef PCRE2_MATCH_FUNCTIONS
#undef PCRE2_SUBSTRING_FUNCTIONS
#undef PCRE2_SERIALIZE_FUNCTIONS
#undef PCRE2_SUBSTITUTE_FUNCTION
#undef PCRE2_JIT_FUNCTIONS
#undef PCRE2_OTHER_FUNCTIONS
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
/* Discard the PCRE2_LOCAL_WIDTH-based definition that was used for the three
expansions above. */
#undef PCRE2_SUFFIX
#ifndef PCRE2_CODE_UNIT_WIDTH
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
#error Use 8, 16, or 32; or 0 for a multi-width application.
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
PCRE2_CODE_UNIT_WIDTH == 16 || \
PCRE2_CODE_UNIT_WIDTH == 32
/* Single-width application: from here on each generic name, for example
pcre2_compile, expands to the name for the selected width. */
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
#elif PCRE2_CODE_UNIT_WIDTH == 0
/* Multi-width application: PCRE2_SUFFIX appends nothing, so the generic names
do not map onto any of the width-specific names declared above. Such an
application refers to the _8, _16 and _32 names directly. */
#undef PCRE2_JOIN
#undef PCRE2_GLUE
#define PCRE2_SUFFIX(a) a
#else
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
#endif
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* PCRE2_H_IDEMPOTENT_GUARD */
/* End of pcre2.h */

1293
thirdparty/pcre2/src/pcre2_auto_possess.c vendored Normal file

File diff suppressed because it is too large Load diff

198
thirdparty/pcre2/src/pcre2_chartables.c vendored Normal file
View file

@ -0,0 +1,198 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This file contains character tables that are used when no external tables
are passed to PCRE2 by the application that calls it. The tables are used only
for characters whose code values are less than 256.
This is a default version of the tables that assumes ASCII encoding. A program
called dftables (which is distributed with PCRE2) can be used to build
alternative versions of this file. This is necessary if you are running in an
EBCDIC environment, or if you want to default to a different encoding, for
example ISO-8859-1. When dftables is run, it creates these tables in the
current locale. If PCRE2 is configured with --enable-rebuild-chartables, this
happens automatically.
The following #includes are present because without them gcc 4.x may remove the
array definition from the final binary if PCRE2 is built into a static library
and dead code stripping is activated. This leads to link errors. Pulling in the
header ensures that the array gets flagged as "someone outside this compilation
unit might reference this" and so it will always be supplied to the linker. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
const uint8_t PRIV(default_tables)[] = {
/* This table is a lower casing table. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table is a case flipping table. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
*/
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
/* End of pcre2_chartables.c */

9517
thirdparty/pcre2/src/pcre2_compile.c vendored Normal file

File diff suppressed because it is too large Load diff

218
thirdparty/pcre2/src/pcre2_config.c vendored Normal file
View file

@ -0,0 +1,218 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
its value gets changed by pcre2_internal.h to be in code units, so this
definition has to stay ahead of the #include that follows it. The saved value
is what pcre2_config() reports for PCRE2_CONFIG_LINKSIZE. */
static int configured_link_size = LINK_SIZE;
#include "pcre2_internal.h"
/* These macros are the standard way of turning unquoted text into C strings.
They allow macros like PCRE2_MAJOR to be defined without quotes, which is
convenient for user programs that want to test their values. XSTRING() goes
through a second macro level so that its argument is macro-expanded before
STRING() applies the # operator; using STRING() directly would stringify the
name of a macro rather than its value. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/*************************************************
* Return info about what features are configured *
*************************************************/
/* If where is NULL, the length of memory required is returned.
Arguments:
what what information is required
where where to put the information
Returns: 0 if a numerical value is returned
>= 0 if a string value
PCRE2_ERROR_BADOPTION if "what" not recognized
or JIT target requested when JIT not enabled
*/
/* Reports one build-time configuration item, selected by "what". Numeric items
are stored through "where" as a uint32_t and the function then returns 0.
String items (JIT target, Unicode version, PCRE2 version) are copied to "where"
with PRIV(strcpy_c8). A NULL "where" only asks how much space is needed. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_config(uint32_t what, void *where)
{
if (where == NULL) /* Requests a length */
{
switch(what)
{
default:
return PCRE2_ERROR_BADOPTION;
/* Every numeric item needs room for exactly one uint32_t. */
case PCRE2_CONFIG_BSR:
case PCRE2_CONFIG_JIT:
case PCRE2_CONFIG_LINKSIZE:
case PCRE2_CONFIG_MATCHLIMIT:
case PCRE2_CONFIG_NEWLINE:
case PCRE2_CONFIG_PARENSLIMIT:
case PCRE2_CONFIG_RECURSIONLIMIT:
case PCRE2_CONFIG_STACKRECURSE:
case PCRE2_CONFIG_UNICODE:
return sizeof(uint32_t);
/* These are handled below */
case PCRE2_CONFIG_JITTARGET:
case PCRE2_CONFIG_UNICODE_VERSION:
case PCRE2_CONFIG_VERSION:
break;
}
}
/* Past this point a numeric item always has a non-NULL "where", because the
block above has already returned for the NULL case. Only the three string items
can still arrive with a NULL "where", and each of them tests for that itself. */
switch (what)
{
default:
return PCRE2_ERROR_BADOPTION;
case PCRE2_CONFIG_BSR:
#ifdef BSR_ANYCRLF
*((uint32_t *)where) = PCRE2_BSR_ANYCRLF;
#else
*((uint32_t *)where) = PCRE2_BSR_UNICODE;
#endif
break;
case PCRE2_CONFIG_JIT:
#ifdef SUPPORT_JIT
*((uint32_t *)where) = 1;
#else
*((uint32_t *)where) = 0;
#endif
break;
case PCRE2_CONFIG_JITTARGET:
#ifdef SUPPORT_JIT
{
const char *v = PRIV(jit_get_target)();
/* The result is one more than the string length; presumably the extra unit
counts the terminating zero and PRIV(strcpy_c8) returns the number of
characters it copied - confirm against its definition. The same expression is
used for the other two string items below. */
return (int)(1 + ((where == NULL)?
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
}
#else
return PCRE2_ERROR_BADOPTION;
#endif
case PCRE2_CONFIG_LINKSIZE:
/* Uses the value captured before pcre2_internal.h was included. */
*((uint32_t *)where) = (uint32_t)configured_link_size;
break;
case PCRE2_CONFIG_MATCHLIMIT:
*((uint32_t *)where) = MATCH_LIMIT;
break;
case PCRE2_CONFIG_NEWLINE:
*((uint32_t *)where) = NEWLINE_DEFAULT;
break;
case PCRE2_CONFIG_PARENSLIMIT:
*((uint32_t *)where) = PARENS_NEST_LIMIT;
break;
case PCRE2_CONFIG_RECURSIONLIMIT:
*((uint32_t *)where) = MATCH_LIMIT_RECURSION;
break;
case PCRE2_CONFIG_STACKRECURSE:
/* Reports 0 when HEAP_MATCH_RECURSE is defined and 1 otherwise. */
#ifdef HEAP_MATCH_RECURSE
*((uint32_t *)where) = 0;
#else
*((uint32_t *)where) = 1;
#endif
break;
case PCRE2_CONFIG_UNICODE_VERSION:
{
#if defined SUPPORT_UNICODE
const char *v = PRIV(unicode_version);
#else
const char *v = "Unicode not supported";
#endif
return (int)(1 + ((where == NULL)?
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
}
/* Not reached: the braced block above always returns. */
break;
case PCRE2_CONFIG_UNICODE:
#if defined SUPPORT_UNICODE
*((uint32_t *)where) = 1;
#else
*((uint32_t *)where) = 0;
#endif
break;
/* The hackery in setting "v" below is to cope with the case when
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
If the second alternative is used in this case, it does not leave a space
before the date. On the other hand, if all four macros are put into a single
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
There are problems using an "obvious" approach like this:
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE2_MINOR)
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE)
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
of STRING(). The C standard states: "If (before argument substitution) any
argument consists of no preprocessing tokens, the behavior is undefined." It
turns out that gcc treats this case as a single empty string - which is what
we really want - but Visual C grumbles about the lack of an argument for the
macro. Unfortunately, both are within their rights. As there seems to be no
way to test for a macro's value being empty at compile time, we have to
resort to a runtime test. */
case PCRE2_CONFIG_VERSION:
{
/* XSTRING(Z PCRE2_PRERELEASE) is just "Z" when PCRE2_PRERELEASE is empty, so
a zero at index 1 identifies a real release. */
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
return (int)(1 + ((where == NULL)?
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
}
}
/* Reached only by the numeric items, via their "break". */
return 0;
}
/* End of pcre2_config.c */

391
thirdparty/pcre2/src/pcre2_context.c vendored Normal file
View file

@ -0,0 +1,391 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
* Default malloc/free functions *
*************************************************/
/* Ignore the "user data" argument in each case. */
/* Default allocator: forwards to the C library malloc(). The second argument
exists only so that the signature matches user-supplied allocators. */
static void *default_malloc(size_t size, void *data)
{
  void *block = malloc(size);
  (void)data;   /* Deliberately unused */
  return block;
}
/* Default deallocator: forwards to the C library free(). The second argument
exists only so that the signature matches user-supplied deallocators. */
static void default_free(void *block, void *data)
{
  free(block);
  (void)data;   /* Deliberately unused */
}
/*************************************************
* Get a block and save memory control *
*************************************************/
/* Internal helper: obtains a block of memory whose first bytes hold a
pcre2_memctl, so that the block can later be released with the same memory
functions that created it.

Arguments:
  size        amount of memory required
  memctl      pointer to a memctl block to allocate with and copy, or NULL to
                use the C library malloc() and record the default functions

Returns:      pointer to memory or NULL on failure
*/
extern void *
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
{
  void *block;
  pcre2_memctl *header;

  if (memctl != NULL)
    block = memctl->malloc(size, memctl->memory_data);
  else
    block = malloc(size);

  if (block == NULL) return NULL;

  /* The memory control data lives at the very start of the new block. */
  header = (pcre2_memctl *)block;
  if (memctl != NULL)
    {
    *header = *memctl;
    }
  else
    {
    header->malloc = default_malloc;
    header->free = default_free;
    header->memory_data = NULL;
    }

  return block;
}
/*************************************************
* Create and initialize contexts *
*************************************************/
/* Initializing for compile and match contexts is done in separate, private
functions so that these can be called from functions such as pcre2_compile()
when an external context is not supplied. The initializing functions have an
option to set up default memory management. */
/* Creates a general context that records the caller's memory functions and
their user data. A NULL function pointer selects the corresponding default.
The context itself is allocated with the chosen allocator. Returns NULL if
that allocation fails. */
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
  void (*private_free)(void *, void *), void *memory_data)
{
  void *(*alloc_fn)(size_t, void *) =
    (private_malloc != NULL)? private_malloc : default_malloc;
  void (*release_fn)(void *, void *) =
    (private_free != NULL)? private_free : default_free;
  pcre2_general_context *gcontext =
    alloc_fn(sizeof(pcre2_real_general_context), memory_data);

  if (gcontext != NULL)
    {
    gcontext->memctl.malloc = alloc_fn;
    gcontext->memctl.free = release_fn;
    gcontext->memctl.memory_data = memory_data;
    }
  return gcontext;
}
/* A default compile context is set up to save having to initialize at run time
when no context is supplied to the compile function. It is also the template
that pcre2_compile_context_create() copies into each new context. The
initializer is positional, so its entries must stay in the same order as the
members of the structure declaration. */
const pcre2_compile_context PRIV(default_compile_context) = {
{ default_malloc, default_free, NULL }, /* Default memory handling */
NULL, /* Stack guard */
NULL, /* Stack guard data */
PRIV(default_tables), /* Character tables */
PCRE2_UNSET, /* Max pattern length */
BSR_DEFAULT, /* Backslash R default */
NEWLINE_DEFAULT, /* Newline convention */
PARENS_NEST_LIMIT }; /* As it says */
/* Creates a compile context. The new block starts out as a copy of the default
compile context; if a general context was supplied, its memory control data
then replaces the default memory handling at the start of the block. Returns
NULL if the allocation fails. */
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_create(pcre2_general_context *gcontext)
{
  pcre2_memctl *source = (pcre2_memctl *)gcontext;
  pcre2_compile_context *ccontext = PRIV(memctl_malloc)(
    sizeof(pcre2_real_compile_context), source);

  if (ccontext != NULL)
    {
    *ccontext = PRIV(default_compile_context);
    if (source != NULL) *((pcre2_memctl *)ccontext) = *source;
    }
  return ccontext;
}
/* A default match context is set up to save having to initialize at run time
when no context is supplied to a match function. It is also the template that
pcre2_match_context_create() copies into each new context. The initializer is
positional, so its entries must stay in the same order as the members of the
structure declaration, including the conditionally compiled ones. */
const pcre2_match_context PRIV(default_match_context) = {
{ default_malloc, default_free, NULL }, /* Default memory handling */
#ifdef HEAP_MATCH_RECURSE
/* Presumably the stack_memctl member set by
pcre2_set_recursion_memory_management() - confirm against the declaration. */
{ default_malloc, default_free, NULL },
#endif
#ifdef SUPPORT_JIT
/* Two JIT-only pointer members, unset by default; their names are not visible
in this file. */
NULL,
NULL,
#endif
/* Presumably the callout function and its data, as set by
pcre2_set_callout() - confirm against the declaration. */
NULL,
NULL,
PCRE2_UNSET, /* Offset limit */
MATCH_LIMIT, /* Presumably the match limit */
MATCH_LIMIT_RECURSION }; /* Presumably the recursion limit */
/* Creates a match context. The new block starts out as a copy of the default
match context; if a general context was supplied, its memory control data then
replaces the default memory handling at the start of the block. Returns NULL if
the allocation fails. */
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_create(pcre2_general_context *gcontext)
{
  pcre2_memctl *source = (pcre2_memctl *)gcontext;
  pcre2_match_context *mcontext = PRIV(memctl_malloc)(
    sizeof(pcre2_real_match_context), source);

  if (mcontext != NULL)
    {
    *mcontext = PRIV(default_match_context);
    if (source != NULL) *((pcre2_memctl *)mcontext) = *source;
    }
  return mcontext;
}
/*************************************************
* Context copy functions *
*************************************************/
/* Returns a byte-for-byte duplicate of a general context, allocated with the
memory function stored in the original, or NULL if that allocation fails. */
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_copy(pcre2_general_context *gcontext)
{
  const size_t nbytes = sizeof(pcre2_real_general_context);
  pcre2_general_context *duplicate =
    gcontext->memctl.malloc(nbytes, gcontext->memctl.memory_data);

  if (duplicate != NULL) memcpy(duplicate, gcontext, nbytes);
  return duplicate;
}
/* Returns a byte-for-byte duplicate of a compile context, allocated with the
memory function stored in the original, or NULL if that allocation fails. */
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
{
  const size_t nbytes = sizeof(pcre2_real_compile_context);
  pcre2_compile_context *duplicate =
    ccontext->memctl.malloc(nbytes, ccontext->memctl.memory_data);

  if (duplicate != NULL) memcpy(duplicate, ccontext, nbytes);
  return duplicate;
}
/* Returns a byte-for-byte duplicate of a match context, allocated with the
memory function stored in the original, or NULL if that allocation fails. */
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_copy(pcre2_match_context *mcontext)
{
  const size_t nbytes = sizeof(pcre2_real_match_context);
  pcre2_match_context *duplicate =
    mcontext->memctl.malloc(nbytes, mcontext->memctl.memory_data);

  if (duplicate != NULL) memcpy(duplicate, mcontext, nbytes);
  return duplicate;
}
/*************************************************
* Context free functions *
*************************************************/
/* Releases a general context using the free function stored inside it. A NULL
argument is accepted and ignored. */
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_general_context_free(pcre2_general_context *gcontext)
{
  if (gcontext == NULL) return;
  gcontext->memctl.free(gcontext, gcontext->memctl.memory_data);
}
/* Releases a compile context using the free function stored inside it. A NULL
argument is accepted and ignored. */
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_compile_context_free(pcre2_compile_context *ccontext)
{
  if (ccontext == NULL) return;
  ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
}
/* Releases a match context using the free function stored inside it. A NULL
argument is accepted and ignored. */
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_match_context_free(pcre2_match_context *mcontext)
{
  if (mcontext == NULL) return;
  mcontext->memctl.free(mcontext, mcontext->memctl.memory_data);
}
/*************************************************
* Set values in contexts *
*************************************************/
/* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
data is given. Only some of the functions are able to test the validity of the
data. */
/* ------------ Compile contexts ------------ */
/* Stores the character tables pointer in a compile context. The pointer is
recorded as given, without validation. Always returns 0. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_character_tables(pcre2_compile_context *ccontext,
  const unsigned char *tables)
{
  ccontext->tables = tables;
  return 0;
}
/* Sets the \R convention in a compile context. Only PCRE2_BSR_ANYCRLF and
PCRE2_BSR_UNICODE are accepted. Any other value leaves the context unchanged
and yields PCRE2_ERROR_BADDATA. Returns 0 on success. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
{
  if (value != PCRE2_BSR_ANYCRLF && value != PCRE2_BSR_UNICODE)
    return PCRE2_ERROR_BADDATA;

  ccontext->bsr_convention = value;
  return 0;
}
/* Stores the maximum pattern length in a compile context. The value is
recorded as given, without validation. Always returns 0. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
{
  ccontext->max_pattern_length = length;
  return 0;
}
/* Sets the newline convention in a compile context. The accepted values are
PCRE2_NEWLINE_CR, _LF, _CRLF, _ANY and _ANYCRLF. Any other value leaves the
context unchanged and yields PCRE2_ERROR_BADDATA. Returns 0 on success. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
{
  if (newline != PCRE2_NEWLINE_CR &&
      newline != PCRE2_NEWLINE_LF &&
      newline != PCRE2_NEWLINE_CRLF &&
      newline != PCRE2_NEWLINE_ANY &&
      newline != PCRE2_NEWLINE_ANYCRLF)
    return PCRE2_ERROR_BADDATA;

  ccontext->newline_convention = newline;
  return 0;
}
/* Stores the parentheses nesting limit in a compile context. The value is
recorded as given, without validation. Always returns 0. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
{
  ccontext->parens_nest_limit = limit;
  return 0;
}
/* Stores a stack guard function and its user data in a compile context. Both
are recorded as given, without validation. Always returns 0. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
  int (*guard)(uint32_t, void *), void *user_data)
{
  ccontext->stack_guard_data = user_data;
  ccontext->stack_guard = guard;
  return 0;
}
/* ------------ Match contexts ------------ */
/* Stores a callout function and its user data in a match context. Both are
recorded as given, without validation. Always returns 0. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_callout(pcre2_match_context *mcontext,
  int (*callout)(pcre2_callout_block *, void *), void *callout_data)
{
  mcontext->callout_data = callout_data;
  mcontext->callout = callout;
  return 0;
}
/* Stores the match limit in a match context. The value is recorded as given,
without validation. Always returns 0. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
{
  mcontext->match_limit = limit;
  return 0;
}
/* Stores the offset limit in a match context. The value is recorded as given,
without validation. Always returns 0. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
{
  mcontext->offset_limit = limit;
  return 0;
}
/* Stores the recursion limit in a match context. The value is recorded as
given, without validation. Always returns 0. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
{
  mcontext->recursion_limit = limit;
  return 0;
}
/* Stores the memory functions and user data for match recursion in a match
context. This has an effect only when HEAP_MATCH_RECURSE is defined; in other
builds every argument is ignored. Always returns 0. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
  void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
  void *mydata)
{
#ifndef HEAP_MATCH_RECURSE
  /* Nothing to record in this build; reference the arguments so that the
  compiler does not warn about them being unused. */
  (void)mcontext;
  (void)mymalloc;
  (void)myfree;
  (void)mydata;
#else
  mcontext->stack_memctl.malloc = mymalloc;
  mcontext->stack_memctl.free = myfree;
  mcontext->stack_memctl.memory_data = mydata;
#endif
  return 0;
}
/* End of pcre2_context.c */

3628
thirdparty/pcre2/src/pcre2_dfa_match.c vendored Normal file

File diff suppressed because it is too large Load diff

325
thirdparty/pcre2/src/pcre2_error.c vendored Normal file
View file

@ -0,0 +1,325 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/* Stringizing helpers. XSTRING() goes through a second macro level so that its
argument is macro-expanded before STRING() applies the # operator. They are
used below to embed the values of macros such as MAX_NAME_SIZE in the message
texts. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/* The texts of compile-time error messages. Compile-time error numbers start
at COMPILE_ERROR_BASE (100).
This used to be a table of strings, but in order to reduce the number of
relocations needed when a shared library is loaded dynamically, it is now one
long string. We cannot use a table of offsets, because the lengths of inserts
such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
pcre2_get_error_message() counts through to the one it wants - this isn't a
performance issue because these strings are used only when there is an error.
Each substring ends with \0 to insert a null character. This includes the final
substring, so that the whole string ends with \0\0, which can be detected when
counting through.
The numbered comments inside the table give the position of the message that
follows them, counting from zero; that position is the error number minus
COMPILE_ERROR_BASE. Because messages are found by counting, a new message must
not be inserted in the middle of the table. */
static const unsigned char compile_error_texts[] =
"no error\0"
"\\ at end of pattern\0"
"\\c at end of pattern\0"
"unrecognized character follows \\\0"
"numbers out of order in {} quantifier\0"
/* 5 */
"number too big in {} quantifier\0"
"missing terminating ] for character class\0"
"invalid escape sequence in character class\0"
"range out of order in character class\0"
"quantifier does not follow a repeatable item\0"
/* 10 */
"internal error: unexpected repeat\0"
"unrecognized character after (? or (?-\0"
"POSIX named classes are supported only within a class\0"
"POSIX collating elements are not supported\0"
"missing closing parenthesis\0"
/* 15 */
"reference to non-existent subpattern\0"
"pattern passed as NULL\0"
"unrecognised compile-time option bit(s)\0"
"missing ) after (?# comment\0"
"parentheses are too deeply nested\0"
/* 20 */
"regular expression is too large\0"
"failed to allocate heap memory\0"
"unmatched closing parenthesis\0"
"internal error: code overflow\0"
"missing closing parenthesis for condition\0"
/* 25 */
"lookbehind assertion is not fixed length\0"
"a relative value of zero is not allowed\0"
"conditional group contains more than two branches\0"
"assertion expected after (?( or (?(?C)\0"
"digit expected after (?+ or (?-\0"
/* 30 */
"unknown POSIX class name\0"
"internal error in pcre2_study(): should not occur\0"
"this version of PCRE2 does not have Unicode support\0"
"parentheses are too deeply nested (stack check)\0"
"character code point value in \\x{} or \\o{} is too large\0"
/* 35 */
"lookbehind is too complicated\0"
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
"PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
"number after (?C is greater than 255\0"
"closing parenthesis for (?C expected\0"
/* 40 */
"invalid escape sequence in (*VERB) name\0"
"unrecognized character after (?P\0"
"syntax error in subpattern name (missing terminator)\0"
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
"group name must start with a non-digit\0"
/* 45 */
"this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
"malformed \\P or \\p sequence\0"
"unknown property name after \\P or \\p\0"
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
/* 50 */
"invalid range in character class\0"
"octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
"internal error: overran compiling workspace\0"
"internal error: previously-checked referenced subpattern not found\0"
"DEFINE group contains more than one branch\0"
/* 55 */
"missing opening brace after \\o\0"
"internal error: unknown newline setting\0"
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
"(?R (recursive pattern call) must be followed by a closing parenthesis\0"
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
/* 60 */
"(*VERB) not recognized or malformed\0"
"group number is too big\0"
"subpattern name expected\0"
"internal error: parsed pattern overflow\0"
"non-octal character in \\o{} (closing brace missing?)\0"
/* 65 */
"different names for subpatterns of the same number are not allowed\0"
"(*MARK) must have an argument\0"
"non-hex character in \\x{} (closing brace missing?)\0"
/* Exactly one of the next two texts is compiled in, so the count is the same
for both character sets. */
#ifndef EBCDIC
"\\c must be followed by a printable ASCII character\0"
#else
"\\c must be followed by a letter or one of [\\]^_?\0"
#endif
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
/* 70 */
"internal error: unknown meta code in check_lookbehinds()\0"
"\\N is not supported in a class\0"
"callout string is too long\0"
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
"using UTF is disabled by the application\0"
/* 75 */
"using UCP is disabled by the application\0"
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character code point value in \\u.... sequence is too large\0"
"digits missing in \\x{} or \\o{}\0"
"syntax error or number too big in (?(VERSION condition\0"
/* 80 */
"internal error: unknown opcode in auto_possessify()\0"
"missing terminating delimiter for callout with string argument\0"
"unrecognized string delimiter follows (?C\0"
"using \\C is disabled by the application\0"
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
/* 85 */
"using \\C is disabled in this PCRE2 library\0"
"regular expression is too complicated\0"
"lookbehind assertion is too long\0"
"pattern string is longer than the limit set by the application\0"
"internal error: unknown code in parsed pattern\0"
/* 90 */
"internal error: bad code value in parsed_skip()\0"
;
/* Match-time and UTF error texts are in the same format: one long string of
zero-terminated messages, ending with an extra zero. These error numbers are
negative, and pcre2_get_error_message() counts through the table using the
negated error number, so the numbered comments give the absolute value of the
error number of the message that follows them. */
static const unsigned char match_error_texts[] =
"no error\0"
"no match\0"
"partial match\0"
"UTF-8 error: 1 byte missing at end\0"
"UTF-8 error: 2 bytes missing at end\0"
/* 5 */
"UTF-8 error: 3 bytes missing at end\0"
"UTF-8 error: 4 bytes missing at end\0"
"UTF-8 error: 5 bytes missing at end\0"
"UTF-8 error: byte 2 top bits not 0x80\0"
"UTF-8 error: byte 3 top bits not 0x80\0"
/* 10 */
"UTF-8 error: byte 4 top bits not 0x80\0"
"UTF-8 error: byte 5 top bits not 0x80\0"
"UTF-8 error: byte 6 top bits not 0x80\0"
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
/* 15 */
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
"UTF-8 error: overlong 2-byte sequence\0"
"UTF-8 error: overlong 3-byte sequence\0"
"UTF-8 error: overlong 4-byte sequence\0"
/* 20 */
"UTF-8 error: overlong 5-byte sequence\0"
"UTF-8 error: overlong 6-byte sequence\0"
"UTF-8 error: isolated byte with 0x80 bit set\0"
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
"UTF-16 error: missing low surrogate at end\0"
/* 25 */
"UTF-16 error: invalid low surrogate\0"
"UTF-16 error: isolated low surrogate\0"
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
"bad data value\0"
/* 30 */
"patterns do not all use the same character tables\0"
"magic number missing\0"
"pattern compiled in wrong mode: 8/16/32-bit error\0"
"bad offset value\0"
"bad option value\0"
/* 35 */
"invalid replacement string\0"
"bad offset into UTF string\0"
"callout error code\0" /* Never returned by PCRE2 itself */
"invalid data in workspace for DFA restart\0"
"too much recursion for DFA matching\0"
/* 40 */
"backreference condition or recursion test is not supported for DFA matching\0"
"function is not supported for DFA matching\0"
"pattern contains an item that is not supported for DFA matching\0"
"workspace size exceeded in DFA matching\0"
"internal error - pattern overwritten?\0"
/* 45 */
"bad JIT option\0"
"JIT stack limit reached\0"
"match limit exceeded\0"
"no more memory\0"
"unknown substring\0"
/* 50 */
"non-unique substring name\0"
"NULL argument passed\0"
"nested recursion at the same subject position\0"
"recursion limit exceeded\0"
"requested value is not available\0"
/* 55 */
"requested value is not set\0"
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
"bad escape sequence in replacement string\0"
"expected closing curly bracket in replacement string\0"
"bad substitution in replacement string\0"
/* 60 */
"match with end before start is not supported\0"
"too many replacements (more than INT_MAX)\0"
"bad serialized data\0"
;
/*************************************************
* Return error message *
*************************************************/
/* This function copies an error message into a buffer whose units are of an
appropriate width. Error numbers are positive for compile-time errors, and
negative for match-time errors (except for UTF errors), but the numbers are all
distinct.
Arguments:
enumber error number
buffer where to put the message (zero terminated)
size size of the buffer
Returns: length of message if all is well
negative on error
*/
/* Copies the text for an error number into a caller-supplied buffer, widening
each character to the buffer's code unit size.

Arguments:
  enumber     error number: >= COMPILE_ERROR_BASE selects a compile-time
                message, a negative value selects a match-time or UTF message,
                and anything in between is invalid
  buffer      where to put the message (zero terminated)
  size        size of the buffer, in code units

Returns:      length of the message, excluding the terminating zero
              PCRE2_ERROR_NOMEMORY if size is zero or the message does not
                fit; in the latter case a truncated, zero-terminated message
                is left in the buffer
              PCRE2_ERROR_BADDATA if there is no message for enumber; the
                buffer is not written in this case
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size)
{
  const unsigned char *message;
  PCRE2_SIZE i;
  unsigned int n;   /* Number of messages to skip */

  if (size == 0) return PCRE2_ERROR_NOMEMORY;

  if (enumber >= COMPILE_ERROR_BASE)  /* Compile error */
    {
    message = compile_error_texts;
    n = (unsigned int)(enumber - COMPILE_ERROR_BASE);
    }
  else if (enumber < 0)               /* Match or UTF error */
    {
    message = match_error_texts;
    /* Negate in unsigned arithmetic. Writing -enumber would overflow a signed
    int when enumber is INT_MIN, which is undefined behaviour; this form is
    well defined and yields a count that runs off the end of the table, so the
    result is PCRE2_ERROR_BADDATA. */
    n = 0u - (unsigned int)enumber;
    }
  else                                /* Invalid error number */
    {
    message = (unsigned char *)"\0";  /* Empty message list */
    n = 1;
    }

  /* Step over n messages. Two consecutive zeros mark the end of a table, so
  reaching them means that the requested message does not exist. */
  for (; n > 0; n--)
    {
    while (*message++ != CHAR_NULL) {};
    if (*message == CHAR_NULL) return PCRE2_ERROR_BADDATA;
    }

  /* Copy the message, always leaving room for the terminating zero. */
  for (i = 0; *message != 0; i++)
    {
    if (i >= size - 1)
      {
      buffer[i] = 0;     /* Terminate partial message */
      return PCRE2_ERROR_NOMEMORY;
      }
    buffer[i] = *message++;
    }

  buffer[i] = 0;
  return (int)i;
}
/* End of pcre2_error.c */

View file

@ -0,0 +1,218 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains a single function that scans through a compiled pattern
until it finds a capturing bracket with the given number, or, if the number is
negative, an instance of OP_REVERSE for a lookbehind. The function is called
from pcre2_compile.c and also from pcre2_study.c when finding the minimum
matching length. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
* Scan compiled regex for specific bracket *
*************************************************/
/*
Arguments:
code points to start of expression
utf TRUE in UTF mode
number the required bracket number or negative to find a lookbehind
Returns: pointer to the opcode for the bracket, or NULL if not found
*/
/* Walks the compiled pattern one item at a time, starting at "code", until it
reaches OP_END or finds what was asked for: the capturing bracket whose number
equals "number", or, when "number" is negative, the first OP_REVERSE. Most of
the body is concerned with working out how long the current item is so that
the walk stays aligned on opcode boundaries. */
PCRE2_SPTR
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
{
for (;;)
{
PCRE2_UCHAR c = *code;
if (c == OP_END) return NULL;
/* XCLASS is used for classes that cannot be represented just by a bit map.
This includes negated single high-valued characters. CALLOUT_STR is used for
callouts with string arguments. In both cases the length in the table is
zero; the actual length is stored in the compiled code. */
if (c == OP_XCLASS) code += GET(code, 1);
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
/* Handle lookbehind */
else if (c == OP_REVERSE)
{
if (number < 0) return (PCRE2_UCHAR *)code;
code += PRIV(OP_lengths)[c];
}
/* Handle capturing bracket */
else if (c == OP_CBRA || c == OP_SCBRA ||
c == OP_CBRAPOS || c == OP_SCBRAPOS)
{
/* The bracket number is read from the code units after the opcode and its
link field. */
int n = (int)GET2(code, 1+LINK_SIZE);
if (n == number) return (PCRE2_UCHAR *)code;
code += PRIV(OP_lengths)[c];
}
/* Otherwise, we can get the item's length from the table, except that for
repeated character types, we have to test for \p and \P, which have an extra
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
must add in its length. */
else
{
switch(c)
{
/* Repeats with no count: the character type follows the opcode directly. */
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEPOSSTAR:
case OP_TYPEPOSPLUS:
case OP_TYPEPOSQUERY:
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
break;
/* Repeats with a count: the character type comes after an IMM2_SIZE field. */
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEEXACT:
case OP_TYPEPOSUPTO:
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
code += 2;
break;
/* Verbs with an argument: code[1] holds the length of the argument. */
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
code += code[1];
break;
}
/* Add in the fixed length from the table */
code += PRIV(OP_lengths)[c];
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
followed by a multi-byte character. The length in the table is a minimum, so
we have to arrange to skip the extra bytes. */
#ifdef MAYBE_UTF_MULTI
if (utf) switch(c)
{
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
case OP_EXACT:
case OP_EXACTI:
case OP_NOTEXACT:
case OP_NOTEXACTI:
case OP_UPTO:
case OP_UPTOI:
case OP_NOTUPTO:
case OP_NOTUPTOI:
case OP_MINUPTO:
case OP_MINUPTOI:
case OP_NOTMINUPTO:
case OP_NOTMINUPTOI:
case OP_POSUPTO:
case OP_POSUPTOI:
case OP_NOTPOSUPTO:
case OP_NOTPOSUPTOI:
case OP_STAR:
case OP_STARI:
case OP_NOTSTAR:
case OP_NOTSTARI:
case OP_MINSTAR:
case OP_MINSTARI:
case OP_NOTMINSTAR:
case OP_NOTMINSTARI:
case OP_POSSTAR:
case OP_POSSTARI:
case OP_NOTPOSSTAR:
case OP_NOTPOSSTARI:
case OP_PLUS:
case OP_PLUSI:
case OP_NOTPLUS:
case OP_NOTPLUSI:
case OP_MINPLUS:
case OP_MINPLUSI:
case OP_NOTMINPLUS:
case OP_NOTMINPLUSI:
case OP_POSPLUS:
case OP_POSPLUSI:
case OP_NOTPOSPLUS:
case OP_NOTPOSPLUSI:
case OP_QUERY:
case OP_QUERYI:
case OP_NOTQUERY:
case OP_NOTQUERYI:
case OP_MINQUERY:
case OP_MINQUERYI:
case OP_NOTMINQUERY:
case OP_NOTMINQUERYI:
case OP_POSQUERY:
case OP_POSQUERYI:
case OP_NOTPOSQUERY:
case OP_NOTPOSQUERYI:
/* The pointer has already been advanced by the table length, so code[-1] is
the last code unit of the minimum-length item, which is presumably the first
unit of the character - confirm against HAS_EXTRALEN in pcre2_internal.h. */
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
break;
}
#else
(void)(utf); /* Keep compiler happy by referencing function argument */
#endif /* MAYBE_UTF_MULTI */
}
}
}
/* End of pcre2_find_bracket.c */

1940
thirdparty/pcre2/src/pcre2_internal.h vendored Normal file

File diff suppressed because it is too large Load diff

862
thirdparty/pcre2/src/pcre2_intmodedep.h vendored Normal file
View file

@ -0,0 +1,862 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains mode-dependent macro and structure definitions. The
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
These mode-dependent items are kept in a separate file so that they can also be
#included multiple times for different code unit widths by pcre2test in order
to have access to the hidden structures at all supported widths.
Some of the mode-dependent macros are required at different widths for
different parts of the pcre2test code (in particular, the included
pcre2_printint.c file). We undefine them here so that they can be re-defined for
multiple inclusions. Not all of these are used in pcre2test, but it's easier
just to undefine them all. */
/* Clear the mode-dependent macros so that this header can be included again
for another code unit width. NOTE(review): some macros defined further down
(UCHAR21*, GETUTF16*, MAYBE_UTF_MULTI, SUPPORT_WIDE_CHARS) are not in this
list; confirm that repeated inclusion tolerates them. */
#undef ACROSSCHAR
#undef BACKCHAR
#undef BYTES2CU
#undef CU2BYTES
#undef FORWARDCHAR
#undef FORWARDCHARTEST
#undef GET
#undef GET2
#undef GETCHAR
#undef GETCHARINC
#undef GETCHARINCTEST
#undef GETCHARLEN
#undef GETCHARLENTEST
#undef GETCHARTEST
#undef GET_EXTRALEN
#undef HAS_EXTRALEN
#undef IMM2_SIZE
#undef MAX_255
#undef MAX_MARK
#undef MAX_PATTERN_SIZE
#undef MAX_UTF_SINGLE_CU
#undef NOT_FIRSTCU
#undef PUT
#undef PUT2
#undef PUT2INC
#undef PUTCHAR
#undef PUTINC
#undef TABLE_GET
/* -------------------------- MACROS ----------------------------- */
/* PCRE keeps offsets in its compiled code as at least 16-bit quantities
(always stored in big-endian order in 8-bit mode) by default. These are used,
for example, to link from the start of a subpattern to its alternatives and its
end. The use of 16 bits per offset limits the size of an 8-bit compiled regex
to around 64K, which is big enough for almost everybody. However, I received a
request for an even bigger limit. For this reason, and also to make the code
easier to maintain, the storing and loading of offsets from the compiled code
unit string is now handled by the macros that are defined here.
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
values of 3 or 4 are also supported. */
/* ------------------- 8-bit support ------------------ */
#if PCRE2_CODE_UNIT_WIDTH == 8
#if LINK_SIZE == 2
/* 8-bit code units, 2-byte links. PUT stores the value d at a[n..n+1] with the
most significant byte first; GET loads it back. Each macro is a single
parenthesised expression and every use of "a" is parenthesised, so an argument
such as "ptr + 1" expands correctly. Arguments are evaluated more than once. */
#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  ((unsigned int)(((a)[n] << 8) | (a)[(n)+1]))
#define MAX_PATTERN_SIZE (1 << 16)
#elif LINK_SIZE == 3
/* 8-bit code units, 3-byte links. PUT stores the value d at a[n..n+2] with the
most significant byte first; GET loads it back. Each macro is a single
parenthesised expression and every use of "a" is parenthesised, so an argument
such as "ptr + 1" expands correctly. Arguments are evaluated more than once. */
#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+2] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  ((unsigned int)(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2]))
#define MAX_PATTERN_SIZE (1 << 24)
#elif LINK_SIZE == 4
/* 8-bit code units, 4-byte links. PUT stores the value d at a[n..n+3] with the
most significant byte first; GET loads it back. Each macro is a single
parenthesised expression and every use of "a" is parenthesised, so an argument
such as "ptr + 1" expands correctly. Arguments are evaluated more than once.
In GET the top byte is converted to unsigned int before the 24-bit shift, so
that a byte of 0x80 or more does not overflow a signed int. */
#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 24), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+2] = (PCRE2_UCHAR)((d) >> 8), \
   (a)[(n)+3] = (PCRE2_UCHAR)((d) & 255))
#define GET(a,n) \
  ((unsigned int)(((unsigned int)(a)[n] << 24) | ((a)[(n)+1] << 16) | \
   ((a)[(n)+2] << 8) | (a)[(n)+3]))
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
#else
#error LINK_SIZE must be 2, 3, or 4
#endif
/* ------------------- 16-bit support ------------------ */
#elif PCRE2_CODE_UNIT_WIDTH == 16
#if LINK_SIZE == 2
/* 16-bit code units: a configured LINK_SIZE of 2 (bytes) becomes one code
unit. PUT stores d in a[n]; GET loads it. Every use of "a" is parenthesised so
that an argument such as "ptr + 1" expands correctly. */
#undef LINK_SIZE
#define LINK_SIZE 1
#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)(d))
#define GET(a,n) \
  ((a)[n])
#define MAX_PATTERN_SIZE (1 << 16)
#elif LINK_SIZE == 3 || LINK_SIZE == 4
/* 16-bit code units: a configured LINK_SIZE of 3 or 4 (bytes) becomes two code
units. PUT stores d at a[n..n+1], most significant unit first; GET loads it
back. Each macro is a single parenthesised expression and every use of "a" is
parenthesised. In GET the high unit is converted to unsigned int before the
16-bit shift, so that a unit of 0x8000 or more does not overflow a signed
int. Arguments are evaluated more than once. */
#undef LINK_SIZE
#define LINK_SIZE 2
#define PUT(a,n,d) \
  ((a)[n] = (PCRE2_UCHAR)((d) >> 16), \
   (a)[(n)+1] = (PCRE2_UCHAR)((d) & 65535))
#define GET(a,n) \
  ((unsigned int)(((unsigned int)(a)[n] << 16) | (a)[(n)+1]))
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
#else
#error LINK_SIZE must be 2, 3, or 4
#endif
/* ------------------- 32-bit support ------------------ */
#elif PCRE2_CODE_UNIT_WIDTH == 32
/* 32-bit code units: a link always fits in one code unit, whatever LINK_SIZE
was configured. Every use of "a" is parenthesised so that an argument such as
"ptr + 1" expands correctly. */
#undef LINK_SIZE
#define LINK_SIZE 1
#define PUT(a,n,d) \
  ((a)[n] = (d))
#define GET(a,n) \
  ((a)[n])
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
#else
#error Unsupported compiling mode
#endif
/* --------------- Other mode-specific macros ----------------- */
/* PCRE uses some other (at least) 16-bit quantities that do not change when
the size of offsets changes. These are used for repeat counts and for other
things such as capturing parenthesis numbers in back references.
Define the number of code units required to hold a 16-bit count/offset, and
macros to load and store such a value. For reasons that I do not understand,
the expression in the 8-bit GET2 macro is treated by gcc as a signed
expression, even when a is declared as unsigned. It seems that any kind of
arithmetic results in a signed value. Hence the cast. (This is C's integer
promotion rule: operands narrower than int are converted to int before the
shift and the OR are evaluated.)
Each macro is a single parenthesised expression and every use of "a" is
parenthesised, so an argument such as "ptr + 1" expands correctly. Arguments
are evaluated more than once in 8-bit mode. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define IMM2_SIZE 2
#define GET2(a,n) ((unsigned int)(((a)[n] << 8) | (a)[(n)+1]))
#define PUT2(a,n,d) ((a)[n] = (d) >> 8, (a)[(n)+1] = (d) & 255)
#else /* Code units are 16 or 32 bits */
#define IMM2_SIZE 1
#define GET2(a,n) ((a)[n])
#define PUT2(a,n,d) ((a)[n] = (d))
#endif
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
whether its argument, which is assumed to be one code unit, is less than 256.
The maximum length of a MARK name must fit in one code unit; currently it is
set to 255 or 65535. The TABLE_GET macro is used to access elements of tables
containing exactly 256 items. When code points can be greater than 255, a check
is needed before accessing these tables. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MAX_255(c) TRUE
#define MAX_MARK ((1u << 8) - 1)
#ifdef SUPPORT_UNICODE
#define SUPPORT_WIDE_CHARS
#endif /* SUPPORT_UNICODE */
#define TABLE_GET(c, table, default) ((table)[c])
#else /* Code units are 16 or 32 bits */
#define MAX_255(c) ((c) <= 255u)
#define MAX_MARK ((1u << 16) - 1)
#define SUPPORT_WIDE_CHARS
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
#endif
/* ----------------- Character-handling macros ----------------- */
/* There is a proposed future special "UTF-21" mode, in which only the lowest
21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
high-order bits available to the application for other uses. In preparation for
the future implementation of this mode, there are macros that load a data item
and, if in this special mode, mask it to 21 bits. These macros all have names
starting with UCHAR21. In all other modes, including the normal 32-bit
library, the macros all have the same simple definitions. When the new mode is
implemented, it is expected that these definitions will be varied appropriately
using #ifdef when compiling the library that supports the special mode. */
/* Load the code unit at eptr without moving the pointer. The plain and TEST
forms currently have the same definition. */
#define UCHAR21(eptr) (*(eptr))
#define UCHAR21TEST(eptr) (*(eptr))
/* Load the code unit at eptr and post-increment the pointer. The plain and
TEST forms currently have the same definition. */
#define UCHAR21INC(eptr) (*(eptr)++)
#define UCHAR21INCTEST(eptr) (*(eptr)++)
/* When UTF encoding is being used, a character is no longer just a single
byte in 8-bit mode or a single short in 16-bit mode. The macros for character
handling generate simple sequences when used in the basic mode, and more
complicated ones for UTF characters. GETCHARLENTEST and other macros are not
used when UTF is not supported. To make sure they can never even appear when
UTF support is omitted, we don't even define them. */
#ifndef SUPPORT_UNICODE
/* The commented-out names below are deliberately left undefined when Unicode
support is omitted. */
/* #define MAX_UTF_SINGLE_CU */
/* #define HAS_EXTRALEN(c) */
/* #define GET_EXTRALEN(c) */
/* #define NOT_FIRSTCU(c) */
/* Without Unicode support every character is exactly one code unit. Each of
the GETCHAR-family macros below expands to a complete statement including its
own terminating semicolon; the arguments are not parenthesised. */
#define GETCHAR(c, eptr) c = *eptr;
#define GETCHARTEST(c, eptr) c = *eptr;
#define GETCHARINC(c, eptr) c = *eptr++;
#define GETCHARINCTEST(c, eptr) c = *eptr++;
/* The len argument is not touched here. */
#define GETCHARLEN(c, eptr, len) c = *eptr;
/* Store c at *p; the value of the expression is 1. */
#define PUTCHAR(c, p) (*p = c, 1)
/* #define GETCHARLENTEST(c, eptr, len) */
/* #define BACKCHAR(eptr) */
/* #define FORWARDCHAR(eptr) */
/* #define FORWARDCHARTEST(eptr,end) */
/* #define ACROSSCHAR(condition, eptr, action) */
#else /* SUPPORT_UNICODE */
/* ------------------- 8-bit support ------------------ */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
/* The largest UTF code point that can be encoded as a single code unit. */
#define MAX_UTF_SINGLE_CU 127
/* Tests whether the code point needs extra characters to decode. */
#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)
/* Returns the number of additional code units when HAS_EXTRALEN(c) is TRUE.
Otherwise the result is undefined. The value is looked up in utf8_table4,
indexed by the low six bits of c. */
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu])
/* Returns TRUE, if the given value is not the first code unit of a UTF
sequence. */
#define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u)
/* NOTE: each GETCHAR-family macro below expands to two statements (an
assignment followed by an "if"), so a use that is the body of an if/else or a
loop must be enclosed in braces. */
/* Get the next UTF-8 character, not advancing the pointer. This is called when
we know we are in UTF-8 mode. */
#define GETCHAR(c, eptr) \
c = *eptr; \
if (c >= 0xc0u) GETUTF8(c, eptr);
/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
pointer. A variable named "utf" must be in scope at the point of use. */
#define GETCHARTEST(c, eptr) \
c = *eptr; \
if (utf && c >= 0xc0u) GETUTF8(c, eptr);
/* Get the next UTF-8 character, advancing the pointer. This is called when we
know we are in UTF-8 mode. */
#define GETCHARINC(c, eptr) \
c = *eptr++; \
if (c >= 0xc0u) GETUTF8INC(c, eptr);
/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-8 mode. */
#define GETCHARINCTEST(c, eptr) \
c = *eptr++; \
if (utf && c >= 0xc0u) GETUTF8INC(c, eptr);
/* Get the next UTF-8 character, not advancing the pointer, incrementing length
if there are extra bytes. This is called when we know we are in UTF-8 mode. */
#define GETCHARLEN(c, eptr, len) \
c = *eptr; \
if (c >= 0xc0u) GETUTF8LEN(c, eptr, len);
/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
pointer, incrementing length if there are extra bytes. This is called when we
do not know if we are in UTF-8 mode. */
#define GETCHARLENTEST(c, eptr, len) \
c = *eptr; \
if (utf && c >= 0xc0u) GETUTF8LEN(c, eptr, len);
/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-8 mode - we don't put a test within the macro
because almost all calls are already within a block of UTF-8 only code. */
#define BACKCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr--
/* Same as above, just in the other direction. */
#define FORWARDCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr++
#define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0u) == 0x80u) eptr++
/* Same as above, but it allows a fully customizable form. NOTE(review): the
second argument is masked directly rather than dereferenced, so callers
presumably pass the code unit value (for example *ptr), not a pointer -
confirm at the call sites. */
#define ACROSSCHAR(condition, eptr, action) \
while((condition) && ((eptr) & 0xc0u) == 0x80u) action
/* Deposit a character into memory, returning the number of code units. A
variable named "utf" must be in scope at the point of use. */
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
PRIV(ord2utf)(c,p) : (*p = c, 1))
/* ------------------- 16-bit support ------------------ */
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
/* The largest UTF code point that can be encoded as a single code unit. */
#define MAX_UTF_SINGLE_CU 65535
/* Tests whether the code point needs extra characters to decode, that is,
whether c is in the range 0xd800-0xdbff (a high surrogate). */
#define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u)
/* Returns the number of additional code units when HAS_EXTRALEN(c) is TRUE.
Otherwise the result is undefined. */
#define GET_EXTRALEN(c) 1
/* Returns TRUE, if the given value is not the first code unit of a UTF
sequence, that is, c is in the range 0xdc00-0xdfff (a low surrogate). */
#define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u)
/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer. */
#define GETUTF16(c, eptr) \
{ c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; }
/* NOTE: each GETCHAR-family macro below expands to two statements (an
assignment followed by an "if"), so a use that is the body of an if/else or a
loop must be enclosed in braces. */
/* Get the next UTF-16 character, not advancing the pointer. This is called when
we know we are in UTF-16 mode. */
#define GETCHAR(c, eptr) \
c = *eptr; \
if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
pointer. A variable named "utf" must be in scope at the point of use. */
#define GETCHARTEST(c, eptr) \
c = *eptr; \
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
the pointer. */
#define GETUTF16INC(c, eptr) \
{ c = (((c & 0x3ffu) << 10) | (*eptr++ & 0x3ffu)) + 0x10000u; }
/* Get the next UTF-16 character, advancing the pointer. This is called when we
know we are in UTF-16 mode. */
#define GETCHARINC(c, eptr) \
c = *eptr++; \
if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-16 mode. */
#define GETCHARINCTEST(c, eptr) \
c = *eptr++; \
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer, incrementing the length. */
#define GETUTF16LEN(c, eptr, len) \
{ c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; len++; }
/* Get the next UTF-16 character, not advancing the pointer, incrementing
length if there is a low surrogate. This is called when we know we are in
UTF-16 mode. */
#define GETCHARLEN(c, eptr, len) \
c = *eptr; \
if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the
pointer, incrementing length if there is a low surrogate. This is called when
we do not know if we are in UTF-16 mode. */
#define GETCHARLENTEST(c, eptr, len) \
c = *eptr; \
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-16 mode - we don't put a test within the
macro because almost all calls are already within a block of UTF-16 only
code. */
#define BACKCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr--
/* Same as above, just in the other direction. */
#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr++
#define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00u) == 0xdc00u) eptr++
/* Same as above, but it allows a fully customizable form. NOTE(review): the
second argument is masked directly rather than dereferenced, so callers
presumably pass the code unit value (for example *ptr), not a pointer -
confirm at the call sites. */
#define ACROSSCHAR(condition, eptr, action) \
if ((condition) && ((eptr) & 0xfc00u) == 0xdc00u) action
/* Deposit a character into memory, returning the number of code units. A
variable named "utf" must be in scope at the point of use. */
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
PRIV(ord2utf)(c,p) : (*p = c, 1))
/* ------------------- 32-bit support ------------------ */
#else
/* These are trivial for the 32-bit library, since all UTF-32 characters fit
into one PCRE2_UCHAR unit. */
#define MAX_UTF_SINGLE_CU (0x10ffffu)
#define HAS_EXTRALEN(c) (0)
#define GET_EXTRALEN(c) (0)
#define NOT_FIRSTCU(c) (0)
/* Get the next UTF-32 character, not advancing the pointer. This is called when
we know we are in UTF-32 mode. */
#define GETCHAR(c, eptr) \
c = *(eptr);
/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
pointer. */
#define GETCHARTEST(c, eptr) \
c = *(eptr);
/* Get the next UTF-32 character, advancing the pointer. This is called when we
know we are in UTF-32 mode. */
#define GETCHARINC(c, eptr) \
c = *((eptr)++);
/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
This is called when we don't know if we are in UTF-32 mode. */
#define GETCHARINCTEST(c, eptr) \
c = *((eptr)++);
/* Get the next UTF-32 character, not advancing the pointer, not incrementing
length (since all UTF-32 is of length 1). This is called when we know we are in
UTF-32 mode. */
#define GETCHARLEN(c, eptr, len) \
GETCHAR(c, eptr)
/* Get the next UTF-32 character, testing for UTF-32 mode, not advancing the
pointer, not incrementing the length (since all UTF-32 is of length 1).
This is called when we do not know if we are in UTF-32 mode. */
#define GETCHARLENTEST(c, eptr, len) \
GETCHARTEST(c, eptr)
/* If the pointer is not at the start of a character, move it back until
it is. This is called only in UTF-32 mode - we don't put a test within the
macro because almost all calls are already within a block of UTF-32 only
code.
These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */
#define BACKCHAR(eptr) do { } while (0)
/* Same as above, just in the other direction. */
#define FORWARDCHAR(eptr) do { } while (0)
#define FORWARDCHARTEST(eptr,end) do { } while (0)
/* Same as above, but it allows a fully customizable form. Here none of the
arguments is evaluated. */
#define ACROSSCHAR(condition, eptr, action) do { } while (0)
/* Deposit a character into memory, returning the number of code units. */
#define PUTCHAR(c, p) (*p = c, 1)
#endif /* UTF-32 character handling */
#endif /* SUPPORT_UNICODE */
/* Mode-dependent macros that have the same definition in all modes. */
/* Convert a count of code units to bytes, and a count of bytes to code units
(integer division). */
#define CU2BYTES(x) ((x)*((PCRE2_CODE_UNIT_WIDTH/8)))
#define BYTES2CU(x) ((x)/((PCRE2_CODE_UNIT_WIDTH/8)))
/* Store a link (PUT) or a 16-bit value (PUT2) and then advance the pointer "a"
past it. Each macro is one parenthesised expression whose value is the advanced
pointer, so it can be used safely wherever an expression is allowed. "a" must
be a modifiable lvalue and is evaluated more than once. */
#define PUTINC(a,n,d) (PUT(a,n,d), (a) += LINK_SIZE)
#define PUT2INC(a,n,d) (PUT2(a,n,d), (a) += IMM2_SIZE)
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
code that uses them is simpler because it assumes this. */
/* The real general context structure. At present it holds only data for custom
memory control. */
typedef struct pcre2_real_general_context {
pcre2_memctl memctl; /* Memory control data; the only member at present */
} pcre2_real_general_context;
/* The real compile context structure. The hedged notes below are inferred from
the member names only and should be confirmed against the code that uses this
structure. */
typedef struct pcre2_real_compile_context {
pcre2_memctl memctl; /* Memory control data */
int (*stack_guard)(uint32_t, void *); /* Callback; NOTE(review): presumably a stack check made while compiling - confirm */
void *stack_guard_data; /* NOTE(review): presumably the void * passed to stack_guard - confirm */
const uint8_t *tables; /* Character tables */
PCRE2_SIZE max_pattern_length; /* NOTE(review): presumably the longest pattern accepted - confirm */
uint16_t bsr_convention; /* \R convention */
uint16_t newline_convention; /* Newline convention */
uint32_t parens_nest_limit; /* NOTE(review): presumably the limit on parenthesis nesting - confirm */
} pcre2_real_compile_context;
/* The real match context structure. Two groups of members are conditional:
stack_memctl exists only when HEAP_MATCH_RECURSE is defined, and the JIT
callback pair only when SUPPORT_JIT is defined, so the layout depends on the
build configuration. */
typedef struct pcre2_real_match_context {
pcre2_memctl memctl; /* Memory control data */
#ifdef HEAP_MATCH_RECURSE
pcre2_memctl stack_memctl; /* Second memory control block (HEAP_MATCH_RECURSE only) */
#endif
#ifdef SUPPORT_JIT
pcre2_jit_callback jit_callback; /* JIT callback (SUPPORT_JIT only) */
void *jit_callback_data; /* NOTE(review): presumably passed to jit_callback - confirm */
#endif
int (*callout)(pcre2_callout_block *, void *); /* Callout function pointer */
void *callout_data; /* NOTE(review): presumably the void * passed to callout - confirm */
PCRE2_SIZE offset_limit; /* NOTE(review): presumably bounds the match start offset - confirm */
uint32_t match_limit; /* Match limit */
uint32_t recursion_limit; /* Recursion limit */
} pcre2_real_match_context;
/* The real compiled code structure. The type for the blocksize field is
defined specially because it is required in pcre2_serialize_decode() when
copying the size from possibly unaligned memory into a variable of the same
type. Use a macro rather than a typedef to avoid compiler warnings when this
file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
largest lookbehind that is supported. (OP_REVERSE in a pattern has a 16-bit
argument in 8-bit and 16-bit modes, so we need no more than a 16-bit field
here.) LOOKBEHIND_MAX is UINT16_MAX, which is the largest value that the
uint16_t max_lookbehind member below can hold. Each macro is undefined before
it is defined so that repeated inclusion does not redefine it. */
#undef CODE_BLOCKSIZE_TYPE
#define CODE_BLOCKSIZE_TYPE size_t
#undef LOOKBEHIND_MAX
#define LOOKBEHIND_MAX UINT16_MAX
typedef struct pcre2_real_code {
pcre2_memctl memctl; /* Memory control fields */
const uint8_t *tables; /* The character tables */
void *executable_jit; /* Pointer to JIT code */
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */
uint32_t magic_number; /* Paranoid and endianness check */
uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */
uint32_t flags; /* Various state flags */
uint32_t limit_match; /* Limit set in the pattern */
uint32_t limit_recursion; /* Limit set in the pattern */
uint32_t first_codeunit; /* Starting code unit */
uint32_t last_codeunit; /* This codeunit must be seen */
uint16_t bsr_convention; /* What \R matches */
uint16_t newline_convention; /* What is a newline? */
uint16_t max_lookbehind; /* Longest lookbehind (characters) */
uint16_t minlength; /* Minimum length of match */
uint16_t top_bracket; /* Highest numbered group */
uint16_t top_backref; /* Highest numbered back reference */
uint16_t name_entry_size; /* Size (code units) of table entries */
uint16_t name_count; /* Number of name entries in the table */
} pcre2_real_code;
/* The real match data structure. NOTE(review): ovector is declared with one
element although oveccount counts pairs; presumably the block is allocated
larger than sizeof(pcre2_real_match_data) so that ovector can hold all the
pairs - confirm in the code that creates match data. */
typedef struct pcre2_real_match_data {
pcre2_memctl memctl; /* Memory control data */
const pcre2_real_code *code; /* The pattern used for the match */
PCRE2_SPTR subject; /* The subject that was matched */
PCRE2_SPTR mark; /* Pointer to last mark */
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
PCRE2_SIZE startchar; /* Offset to starting code unit */
uint16_t matchedby; /* Type of match (normal, JIT, DFA) */
uint16_t oveccount; /* Number of pairs */
int rc; /* The return code from the match */
PCRE2_SIZE ovector[1]; /* The first field */
} pcre2_real_match_data;
/* ----------------------- PRIVATE STRUCTURES ----------------------------- */
/* These structures are not needed for pcre2test. */
#ifndef PCRE2_PCRE2TEST
/* Structures for checking for mutual recursion when scanning compiled or
parsed code. Both are singly linked through "prev". recurse_check holds a
PCRE2_SPTR and parsed_recurse_check holds a uint32_t pointer; NOTE(review):
presumably these are for compiled code and for the parsed pattern
respectively - confirm. */
typedef struct recurse_check {
struct recurse_check *prev; /* Previous record in the chain */
PCRE2_SPTR group; /* The group this record refers to */
} recurse_check;
typedef struct parsed_recurse_check {
struct parsed_recurse_check *prev; /* Previous record in the chain */
uint32_t *groupptr; /* Pointer to the group this record refers to */
} parsed_recurse_check;
/* Structure for building a cache when filling in recursion offsets. Each entry
pairs a group number with a pointer for that group. */
typedef struct recurse_cache {
PCRE2_SPTR group; /* Pointer for the cached group */
int groupnumber; /* Number of the cached group */
} recurse_cache;
/* Structure for maintaining a chain of pointers to the currently incomplete
branches, for testing for left recursion while compiling. The chain is linked
through "outer". */
typedef struct branch_chain {
struct branch_chain *outer; /* Next record in the chain */
PCRE2_UCHAR *current_branch; /* The incomplete branch for this record */
} branch_chain;
/* Structure for building a list of named groups during the first pass of
compiling. The name is not copied: "name" points into the pattern and "length"
gives its length. "isdup" is a uint16_t that is used as a boolean. */
typedef struct named_group {
PCRE2_SPTR name; /* Points to the name in the pattern */
uint32_t number; /* Group number */
uint16_t length; /* Length of the name */
uint16_t isdup; /* TRUE if a duplicate */
} named_group;
/* Structure for passing "static" information around between the functions
doing the compiling, so that they are thread-safe. NOTE(review): max_lookbehind
is an int and top_backref a uint32_t here, whereas the members of the same
names in pcre2_real_code are uint16_t; presumably the values are range-checked
before being copied there - confirm in the compile code. */
typedef struct compile_block {
pcre2_real_compile_context *cx; /* Points to the compile context */
const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */
const uint8_t *cbits; /* Points to character type table */
const uint8_t *ctypes; /* Points to table of type maps */
PCRE2_SPTR start_workspace; /* The start of working space */
PCRE2_SPTR start_code; /* The start of the compiled code */
PCRE2_SPTR start_pattern; /* The start of the pattern */
PCRE2_SPTR end_pattern; /* The end of the pattern */
PCRE2_UCHAR *name_table; /* The name/number table */
PCRE2_SIZE workspace_size; /* Size of workspace */
PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */
PCRE2_SIZE erroroffset; /* Offset of error in pattern */
uint16_t names_found; /* Number of entries so far */
uint16_t name_entry_size; /* Size of each entry */
open_capitem *open_caps; /* Chain of open capture items */
named_group *named_groups; /* Points to vector in pre-compile */
uint32_t named_group_list_size; /* Number of entries in the list */
uint32_t external_options; /* External (initial) options */
uint32_t external_flags; /* External flag bits to be set */
uint32_t bracount; /* Count of capturing parentheses */
uint32_t lastcapture; /* Last capture encountered */
uint32_t *parsed_pattern; /* Parsed pattern buffer */
uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */
uint32_t *groupinfo; /* Group info vector */
uint32_t top_backref; /* Maximum back reference */
uint32_t backref_map; /* Bitmap of low back refs */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
uint32_t class_range_start; /* Overall class range start */
uint32_t class_range_end; /* Overall class range end */
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
int max_lookbehind; /* Maximum lookbehind (characters) */
int parens_depth; /* Depth of nested parentheses */
int assert_depth; /* Depth of nested assertions */
int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL had_recurse; /* Had a recursion or subroutine call */
BOOL dupnames; /* Duplicate names exist */
} compile_block;
/* Structure for keeping the properties of the in-memory stack used
by the JIT matcher. */
typedef struct pcre2_real_jit_stack {
pcre2_memctl memctl; /* Memory control data */
void* stack; /* Opaque pointer to the stack; its type is not visible here */
} pcre2_real_jit_stack;
/* Structure for keeping a chain of heap blocks used for saving ovectors
during pattern recursion when the ovector is larger than can be saved on
the system stack. NOTE(review): saved_ovec is declared with one element;
presumably each block is allocated large enough for the whole saved ovector -
confirm in the code that allocates these frames. */
typedef struct ovecsave_frame {
struct ovecsave_frame *next; /* Next frame on free chain */
PCRE2_SIZE saved_ovec[1]; /* First vector element */
} ovecsave_frame;
/* Structure for items in a linked list that represents an explicit recursive
call within the pattern; used by pcre2_match(). The list is linked through
"prevrec". */
typedef struct recursion_info {
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
unsigned int group_num; /* Number of group that was called */
PCRE2_SIZE *ovec_save; /* Pointer to saved ovector frame */
uint32_t saved_capture_last; /* Last capture number */
PCRE2_SPTR subject_position; /* Position at start of recursion */
} recursion_info;
/* A similar structure for pcre2_dfa_match(). It has the same three members as
the first, second and last members of recursion_info, but nothing for saving
an ovector or a capture number. */
typedef struct dfa_recursion_info {
struct dfa_recursion_info *prevrec; /* Previous recursion record */
PCRE2_SPTR subject_position; /* Subject position for this record */
uint32_t group_num; /* Group number for this record */
} dfa_recursion_info;
/* Structure for building a chain of data for holding the values of the subject
pointer at the start of each subpattern, so as to detect when an empty string
has been matched by a subpattern - to break infinite loops; used by
pcre2_match(). The chain is linked through "epb_prev". */
typedef struct eptrblock {
struct eptrblock *epb_prev; /* Previous block in the chain */
PCRE2_SPTR epb_saved_eptr; /* The saved subject pointer */
} eptrblock;
/* Structure for passing "static" information around between the functions
doing traditional NFA matching (pcre2_match() and friends). The members
stack_memctl and match_frames_base exist only when HEAP_MATCH_RECURSE is
defined, so the layout depends on the build configuration. */
typedef struct match_block {
pcre2_memctl memctl; /* For general use */
#ifdef HEAP_MATCH_RECURSE
pcre2_memctl stack_memctl; /* For "stack" frames */
#endif
uint32_t match_call_count; /* As it says */
uint32_t match_limit; /* As it says */
uint32_t match_limit_recursion; /* As it says */
BOOL hitend; /* Hit the end of the subject at some point */
BOOL hasthen; /* Pattern contains (*THEN) */
const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */
const uint8_t *ctypes; /* Points to table of type maps */
PCRE2_SIZE *ovector; /* Pointer to the offset vector */
PCRE2_SIZE offset_end; /* One past the end */
PCRE2_SIZE offset_max; /* The maximum usable for return data */
PCRE2_SIZE start_offset; /* The start offset value */
PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
uint16_t partial; /* PARTIAL options */
uint16_t bsr_convention; /* \R interpretation */
uint16_t name_count; /* Number of names in name table */
uint16_t name_entry_size; /* Size of entry in names table */
PCRE2_SPTR name_table; /* Table of group names */
PCRE2_SPTR start_code; /* For use when recursing */
PCRE2_SPTR start_subject; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of the subject string */
PCRE2_SPTR start_match_ptr; /* Start of matched string */
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
uint32_t capture_last; /* Most recent capture number + overflow flag */
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
uint32_t match_function_type; /* Set for certain special calls of match() */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
recursion_info *recursive; /* Linked list of recursion data */
ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
#ifdef HEAP_MATCH_RECURSE
void *match_frames_base; /* For remembering malloc'd frames */
#endif
} match_block;
/* A similar structure is used for the same purpose by the DFA matching
functions. Unlike match_block it holds a single "tables" pointer rather than
separate lcc/fcc/ctypes pointers, and of the limits it has only
match_limit_recursion. */
typedef struct dfa_match_block {
pcre2_memctl memctl; /* For general use */
PCRE2_SPTR start_code; /* Start of the compiled pattern */
PCRE2_SPTR start_subject ; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of subject string */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
const uint8_t *tables; /* Character tables */
PCRE2_SIZE start_offset; /* The start offset value */
uint32_t match_limit_recursion; /* As it says */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
uint16_t bsr_convention; /* \R interpretation */
void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
dfa_recursion_info *recursive; /* Linked list of recursion data */
} dfa_match_block;
#endif /* PCRE2_PCRE2TEST */
/* End of pcre2_intmodedep.h */

11501
thirdparty/pcre2/src/pcre2_jit_compile.c vendored Normal file

File diff suppressed because it is too large Load diff

189
thirdparty/pcre2/src/pcre2_jit_match.c vendored Normal file
View file

@ -0,0 +1,189 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
#error This file must be included from pcre2_jit_compile.c.
#endif
#ifdef SUPPORT_JIT
/* Run a compiled matcher using a scratch area that lives in this function's
own frame. A descriptor for that area is stored in arguments->stack before the
compiled code is entered; it is only valid until this function returns. */
static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func)
{
  sljit_u8 scratch_area[MACHINE_STACK_SIZE];
  struct sljit_stack frame_stack;

  /* The descriptor starts empty: top and base both sit at the start of the
  scratch area, and both limits are MACHINE_STACK_SIZE beyond the base. */
  frame_stack.top = (sljit_sw)&scratch_area;
  frame_stack.base = frame_stack.top;
  frame_stack.limit = frame_stack.base + MACHINE_STACK_SIZE;
  frame_stack.max_limit = frame_stack.limit;

  arguments->stack = &frame_stack;

  return executable_func(arguments);
}
#endif
/*************************************************
* Do a JIT pattern match *
*************************************************/
/* This function runs a JIT pattern match.
Arguments:
code points to the compiled expression
subject points to the subject string
length length of subject string (may contain binary zeros)
start_offset where to start in the subject string
options option bits
match_data points to a match_data block
mcontext points to a match context, or NULL; when a JIT stack has been
assigned to the context it is used, otherwise a stack local to the call is used
Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough
-1 => failed to match (PCRE2_ERROR_NOMATCH)
< -1 => some kind of unexpected problem
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
  pcre2_match_context *mcontext)
{
#ifndef SUPPORT_JIT

/* Built without JIT support: every call is rejected. The casts only silence
unused-parameter warnings. */

(void)code;
(void)subject;
(void)length;
(void)start_offset;
(void)options;
(void)match_data;
(void)mcontext;
return PCRE2_ERROR_JIT_BADOPTION;

#else  /* SUPPORT_JIT */

pcre2_real_code *re = (pcre2_real_code *)code;
executable_functions *functions = (executable_functions *)re->executable_jit;
pcre2_jit_stack *jit_stack;
uint32_t oveccount = match_data->oveccount;
uint32_t max_oveccount;
union {
   void *executable_func;
   jit_function call_executable_func;
} convert_executable_func;
jit_arguments arguments;
int rc;
int index = 0;

/* Pick the compiled variant: slot 0 when no partial option is given, slot 1
for soft partial matching, slot 2 for hard partial matching (hard wins when
both bits are set). */

if ((options & PCRE2_PARTIAL_HARD) != 0)
  index = 2;
else if ((options & PCRE2_PARTIAL_SOFT) != 0)
  index = 1;

/* A pattern that has no JIT data at all (functions == NULL) is rejected in
the same way as one that was not compiled for the requested mode, instead of
dereferencing a null pointer. */

if (functions == NULL || functions->executable_funcs[index] == NULL)
  return PCRE2_ERROR_JIT_BADOPTION;

/* Sanity checks should be handled by pcre2_match(). */

arguments.str = subject + start_offset;
arguments.begin = subject;
arguments.end = subject + length;
arguments.match_data = match_data;
arguments.startchar_ptr = subject;
arguments.mark_ptr = NULL;
arguments.options = options;

if (mcontext != NULL)
  {
  arguments.callout = mcontext->callout;
  arguments.callout_data = mcontext->callout_data;
  arguments.offset_limit = mcontext->offset_limit;

  /* The smaller of the context limit and the pattern's own limit applies. */
  arguments.limit_match = (mcontext->match_limit < re->limit_match)?
    mcontext->match_limit : re->limit_match;

  /* With a callback, the callback supplies the stack; without one, the
  callback data itself is the stack (it may be NULL). */
  if (mcontext->jit_callback != NULL)
    jit_stack = mcontext->jit_callback(mcontext->jit_callback_data);
  else
    jit_stack = (pcre2_jit_stack *)mcontext->jit_callback_data;
  }
else
  {
  arguments.callout = NULL;
  arguments.callout_data = NULL;
  arguments.offset_limit = PCRE2_UNSET;
  arguments.limit_match = (MATCH_LIMIT < re->limit_match)?
    MATCH_LIMIT : re->limit_match;
  jit_stack = NULL;
  }

/* JIT only needs two offsets for each ovector entry. Hence
the last 1/3 of the ovector will never be touched. */

max_oveccount = functions->top_bracket;
if (oveccount > max_oveccount)
  oveccount = max_oveccount;
arguments.oveccount = oveccount << 1;

/* The union converts the stored data pointer into a callable function
pointer without a direct cast between the two pointer kinds. */

convert_executable_func.executable_func = functions->executable_funcs[index];
if (jit_stack != NULL)
  {
  arguments.stack = (struct sljit_stack *)(jit_stack->stack);
  rc = convert_executable_func.call_executable_func(&arguments);
  }
else
  rc = jit_machine_stack_exec(&arguments, convert_executable_func.call_executable_func);

/* More pairs matched than the (clamped) ovector can hold: report 0. */

if (rc > (int)oveccount)
  rc = 0;

match_data->code = re;
match_data->subject = subject;
match_data->rc = rc;
match_data->startchar = arguments.startchar_ptr - subject;
match_data->leftchar = 0;
match_data->rightchar = 0;
match_data->mark = arguments.mark_ptr;
match_data->matchedby = PCRE2_MATCHEDBY_JIT;

return match_data->rc;

#endif  /* SUPPORT_JIT */
}
/* End of pcre2_jit_match.c */

227
thirdparty/pcre2/src/pcre2_jit_misc.c vendored Normal file
View file

@ -0,0 +1,227 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
#error This file must be included from pcre2_jit_compile.c.
#endif
/*************************************************
* Free JIT read-only data *
*************************************************/
/* Release a chain of read-only data blocks. The first pointer-sized word of
each block holds the address of the next block; a NULL link ends the chain. */
void
PRIV(jit_free_rodata)(void *current, void *allocator_data)
{
#ifdef SUPPORT_JIT

void *block;
void *following;

SLJIT_UNUSED_ARG(allocator_data);

for (block = current; block != NULL; block = following)
  {
  /* Read the link before the block that stores it is released. */
  following = *(void**)block;
  SLJIT_FREE(block, allocator_data);
  }

#else  /* no SUPPORT_JIT: nothing to free */

(void)current;
(void)allocator_data;

#endif  /* SUPPORT_JIT */
}
/*************************************************
* Free JIT compiled code *
*************************************************/
/* Release everything attached to a pattern's JIT data: for each compile mode
the generated code (when present) and its read-only data chain, then the
executable_functions block itself. */
void
PRIV(jit_free)(void *executable_jit, pcre2_memctl *memctl)
{
#ifdef SUPPORT_JIT

executable_functions *jit_data = (executable_functions *)executable_jit;
void *allocator_data = memctl;
int mode = 0;

while (mode < JIT_NUMBER_OF_COMPILE_MODES)
  {
  if (jit_data->executable_funcs[mode] != NULL)
    {
    sljit_free_code(jit_data->executable_funcs[mode]);
    }
  PRIV(jit_free_rodata)(jit_data->read_only_data_heads[mode], allocator_data);
  mode++;
  }

SLJIT_FREE(jit_data, allocator_data);

#else  /* no SUPPORT_JIT: nothing to free */

(void)executable_jit;
(void)memctl;

#endif  /* SUPPORT_JIT */
}
/*************************************************
* Free unused JIT memory *
*************************************************/
/* Forward to sljit_free_unused_memory_exec(). The general context is accepted
for API symmetry but is not consulted in either build configuration. */
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
{
#ifdef SUPPORT_JIT

SLJIT_UNUSED_ARG(gcontext);
sljit_free_unused_memory_exec();

#else  /* no SUPPORT_JIT */

(void)gcontext;     /* Suppress warning */

#endif  /* SUPPORT_JIT */
}
/*************************************************
* Allocate a JIT stack *
*************************************************/
/* Create a JIT stack wrapper together with its underlying sljit stack.

Arguments:
  startsize   initial stack size, rounded up to a STACK_GROWTH_RATE multiple
  maxsize     maximum stack size, rounded up the same way
  gcontext    general context handed to the allocator, cast to pcre2_memctl

Returns:      the new JIT stack, or NULL when either size is zero, when an
              allocation fails, or when JIT support is not compiled in
*/
PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
pcre2_jit_stack_create(size_t startsize, size_t maxsize,
  pcre2_general_context *gcontext)
{
#ifndef SUPPORT_JIT

(void)gcontext;
(void)startsize;
(void)maxsize;
return NULL;

#else  /* SUPPORT_JIT */

pcre2_jit_stack *jit_stack;

if (startsize < 1 || maxsize < 1)
  return NULL;
if (startsize > maxsize)
  startsize = maxsize;

/* Round both sizes up to a multiple of STACK_GROWTH_RATE. The mask form
relies on STACK_GROWTH_RATE being a power of two. */

startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);

jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext);
if (jit_stack == NULL) return NULL;

jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl);

/* If the underlying stack could not be allocated, do not hand back a wrapper
with a NULL stack: the matcher and pcre2_jit_stack_free() would both use it.
Release the wrapper through its own memctl and report failure instead. */

if (jit_stack->stack == NULL)
  {
  jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
  return NULL;
  }

return jit_stack;

#endif  /* SUPPORT_JIT */
}
/*************************************************
* Assign a JIT stack to a pattern *
*************************************************/
/* Record in a match context how pcre2_jit_match() should obtain its JIT
stack: a callback plus its data. A NULL context is silently ignored. */
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback,
  void *callback_data)
{
#ifdef SUPPORT_JIT

if (mcontext != NULL)
  {
  mcontext->jit_callback = callback;
  mcontext->jit_callback_data = callback_data;
  }

#else  /* no SUPPORT_JIT: nothing to record */

(void)mcontext;
(void)callback;
(void)callback_data;

#endif  /* SUPPORT_JIT */
}
/*************************************************
* Free a JIT stack *
*************************************************/
/* Destroy a JIT stack: first the underlying sljit stack, then the wrapper
itself through the allocator stored in its memctl. NULL is accepted and
ignored. */
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
{
#ifdef SUPPORT_JIT

if (jit_stack == NULL) return;

sljit_free_stack((struct sljit_stack *)(jit_stack->stack), &jit_stack->memctl);
jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);

#else  /* no SUPPORT_JIT */

(void)jit_stack;

#endif  /* SUPPORT_JIT */
}
/*************************************************
* Get target CPU type *
*************************************************/
/* Return the platform name reported by sljit, or a fixed explanatory string
when JIT support is not compiled in. */
const char*
PRIV(jit_get_target)(void)
{
#ifdef SUPPORT_JIT
return sljit_get_platform_name();
#else  /* no SUPPORT_JIT */
return "JIT is not supported";
#endif  /* SUPPORT_JIT */
}
/*************************************************
* Get size of JIT code *
*************************************************/
/* Return the sum of the recorded code sizes of the three compile modes, or 0
when JIT support is not compiled in. */
size_t
PRIV(jit_get_size)(void *executable_jit)
{
#ifdef SUPPORT_JIT

sljit_uw *mode_sizes = ((executable_functions *)executable_jit)->executable_sizes;

/* The explicit three-term sum below must be revisited if a mode is added. */
SLJIT_COMPILE_ASSERT(JIT_NUMBER_OF_COMPILE_MODES == 3, number_of_compile_modes_changed);

return mode_sizes[0] + mode_sizes[1] + mode_sizes[2];

#else  /* no SUPPORT_JIT */

(void)executable_jit;
return 0;

#endif  /* SUPPORT_JIT */
}
/* End of pcre2_jit_misc.c */

1735
thirdparty/pcre2/src/pcre2_jit_test.c vendored Normal file

File diff suppressed because it is too large Load diff

157
thirdparty/pcre2/src/pcre2_maketables.c vendored Normal file
View file

@ -0,0 +1,157 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains the external function pcre2_maketables(), which builds
character tables for PCRE2 in the current locale. The file is compiled on its
own as part of the PCRE2 library. However, it is also included in the
compilation of dftables.c, in which case the macro DFTABLES is defined. */
#ifndef DFTABLES
# ifdef HAVE_CONFIG_H
# include "config.h"
# endif
# include "pcre2_internal.h"
#endif
/*************************************************
* Create PCRE2 character tables *
*************************************************/
/* This function builds a set of character tables for use by PCRE2 and returns
a pointer to them. They are built using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via a general context malloc, if
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
program) malloc() is used, and the function has a different name so as not to
clash with the prototype in pcre2.h.
Arguments: none when DFTABLES is defined
else a PCRE2 general context or NULL
Returns: pointer to the contiguous block of data
*/
/* The two variants below differ only in name, linkage and how the block is
allocated; everything after the #endif is shared. The output is one block of
tables_length bytes laid out in this order: lower-casing table (256 bytes),
case-flipping table (256 bytes), class bitmaps (cbit_length bytes), character
type table (256 bytes). The block is not freed here. */
#ifdef DFTABLES /* Included in freestanding dftables.c program */
static const uint8_t *maketables(void)
{
uint8_t *yield = (uint8_t *)malloc(tables_length);
#else /* Not DFTABLES, compiling the library */
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
pcre2_maketables(pcre2_general_context *gcontext)
{
/* Use the context's allocator when a context is given, else plain malloc(). */
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) :
malloc(tables_length));
#endif /* DFTABLES */
int i;
uint8_t *p;
if (yield == NULL) return NULL;
p = yield;
/* First comes the lower casing table */
for (i = 0; i < 256; i++) *p++ = tolower(i);
/* Next the case-flipping table */
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
/* Then the character class tables. Don't try to be clever and save effort on
exclusive ones - in some locales things may be different.
Note that the table for "space" includes everything "isspace" gives, including
VT in the default locale. This makes it work for the POSIX class [:space:].
From release 8.34 it is also correct for Perl space, because Perl added VT at
release 5.18.
Note also that it is possible for a character to be alnum or alpha without
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
test for alnum specially. */
memset(p, 0, cbit_length);
/* Each class is a bitmap starting at its cbit_* offset: character i is bit
(i & 7) of byte i/8. */
for (i = 0; i < 256; i++)
{
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
}
p += cbit_length;
/* Finally, the character type table. In this, we used to exclude VT from the
white space chars, because Perl didn't recognize it as such for \s and for
comments within regexes. However, Perl changed at release 5.18, so PCRE changed
at release 8.34. */
for (i = 0; i < 256; i++)
{
/* x accumulates the ctype_* flags that apply to character i. */
int x = 0;
if (isspace(i)) x += ctype_space;
if (isalpha(i)) x += ctype_letter;
if (isdigit(i)) x += ctype_digit;
if (isxdigit(i)) x += ctype_xdigit;
if (isalnum(i) || i == '_') x += ctype_word;
/* Note: strchr includes the terminating zero in the characters it considers.
In this instance, that is ok because we want binary zero to be flagged as a
meta-character, which in this sense is any character that terminates a run
of data characters. */
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
*p++ = x;
}
return yield;
}
/* End of pcre2_maketables.c */

7238
thirdparty/pcre2/src/pcre2_match.c vendored Normal file

File diff suppressed because it is too large Load diff

147
thirdparty/pcre2/src/pcre2_match_data.c vendored Normal file
View file

@ -0,0 +1,147 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
* Create a match data block given ovector size *
*************************************************/
/* A minimum of 1 is imposed on the number of ovector triplets. */
/* Allocate a match data block with room for 3*oveccount PCRE2_SIZE values
after the fixed part. A request for zero pairs is treated as one. Returns
NULL if the allocation fails. Only the oveccount field is set here. */
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{
  pcre2_match_data *block;
  size_t block_size;

  if (oveccount == 0)
    {
    oveccount = 1;
    }

  block_size = sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_SIZE);
  block = PRIV(memctl_malloc)(block_size, (pcre2_memctl *)gcontext);

  if (block != NULL)
    {
    block->oveccount = oveccount;
    }
  return block;
}
/*************************************************
* Create a match data block using pattern data *
*************************************************/
/* If no context is supplied, use the memory allocator from the code. */
/* Create a match data block sized from the pattern: top_bracket + 1 pairs.
When no general context is given, the compiled pattern itself is passed on in
its place so that the pattern's allocator is used. */
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(const pcre2_code *code,
  pcre2_general_context *gcontext)
{
  pcre2_general_context *allocator_source =
    (gcontext != NULL)? gcontext : (pcre2_general_context *)code;

  return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
    allocator_source);
}
/*************************************************
* Free a match data block *
*************************************************/
/* Release a match data block through the allocator stored in its own memctl.
NULL is accepted and ignored. */
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_match_data_free(pcre2_match_data *match_data)
{
  if (match_data == NULL)
    {
    return;
    }
  match_data->memctl.free(match_data, match_data->memctl.memory_data);
}
/*************************************************
* Get last mark in match *
*************************************************/
/* Return the mark pointer stored in the match data block. The argument is
not checked for NULL. */
PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION
pcre2_get_mark(pcre2_match_data *match_data)
{
  PCRE2_SPTR stored_mark = match_data->mark;
  return stored_mark;
}
/*************************************************
* Get pointer to ovector *
*************************************************/
/* Return the address of the ovector held inside the match data block. The
argument is not checked for NULL. */
PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
pcre2_get_ovector_pointer(pcre2_match_data *match_data)
{
  PCRE2_SIZE *offsets = match_data->ovector;
  return offsets;
}
/*************************************************
* Get number of ovector slots *
*************************************************/
/* Return the oveccount value recorded in the match data block. The argument
is not checked for NULL. */
PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION
pcre2_get_ovector_count(pcre2_match_data *match_data)
{
  uint32_t pair_count = match_data->oveccount;
  return pair_count;
}
/*************************************************
* Get starting code unit in match *
*************************************************/
/* Return the startchar offset recorded in the match data block. The argument
is not checked for NULL. */
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_startchar(pcre2_match_data *match_data)
{
  PCRE2_SIZE start_offset = match_data->startchar;
  return start_offset;
}
/* End of pcre2_match_data.c */

243
thirdparty/pcre2/src/pcre2_newline.c vendored Normal file
View file

@ -0,0 +1,243 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains internal functions for testing newlines when more than
one kind of newline is to be recognized. When a newline is found, its length is
returned. In principle, we could implement several newline "types", each
referring to a different set of newline characters. At present, PCRE2 supports
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
http://unicode.org/unicode/reports/tr18/. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
* Check for newline at given position *
*************************************************/
/* This function is called only via the IS_NEWLINE macro, which does so only
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
pointed to by ptr is less than the end of the string.
Arguments:
ptr pointer to possible newline
type the newline type
endptr pointer to the end of the string
lenptr where to return the length
utf TRUE if in utf mode
Returns: TRUE or FALSE
*/
/* Test whether a newline sequence begins at ptr and, if so, store its length
in code units through lenptr. Handles NLTYPE_ANYCRLF explicitly; every other
type value is treated as NLTYPE_ANY. */
BOOL
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
  uint32_t *lenptr, BOOL utf)
{
uint32_t ch;

/* Fetch the character at ptr: decoded via GETCHAR in UTF mode, otherwise the
single code unit. */

#ifdef SUPPORT_UNICODE
if (utf)
  {
  GETCHAR(ch, ptr);
  }
else
  {
  ch = *ptr;
  }
#else
(void)utf;
ch = *ptr;
#endif  /* SUPPORT_UNICODE */

/* NLTYPE_ANYCRLF: only LF, CR and CRLF count. */

if (type == NLTYPE_ANYCRLF)
  {
  if (ch == CHAR_LF)
    {
    *lenptr = 1;
    return TRUE;
    }
  if (ch == CHAR_CR)
    {
    /* ptr[1] is inspected only when it lies before endptr. */
    *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
    return TRUE;
    }
  return FALSE;
  }

/* NLTYPE_ANY */

switch(ch)
  {
#ifdef EBCDIC
  case CHAR_NEL:
#endif
  case CHAR_LF:
  case CHAR_VT:
  case CHAR_FF:
    *lenptr = 1;
    return TRUE;

  case CHAR_CR:
    *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
    return TRUE;

#ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8
  case CHAR_NEL:
    *lenptr = utf? 2 : 1;
    return TRUE;

  case 0x2028:   /* LS */
  case 0x2029:   /* PS */
    *lenptr = 3;
    return TRUE;

#else  /* 16-bit or 32-bit code units */
  case CHAR_NEL:
  case 0x2028:   /* LS */
  case 0x2029:   /* PS */
    *lenptr = 1;
    return TRUE;
#endif
#endif  /* Not EBCDIC */

  default:
    return FALSE;
  }
}
/*************************************************
* Check for newline at previous position *
*************************************************/
/* This function is called only via the WAS_NEWLINE macro, which does so only
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
value of ptr is greater than the start of the string that is being processed.
Arguments:
ptr pointer to possible newline
type the newline type
startptr pointer to the start of the string
lenptr where to return the length
utf TRUE if in utf mode
Returns: TRUE or FALSE
*/
/* Test whether a newline sequence ends immediately before ptr and, if so,
store its length in code units through lenptr. Handles NLTYPE_ANYCRLF
explicitly; every other type value is treated as NLTYPE_ANY. */
BOOL
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
  uint32_t *lenptr, BOOL utf)
{
uint32_t ch;

/* Move to the preceding code unit; in UTF mode BACKCHAR then adjusts ptr
(presumably to the first code unit of that character - confirm against the
macro) before the character is decoded. */

ptr--;

#ifdef SUPPORT_UNICODE
if (utf)
  {
  BACKCHAR(ptr);
  GETCHAR(ch, ptr);
  }
else
  {
  ch = *ptr;
  }
#else
(void)utf;
ch = *ptr;
#endif  /* SUPPORT_UNICODE */

/* NLTYPE_ANYCRLF: only LF, CR and CRLF count. */

if (type == NLTYPE_ANYCRLF)
  {
  if (ch == CHAR_LF)
    {
    /* ptr[-1] is inspected only when it lies at or after startptr. */
    *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
    return TRUE;
    }
  if (ch == CHAR_CR)
    {
    *lenptr = 1;
    return TRUE;
    }
  return FALSE;
  }

/* NLTYPE_ANY */

switch(ch)
  {
  case CHAR_LF:
    *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
    return TRUE;

#ifdef EBCDIC
  case CHAR_NEL:
#endif
  case CHAR_VT:
  case CHAR_FF:
  case CHAR_CR:
    *lenptr = 1;
    return TRUE;

#ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8
  case CHAR_NEL:
    *lenptr = utf? 2 : 1;
    return TRUE;

  case 0x2028:   /* LS */
  case 0x2029:   /* PS */
    *lenptr = 3;
    return TRUE;

#else  /* 16-bit or 32-bit code units */
  case CHAR_NEL:
  case 0x2028:   /* LS */
  case 0x2029:   /* PS */
    *lenptr = 1;
    return TRUE;
#endif
#endif  /* Not EBCDIC */

  default:
    return FALSE;
  }
}
/* End of pcre2_newline.c */

120
thirdparty/pcre2/src/pcre2_ord2utf.c vendored Normal file
View file

@ -0,0 +1,120 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This file contains a function that converts a Unicode character code point
into a UTF string. The behaviour is different for each code unit width. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/* If SUPPORT_UNICODE is not defined, this function will never be called.
Supply a dummy function because some compilers do not like empty source
modules. */
#ifndef SUPPORT_UNICODE
unsigned int
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
{
/* Stand-in for builds without Unicode support: the buffer is left untouched
and zero code units are reported. The casts exist only to mark both parameters
as deliberately unused. */
(void)buffer;
(void)cvalue;
return 0;
}
#else /* SUPPORT_UNICODE */
/*************************************************
*          Convert code point to UTF             *
*************************************************/

/* Encodes one character value using the code unit width selected by
PCRE2_CODE_UNIT_WIDTH and stores the result at the start of the buffer. No
buffer length is passed in, so no bounds checking is done here.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result

Returns:     number of code units placed in the buffer
*/

unsigned int
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
{
/* UTF-8: the number of trailing bytes is the index of the first entry in
utf8_table1 that is not below the value. The trailing bytes are filled in from
the last one back to the first, six bits at a time, and whatever remains is
merged with the lead-byte marker taken from utf8_table2. */

#if PCRE2_CODE_UNIT_WIDTH == 8
int extra = 0;
int pos;

while (extra < PRIV(utf8_table1_size) &&
       (int)cvalue > PRIV(utf8_table1)[extra])
  extra++;

for (pos = extra; pos > 0; pos--)
  {
  buffer[pos] = 0x80 | (cvalue & 0x3f);
  cvalue >>= 6;
  }

buffer[0] = PRIV(utf8_table2)[extra] | cvalue;
return extra + 1;

/* UTF-16: values above 0xffff become a surrogate pair; anything else is
stored as a single code unit. */

#elif PCRE2_CODE_UNIT_WIDTH == 16
if (cvalue > 0xffff)
  {
  cvalue -= 0x10000;
  buffer[0] = 0xd800 | (cvalue >> 10);
  buffer[1] = 0xdc00 | (cvalue & 0x3ff);
  return 2;
  }

buffer[0] = (PCRE2_UCHAR)cvalue;
return 1;

/* UTF-32: always exactly one code unit. */

#else
buffer[0] = (PCRE2_UCHAR)cvalue;
return 1;
#endif
}
#endif /* SUPPORT_UNICODE */
/* End of pcre2_ord2utf.c */

View file

@ -0,0 +1,410 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
*        Return info about compiled pattern      *
*************************************************/

/* Reports one item of information about a compiled pattern. When "where" is
NULL and "what" is a recognized item, the pattern is not examined at all and
the size in bytes of that item's value is returned instead.

Arguments:
  code          points to compiled code
  what          what information is required
  where         where to put the information; if NULL, return length

Returns:        0 when data returned
                > 0 when length requested
                < 0 on error or unset value
*/

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
{
const pcre2_real_code *re = (pcre2_real_code *)code;
uint32_t value;   /* Result for every item whose value is a uint32_t */

/* Length request. An item code that is not listed here leaves this block and
carries on into the checks below. */

if (where == NULL)
  {
  switch(what)
    {
    case PCRE2_INFO_ALLOPTIONS:
    case PCRE2_INFO_ARGOPTIONS:
    case PCRE2_INFO_BACKREFMAX:
    case PCRE2_INFO_BSR:
    case PCRE2_INFO_CAPTURECOUNT:
    case PCRE2_INFO_FIRSTCODETYPE:
    case PCRE2_INFO_FIRSTCODEUNIT:
    case PCRE2_INFO_HASBACKSLASHC:
    case PCRE2_INFO_HASCRORLF:
    case PCRE2_INFO_JCHANGED:
    case PCRE2_INFO_LASTCODETYPE:
    case PCRE2_INFO_LASTCODEUNIT:
    case PCRE2_INFO_MATCHEMPTY:
    case PCRE2_INFO_MATCHLIMIT:
    case PCRE2_INFO_MAXLOOKBEHIND:
    case PCRE2_INFO_MINLENGTH:
    case PCRE2_INFO_NAMEENTRYSIZE:
    case PCRE2_INFO_NAMECOUNT:
    case PCRE2_INFO_NEWLINE:
    case PCRE2_INFO_RECURSIONLIMIT:
    return sizeof(uint32_t);

    case PCRE2_INFO_FIRSTBITMAP:
    return sizeof(const uint8_t *);

    case PCRE2_INFO_JITSIZE:
    case PCRE2_INFO_SIZE:
    return sizeof(size_t);

    case PCRE2_INFO_NAMETABLE:
    return sizeof(PCRE2_SPTR);
    }
  }

/* Validate the block: it must exist, begin with the magic number, and have
been compiled for this code unit width. */

if (re == NULL) return PCRE2_ERROR_NULL;
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;

switch(what)
  {
  /* Items whose value is not a uint32_t are stored here and finish at once. */

  case PCRE2_INFO_FIRSTBITMAP:
  if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
    *((const uint8_t **)where) = &(re->start_bitmap[0]);
  else
    *((const uint8_t **)where) = NULL;
  return 0;

  case PCRE2_INFO_JITSIZE:
#ifdef SUPPORT_JIT
  if (re->executable_jit != NULL)
    *((size_t *)where) = PRIV(jit_get_size)(re->executable_jit);
  else
    *((size_t *)where) = 0;
#else
  *((size_t *)where) = 0;
#endif
  return 0;

  case PCRE2_INFO_NAMETABLE:
  *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
  return 0;

  case PCRE2_INFO_SIZE:
  *((size_t *)where) = re->blocksize;
  return 0;

  /* The two limits are always stored, but the return code is "unset" when the
  stored value is UINT32_MAX. */

  case PCRE2_INFO_MATCHLIMIT:
  *((uint32_t *)where) = re->limit_match;
  if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
  return 0;

  case PCRE2_INFO_RECURSIONLIMIT:
  *((uint32_t *)where) = re->limit_recursion;
  if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET;
  return 0;

  /* Every remaining item yields a uint32_t. It is collected in "value" and
  stored once, after the switch. */

  case PCRE2_INFO_ALLOPTIONS:
  value = re->overall_options;
  break;

  case PCRE2_INFO_ARGOPTIONS:
  value = re->compile_options;
  break;

  case PCRE2_INFO_BACKREFMAX:
  value = re->top_backref;
  break;

  case PCRE2_INFO_BSR:
  value = re->bsr_convention;
  break;

  case PCRE2_INFO_CAPTURECOUNT:
  value = re->top_bracket;
  break;

  case PCRE2_INFO_FIRSTCODETYPE:
  if ((re->flags & PCRE2_FIRSTSET) != 0) value = 1;
  else if ((re->flags & PCRE2_STARTLINE) != 0) value = 2;
  else value = 0;
  break;

  case PCRE2_INFO_FIRSTCODEUNIT:
  if ((re->flags & PCRE2_FIRSTSET) != 0) value = re->first_codeunit;
  else value = 0;
  break;

  case PCRE2_INFO_HASBACKSLASHC:
  value = (re->flags & PCRE2_HASBKC) != 0;
  break;

  case PCRE2_INFO_HASCRORLF:
  value = (re->flags & PCRE2_HASCRORLF) != 0;
  break;

  case PCRE2_INFO_JCHANGED:
  value = (re->flags & PCRE2_JCHANGED) != 0;
  break;

  case PCRE2_INFO_LASTCODETYPE:
  if ((re->flags & PCRE2_LASTSET) != 0) value = 1;
  else value = 0;
  break;

  case PCRE2_INFO_LASTCODEUNIT:
  if ((re->flags & PCRE2_LASTSET) != 0) value = re->last_codeunit;
  else value = 0;
  break;

  case PCRE2_INFO_MATCHEMPTY:
  value = (re->flags & PCRE2_MATCH_EMPTY) != 0;
  break;

  case PCRE2_INFO_MAXLOOKBEHIND:
  value = re->max_lookbehind;
  break;

  case PCRE2_INFO_MINLENGTH:
  value = re->minlength;
  break;

  case PCRE2_INFO_NAMEENTRYSIZE:
  value = re->name_entry_size;
  break;

  case PCRE2_INFO_NAMECOUNT:
  value = re->name_count;
  break;

  case PCRE2_INFO_NEWLINE:
  value = re->newline_convention;
  break;

  default:
  return PCRE2_ERROR_BADOPTION;
  }

*((uint32_t *)where) = value;
return 0;
}
/*************************************************
*              Callout enumerator                *
*************************************************/

/* Walks the compiled code of a pattern and calls the callback once for each
callout item it meets (both numbered callouts and callouts with a string
argument). Enumeration stops as soon as the callback returns a non-zero value,
which is then passed back to the caller.

The compiled block is validated before any of its fields are used: a NULL
pointer gives PCRE2_ERROR_NULL, a block without the magic number gives
PCRE2_ERROR_BADMAGIC, and a block compiled for a different code unit width
gives PCRE2_ERROR_BADMODE.

Arguments:
  code          points to compiled code
  callback      function called for each callout block
  callout_data  user data passed to the callback

Returns:        0 when successfully completed
                < 0 on local error
                != 0 for callback error
*/

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_callout_enumerate(const pcre2_code *code,
  int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
{
pcre2_real_code *re = (pcre2_real_code *)code;
pcre2_callout_enumerate_block cb;
PCRE2_SPTR cc;
#ifdef SUPPORT_UNICODE
BOOL utf;
#endif

if (re == NULL) return PCRE2_ERROR_NULL;

/* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. */

if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;

/* Check that this pattern was compiled in the correct bit mode */

if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;

/* Only now is it safe to read fields of the block. The UTF flag used to be
fetched in the declaration above, which dereferenced a NULL "code" before the
NULL check could reject it. */

#ifdef SUPPORT_UNICODE
utf = (re->overall_options & PCRE2_UTF) != 0;
#endif

/* The compiled code starts after the fixed header and the name table. */

cb.version = 0;
cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
     + re->name_count * re->name_entry_size;

while (TRUE)
  {
  int rc;
  switch (*cc)
    {
    case OP_END:
    return 0;

    /* Items that end with a literal character. In UTF mode the character may
    occupy extra code units beyond the fixed item length. */

    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    /* Repeats of a character type. When the repeated item is a Unicode
    property test, two further code units follow it. */

    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
    if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
#endif
    break;

    /* An extended class carries its own total length. */

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    cc += GET(cc, 1);
    break;
#endif

    /* Verbs with an argument: cc[1] holds the length of the argument, which
    is added to the fixed item length. */

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_SKIP_ARG:
    case OP_THEN_ARG:
    cc += PRIV(OP_lengths)[*cc] + cc[1];
    break;

    /* Numbered callout: report it with no string attached. */

    case OP_CALLOUT:
    cb.pattern_position = GET(cc, 1);
    cb.next_item_length = GET(cc, 1 + LINK_SIZE);
    cb.callout_number = cc[1 + 2*LINK_SIZE];
    cb.callout_string_offset = 0;
    cb.callout_string_length = 0;
    cb.callout_string = NULL;
    rc = callback(&cb, callout_data);
    if (rc != 0) return rc;
    cc += PRIV(OP_lengths)[*cc];
    break;

    /* Callout with a string argument: the callout number is reported as zero.
    The item's third link field is its total length, which is used both to
    derive the string length and to step to the next item. */

    case OP_CALLOUT_STR:
    cb.pattern_position = GET(cc, 1);
    cb.next_item_length = GET(cc, 1 + LINK_SIZE);
    cb.callout_number = 0;
    cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
    cb.callout_string_length =
      GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
    cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
    rc = callback(&cb, callout_data);
    if (rc != 0) return rc;
    cc += GET(cc, 1 + 2*LINK_SIZE);
    break;

    /* Every other item has a fixed length. */

    default:
    cc += PRIV(OP_lengths)[*cc];
    break;
    }
  }
}
/* End of pcre2_pattern_info.c */

832
thirdparty/pcre2/src/pcre2_printint.c vendored Normal file
View file

@ -0,0 +1,832 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is #included in pcre2test.c at each supported
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
that comprise the library. It can also optionally be included in
pcre2_compile.c for detailed debugging in error situations. */
/* Tables of operator names. The same 8-bit table is used for all code unit
widths, so it must be defined only once. The list itself is defined in
pcre2_internal.h, which is #included by pcre2test before this file. The
OP_LISTS_DEFINED guard is what keeps the table from being defined again when
this file is included a second time for another code unit width. */
#ifndef OP_LISTS_DEFINED
static const char *OP_names[] = { OP_NAME_LIST };
#define OP_LISTS_DEFINED
#endif
/* The functions and tables herein must all have mode-dependent names. Each
macro below maps the plain name used in the rest of this file onto a name
built with PCRE2_SUFFIX, so the copies made for different code unit widths do
not clash. */
#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
#define print_char PCRE2_SUFFIX(print_char_)
#define print_custring PCRE2_SUFFIX(print_custring_)
#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
#define print_prop PCRE2_SUFFIX(print_prop_)
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre2_internal.h.
The contents of the table are, however, mode-dependent. Like OP_names, the
table is indexed by opcode. */
static const uint8_t OP_lengths[] = { OP_LENGTHS };
/*************************************************
*       Print one character from a string        *
*************************************************/

/* Writes a readable rendering of the character that starts at ptr. In UTF
mode the character may occupy more than one code unit. Well-formed characters
are shown literally or with a lower case \x escape; code units that do not
form a valid character are shown with \X instead.

Arguments:
  f           file to write to
  ptr         pointer to first code unit of the character
  utf         TRUE if string is UTF (will be FALSE if UTF is not supported)

Returns:      number of additional code units used
*/

static unsigned int
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
{
uint32_t ch = *ptr;
BOOL single_unit = !utf;

/* With UTF supported and requested, decide whether the first code unit is a
complete, valid character on its own. */

#ifdef SUPPORT_UNICODE
if (utf)
  {
#if PCRE2_CODE_UNIT_WIDTH == 8
  single_unit = ch < 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
  single_unit = (ch & 0xfc00) != 0xd800;
#else
  single_unit = (ch & 0xfffff800u) != 0xd800u;
#endif /* CODE_UNIT_WIDTH */
  }
#endif /* SUPPORT_UNICODE */

/* A valid single-unit character is handled the same way at every width. */

if (single_unit)
  {
  if (PRINTABLE(ch))
    fprintf(f, "%c", (char)ch);
  else
    {
    if (ch < 0x80) fprintf(f, "\\x%02x", ch);
    else fprintf(f, "\\x{%02x}", ch);
    }
  return 0;
  }

/* What remains is width-specific: multi-unit characters and invalid code
units. Without UTF support control cannot reach this point, but a return is
still needed to satisfy the compiler. */

#ifndef SUPPORT_UNICODE
return 0;
#else

/* UTF-8. Malformed input should only be seen when the validity check was
switched off. Decoding stops at the first bad byte instead of consuming
arbitrary data, and what has been gathered is printed with \X. */

#if PCRE2_CODE_UNIT_WIDTH == 8
if ((ch & 0xc0) == 0xc0)
  {
  int k;
  int extra_bytes = PRIV(utf8_table4)[ch & 0x3f];  /* Number of additional bytes */
  int shift = 6*extra_bytes;
  ch = (ch & PRIV(utf8_table3)[extra_bytes]) << shift;
  for (k = 1; k <= extra_bytes; k++)
    {
    if ((ptr[k] & 0xc0) != 0x80)
      {
      fprintf(f, "\\X{%x}", ch);       /* Invalid secondary byte */
      return k - 1;
      }
    shift -= 6;
    ch |= (ptr[k] & 0x3f) << shift;
    }
  fprintf(f, "\\x{%x}", ch);
  return extra_bytes;
  }
fprintf(f, "\\X{%x}", ch);             /* Invalid starting byte */
return 0;
#endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */

/* UTF-16. The first unit is a high surrogate; it is combined with the next
unit only if that is a low surrogate. Otherwise nothing more is consumed and
the lone unit is printed with \X. */

#if PCRE2_CODE_UNIT_WIDTH == 16
if ((ptr[1] & 0xfc00) == 0xdc00)
  {
  ch = (((ch & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
  fprintf(f, "\\x{%x}", ch);
  return 1;
  }
fprintf(f, "\\X{%x}", ch);
return 0;
#endif  /* PCRE2_CODE_UNIT_WIDTH == 16 */

/* UTF-32. Only a malformed code unit gets this far, which should happen only
when the validity check was switched off. It is printed with \X. */

#if PCRE2_CODE_UNIT_WIDTH == 32
fprintf(f, "\\X{%x}", ch);
return 0;
#endif  /* PCRE2_CODE_UNIT_WIDTH == 32 */
#endif  /* SUPPORT_UNICODE */
}
/*************************************************
* Print string as a list of code units *
*************************************************/
/* These take no account of UTF as they always print each individual code unit.
The string is zero-terminated for print_custring(); the length is given for
print_custring_bylen().
Arguments:
f file to write to
ptr point to the string
len length for print_custring_bylen()
Returns: nothing
*/
static void
print_custring(FILE *f, PCRE2_SPTR ptr)
{
/* Emit code units one at a time until the terminating zero is reached.
Printable units are written as themselves, the rest as \x{..} escapes. */
for (; *ptr != '\0'; ptr++)
  {
  uint32_t unit = *ptr;
  if (PRINTABLE(unit))
    fprintf(f, "%c", unit);
  else
    fprintf(f, "\\x{%x}", unit);
  }
}
static void
print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
{
/* Emit exactly len code units, whatever their values. Printable units are
written as themselves, the rest as \x{..} escapes. */
while (len > 0)
  {
  uint32_t unit = *ptr++;
  if (PRINTABLE(unit))
    fprintf(f, "%c", unit);
  else
    fprintf(f, "\\x{%x}", unit);
  len--;
  }
}
/*************************************************
* Find Unicode property name *
*************************************************/
/* When there is no UTF/UCP support, the table of names does not exist. This
function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
into the main code, however, we just put one into this function. */
static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
{
#ifdef SUPPORT_UNICODE
int idx;

/* Search the property table from its last entry back to its first, and hand
back the name of the first entry met whose type and value both match. */
for (idx = PRIV(utt_size) - 1; idx >= 0; idx--)
  {
  if (PRIV(utt)[idx].type == ptype && PRIV(utt)[idx].value == pvalue)
    return PRIV(utt_names) + PRIV(utt)[idx].name_offset;
  }

/* No entry matched. */
return "??";
#else   /* No UTF support */
/* There is no table to search; both arguments are deliberately unused. */
(void)pvalue;
(void)ptype;
return "??";
#endif  /* SUPPORT_UNICODE */
}
/*************************************************
* Print Unicode property value *
*************************************************/
/* "Normal" properties can be printed from tables. The PT_CLIST property is a
pseudo-property that contains a pointer to a list of case-equivalent
characters.
Arguments:
f file to write to
code pointer in the compiled code
before text to print before
after text to print after
Returns: nothing
*/
static void
print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
{
const uint32_t *member;

/* Ordinary property: opcode name followed by the property name looked up
from the type in code[1] and the value in code[2]. */
if (code[1] != PT_CLIST)
  {
  fprintf(f, "%s%s %s%s", before, OP_names[*code],
    get_ucpname(code[1], code[2]), after);
  return;
  }

/* PT_CLIST: code[2] is an offset into the table of caseless sets. The
characters of the set are listed up to, but not including, the first entry
that is not below NOTACHAR. */
fprintf (f, "%s%sclist", before, (*code == OP_PROP)? "" : "not ");
for (member = PRIV(ucd_caseless_sets) + code[2]; *member < NOTACHAR; member++)
  fprintf(f, " %04x", *member);
fprintf(f, "%s", after);
}
/*************************************************
* Print compiled pattern *
*************************************************/
/* The print_lengths flag controls whether offsets and lengths of items are
printed. Lengths can be turned off from pcre2test so that automatic tests on
bytecode can be written that do not depend on the value of LINK_SIZE.
Arguments:
re a compiled pattern
f the file to write to
print_lengths show various lengths
Returns: nothing
*/
static void
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
{
PCRE2_SPTR codestart, nametable, code;
uint32_t nesize = re->name_entry_size;
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
/* The name table sits directly after the pcre2_real_code header, and the
compiled code begins right after the last name table entry. */
nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
code = codestart = nametable + re->name_count * re->name_entry_size;
/* One pass of this loop prints one line of output. Most cases "break" out of
the switch to the common advance at the bottom of the loop, which steps over
the fixed length of the item plus whatever has been put in "extra". OP_CHAR
and OP_CHARI advance "code" themselves and "continue"; OP_END returns. */
for(;;)
{
PCRE2_SPTR ccode;
uint32_t c;
int i;
const char *flag = " ";
unsigned int extra = 0;
/* Start of line: the offset of this item from the start of the code, or
filler text when lengths are suppressed. */
if (print_lengths)
fprintf(f, "%3d ", (int)(code - codestart));
else
fprintf(f, " ");
switch(*code)
{
/* ========================================================================== */
/* These cases are never obeyed. This is a fudge that causes a compile-
time error if the vectors OP_names or OP_lengths, which are indexed
by opcode, are not the correct length. It seems to be the only way to do
such a check at compile time, as the sizeof() operator does not work in
the C preprocessor. */
case OP_TABLE_LENGTH:
case OP_TABLE_LENGTH +
((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
(sizeof(OP_lengths) == OP_TABLE_LENGTH)):
break;
/* ========================================================================== */
case OP_END:
fprintf(f, " %s\n", OP_names[*code]);
fprintf(f, "------------------------------------------------------------------\n");
return;
/* A run of consecutive OP_CHAR items is printed on a single line. For each
one, the opcode is skipped, the character is printed, and "code" moves on by
one code unit plus the number of additional units print_char() reports. */
case OP_CHAR:
fprintf(f, " ");
do
{
code++;
code += 1 + print_char(f, code, utf);
}
while (*code == OP_CHAR);
fprintf(f, "\n");
continue;
/* The same for a run of caseless characters, flagged with /i. */
case OP_CHARI:
fprintf(f, " /i ");
do
{
code++;
code += 1 + print_char(f, code, utf);
}
while (*code == OP_CHARI);
fprintf(f, "\n");
continue;
/* Capturing brackets: the link value (when lengths are wanted), the opcode
name, and the 2-byte number that follows the link. */
case OP_CBRA:
case OP_CBRAPOS:
case OP_SCBRA:
case OP_SCBRAPOS:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
break;
/* Items that carry only a link value. */
case OP_BRA:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
case OP_KETRMAX:
case OP_KETRMIN:
case OP_KETRPOS:
case OP_ALT:
case OP_KET:
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
case OP_ONCE_NC:
case OP_COND:
case OP_SCOND:
case OP_REVERSE:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s", OP_names[*code]);
break;
case OP_CLOSE:
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
break;
case OP_CREF:
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
break;
/* Condition on a named group. The first 2-byte value indexes the name table;
the name text begins IMM2_SIZE code units into the selected entry. The second
2-byte value is printed after the name. */
case OP_DNCREF:
{
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
fprintf(f, " %s Cond ref <", flag);
print_custring(f, entry);
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
}
break;
case OP_RREF:
c = GET2(code, 1);
if (c == RREF_ANY)
fprintf(f, " Cond recurse any");
else
fprintf(f, " Cond recurse %d", c);
break;
/* As OP_DNCREF, but for a recursion condition. */
case OP_DNRREF:
{
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
fprintf(f, " %s Cond recurse <", flag);
print_custring(f, entry);
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
}
break;
case OP_FALSE:
fprintf(f, " Cond false");
break;
case OP_TRUE:
fprintf(f, " Cond true");
break;
/* Single-item repeats without a count. The caseless forms only set the flag
and then share the code below. */
case OP_STARI:
case OP_MINSTARI:
case OP_POSSTARI:
case OP_PLUSI:
case OP_MINPLUSI:
case OP_POSPLUSI:
case OP_QUERYI:
case OP_MINQUERYI:
case OP_POSQUERYI:
flag = "/i";
/* Fall through */
case OP_STAR:
case OP_MINSTAR:
case OP_POSSTAR:
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPOSSTAR:
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEPOSPLUS:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEPOSQUERY:
fprintf(f, " %s ", flag);
/* The comparison below separates the OP_TYPExxx repeats, whose operand is an
opcode (possibly a property test followed by two data units), from the
character repeats, whose operand is a character. NOTE(review): this relies on
every character-repeat opcode in this group being numerically below
OP_TYPESTAR; the opcode numbering is defined outside this file. */
if (*code >= OP_TYPESTAR)
{
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
{
print_prop(f, code + 1, "", " ");
extra = 2;
}
else fprintf(f, "%s", OP_names[code[1]]);
}
else extra = print_char(f, code+1, utf);
fprintf(f, "%s", OP_names[*code]);
break;
/* Character repeats with a count: the 2-byte count comes first, and the
character follows it. */
case OP_EXACTI:
case OP_UPTOI:
case OP_MINUPTOI:
case OP_POSUPTOI:
flag = "/i";
/* Fall through */
case OP_EXACT:
case OP_UPTO:
case OP_MINUPTO:
case OP_POSUPTO:
fprintf(f, " %s ", flag);
extra = print_char(f, code + 1 + IMM2_SIZE, utf);
fprintf(f, "{");
if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?");
else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+");
break;
/* Character-type repeats with a count; the repeated opcode follows the 2-byte
count and may be a property test with two data units after it. */
case OP_TYPEEXACT:
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEPOSUPTO:
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
{
print_prop(f, code + IMM2_SIZE + 1, " ", " ");
extra = 2;
}
else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
fprintf(f, "{");
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
break;
/* Negated single character, shown as a one-member negated class. */
case OP_NOTI:
flag = "/i";
/* Fall through */
case OP_NOT:
fprintf(f, " %s [^", flag);
extra = print_char(f, code + 1, utf);
fprintf(f, "]");
break;
/* Repeats of a negated character, without a count. */
case OP_NOTSTARI:
case OP_NOTMINSTARI:
case OP_NOTPOSSTARI:
case OP_NOTPLUSI:
case OP_NOTMINPLUSI:
case OP_NOTPOSPLUSI:
case OP_NOTQUERYI:
case OP_NOTMINQUERYI:
case OP_NOTPOSQUERYI:
flag = "/i";
/* Fall through */
case OP_NOTSTAR:
case OP_NOTMINSTAR:
case OP_NOTPOSSTAR:
case OP_NOTPLUS:
case OP_NOTMINPLUS:
case OP_NOTPOSPLUS:
case OP_NOTQUERY:
case OP_NOTMINQUERY:
case OP_NOTPOSQUERY:
fprintf(f, " %s [^", flag);
extra = print_char(f, code + 1, utf);
fprintf(f, "]%s", OP_names[*code]);
break;
/* Repeats of a negated character, with a count. */
case OP_NOTEXACTI:
case OP_NOTUPTOI:
case OP_NOTMINUPTOI:
case OP_NOTPOSUPTOI:
flag = "/i";
/* Fall through */
case OP_NOTEXACT:
case OP_NOTUPTO:
case OP_NOTMINUPTO:
case OP_NOTPOSUPTO:
fprintf(f, " %s [^", flag);
extra = print_char(f, code + 1 + IMM2_SIZE, utf);
fprintf(f, "]{");
if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
else
if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+");
break;
case OP_RECURSE:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s", OP_names[*code]);
break;
/* Back references. "ccode" is pointed at the item that follows the reference
so that the shared code at CLASS_REF_REPEAT can print a repeat, if there is
one. Note that the goto jumps into the block that handles classes below. */
case OP_REFI:
flag = "/i";
/* Fall through */
case OP_REF:
fprintf(f, " %s \\%d", flag, GET2(code,1));
ccode = code + OP_lengths[*code];
goto CLASS_REF_REPEAT;
/* Back references by name; the name is found in the same way as for
OP_DNCREF. */
case OP_DNREFI:
flag = "/i";
/* Fall through */
case OP_DNREF:
{
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
fprintf(f, " %s \\k<", flag);
print_custring(f, entry);
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
}
ccode = code + OP_lengths[*code];
goto CLASS_REF_REPEAT;
case OP_CALLOUT:
fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
GET(code, 1), GET(code, 1 + LINK_SIZE));
break;
/* Callout with a string argument. "c" starts as the opening delimiter and is
replaced by the matching closing delimiter before that is printed. "extra"
receives the third link field of the item; besides giving the string length,
it is added to OP_lengths[*code] by the common advance at the bottom of the
loop. */
case OP_CALLOUT_STR:
c = code[1 + 4*LINK_SIZE];
fprintf(f, " %s %c", OP_names[*code], c);
extra = GET(code, 1 + 2*LINK_SIZE);
print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
if (c == PRIV(callout_start_delims)[i])
{
c = PRIV(callout_end_delims)[i];
break;
}
fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
GET(code, 1 + LINK_SIZE));
break;
case OP_PROP:
case OP_NOTPROP:
print_prop(f, code, " ", "");
break;
/* OP_XCLASS cannot occur in 8-bit, non-UTF mode. However, there's no harm
in having this code always here, and it makes it less messy without all
those #ifdefs. */
case OP_CLASS:
case OP_NCLASS:
case OP_XCLASS:
{
unsigned int min, max;
BOOL printmap;
BOOL invertmap = FALSE;
uint8_t *map;
uint8_t inverted_map[32];
fprintf(f, " [");
/* An extended class holds its own total length (put in "extra") and a flags
unit saying whether a bit map is present and whether the class is negated.
OP_CLASS and OP_NCLASS always have a bit map directly after the opcode. */
if (*code == OP_XCLASS)
{
extra = GET(code, 1);
ccode = code + LINK_SIZE + 1;
printmap = (*ccode & XCL_MAP) != 0;
if ((*ccode & XCL_NOT) != 0)
{
invertmap = (*ccode & XCL_HASPROP) == 0;
fprintf(f, "^");
}
ccode++;
}
else
{
printmap = TRUE;
ccode = code + 1;
}
/* Print a bit map */
if (printmap)
{
map = (uint8_t *)ccode;
if (invertmap)
{
for (i = 0; i < 32; i++) inverted_map[i] = ~map[i];
map = inverted_map;
}
/* The map has one bit for each of 256 values. For every set bit, "j" is
advanced to the end of the run of set bits that starts there. A single value,
a pair of values, or a first-last range is printed accordingly (the '-' is
left out when the run has just two members), and "i" then resumes after the
run. */
for (i = 0; i < 256; i++)
{
if ((map[i/8] & (1 << (i&7))) != 0)
{
int j;
for (j = i+1; j < 256; j++)
if ((map[j/8] & (1 << (j&7))) == 0) break;
if (i == '-' || i == ']') fprintf(f, "\\");
if (PRINTABLE(i)) fprintf(f, "%c", i);
else fprintf(f, "\\x%02x", i);
if (--j > i)
{
if (j != i + 1) fprintf(f, "-");
if (j == '-' || j == ']') fprintf(f, "\\");
if (PRINTABLE(j)) fprintf(f, "%c", j);
else fprintf(f, "\\x%02x", j);
}
i = j;
}
}
/* Step over the 32-byte map, measured in code units. */
ccode += 32 / sizeof(PCRE2_UCHAR);
}
/* For an XCLASS there is always some additional data */
if (*code == OP_XCLASS)
{
PCRE2_UCHAR ch;
while ((ch = *ccode++) != XCL_END)
{
BOOL not = FALSE;
const char *notch = "";
switch(ch)
{
case XCL_NOTPROP:
not = TRUE;
notch = "^";
/* Fall through */
case XCL_PROP:
{
unsigned int ptype = *ccode++;
unsigned int pvalue = *ccode++;
switch(ptype)
{
case PT_PXGRAPH:
fprintf(f, "[:%sgraph:]", notch);
break;
case PT_PXPRINT:
fprintf(f, "[:%sprint:]", notch);
break;
case PT_PXPUNCT:
fprintf(f, "[:%spunct:]", notch);
break;
default:
fprintf(f, "\\%c{%s}", (not? 'P':'p'),
get_ucpname(ptype, pvalue));
break;
}
}
break;
/* Anything else is a character, or for XCL_RANGE a pair of characters that
are printed with '-' between them. */
default:
ccode += 1 + print_char(f, ccode, utf);
if (ch == XCL_RANGE)
{
fprintf(f, "-");
ccode += 1 + print_char(f, ccode, utf);
}
break;
}
}
}
/* Indicate a non-UTF class which was created by negation */
fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
/* Handle repeats after a class or a back reference. This label is also the
target of the gotos in the OP_REF and OP_DNREF cases above. A repeat item is
printed on the same line, and its length is added to "extra" so that the
common advance steps over it as well. */
CLASS_REF_REPEAT:
switch(*ccode)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSPLUS:
case OP_CRPOSQUERY:
fprintf(f, "%s", OP_names[*ccode]);
extra += OP_lengths[*ccode];
break;
/* A maximum of zero is printed as an open-ended range. */
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
min = GET2(ccode,1);
max = GET2(ccode,1 + IMM2_SIZE);
if (max == 0) fprintf(f, "{%u,}", min);
else fprintf(f, "{%u,%u}", min, max);
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
extra += OP_lengths[*ccode];
break;
/* Do nothing if it's not a repeat; this code stops picky compilers
warning about the lack of a default code path. */
default:
break;
}
}
break;
/* Verbs with an argument: code[1] is the length of the argument, and the
argument text starts at code + 2. */
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
fprintf(f, " %s ", OP_names[*code]);
print_custring_bylen(f, code + 2, code[1]);
extra += code[1];
break;
case OP_THEN:
fprintf(f, " %s", OP_names[*code]);
break;
case OP_CIRCM:
case OP_DOLLM:
flag = "/m";
/* Fall through */
/* Anything else is just an item with no data, but possibly a flag. */
default:
fprintf(f, " %s %s", flag, OP_names[*code]);
break;
}
/* Common advance: the fixed length of the item, plus any variable part that
was recorded in "extra". */
code += OP_lengths[*code] + extra;
fprintf(f, "\n");
}
}
/* End of pcre2_printint.c */

265
thirdparty/pcre2/src/pcre2_serialize.c vendored Normal file
View file

@ -0,0 +1,265 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains functions for serializing and deserializing
a sequence of compiled codes. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/* Magic number to provide a small check against being handed junk. */
#define SERIALIZED_DATA_MAGIC 0x50523253u
/* Deserialization is limited to the current PCRE version and
character width. */
#define SERIALIZED_DATA_VERSION \
((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16))
#define SERIALIZED_DATA_CONFIG \
(sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16))
/*************************************************
*           Serialize compiled patterns          *
*************************************************/

/* Packs a list of compiled patterns into one freshly allocated byte stream.
The stream consists of a pcre2_serialized_data header, then one copy of the
character tables shared by all the patterns, then each compiled block in turn.
A copy of the memory controller is stored immediately in front of the returned
pointer so that pcre2_serialize_free() can find it.

Arguments:
  codes             vector of compiled patterns
  number_of_codes   number of entries in the vector (must be > 0)
  serialized_bytes  where to return a pointer to the stream
  serialized_size   where to return the size of the stream
  gcontext          general context for memory allocation, or NULL

Returns:            number_of_codes on success
                    PCRE2_ERROR_NULL, PCRE2_ERROR_BADDATA,
                    PCRE2_ERROR_BADMAGIC, PCRE2_ERROR_MIXEDTABLES or
                    PCRE2_ERROR_NOMEMORY on failure
*/

PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes,
   uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size,
   pcre2_general_context *gcontext)
{
const pcre2_memctl *allocator;
const uint8_t *shared_tables = NULL;
PCRE2_SIZE stream_size = sizeof(pcre2_serialized_data) + tables_length;
pcre2_serialized_data *header;
uint8_t *stream;
uint8_t *out;
int32_t idx;

if (gcontext != NULL)
  allocator = &gcontext->memctl;
else
  allocator = &PRIV(default_compile_context).memctl;

if (codes == NULL) return PCRE2_ERROR_NULL;
if (serialized_bytes == NULL) return PCRE2_ERROR_NULL;
if (serialized_size == NULL) return PCRE2_ERROR_NULL;
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;

/* First pass: validate every pattern and add up the space required. All the
patterns must refer to the same set of tables. */

for (idx = 0; idx < number_of_codes; idx++)
  {
  const pcre2_real_code *pattern = (const pcre2_real_code *)(codes[idx]);

  if (pattern == NULL) return PCRE2_ERROR_NULL;
  if (pattern->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;

  if (shared_tables == NULL)
    shared_tables = pattern->tables;
  else if (shared_tables != pattern->tables)
    return PCRE2_ERROR_MIXEDTABLES;

  stream_size += pattern->blocksize;
  }

/* Get the memory, with room in front for the hidden memory controller. */

stream = allocator->malloc(stream_size + sizeof(pcre2_memctl),
  allocator->memory_data);
if (stream == NULL) return PCRE2_ERROR_NOMEMORY;

memcpy(stream, allocator, sizeof(pcre2_memctl));
stream += sizeof(pcre2_memctl);

/* Fill in the header. */

header = (pcre2_serialized_data *)stream;
header->magic = SERIALIZED_DATA_MAGIC;
header->version = SERIALIZED_DATA_VERSION;
header->config = SERIALIZED_DATA_CONFIG;
header->number_of_codes = number_of_codes;

/* Second pass: the tables, followed by each compiled block. */

out = stream + sizeof(pcre2_serialized_data);
memcpy(out, shared_tables, tables_length);
out += tables_length;

for (idx = 0; idx < number_of_codes; idx++)
  {
  const pcre2_real_code *pattern = (const pcre2_real_code *)(codes[idx]);
  memcpy(out, pattern, pattern->blocksize);
  out += pattern->blocksize;
  }

*serialized_bytes = stream;
*serialized_size = stream_size;
return number_of_codes;
}
/*************************************************
*          Deserialize compiled patterns         *
*************************************************/

/* Rebuilds compiled patterns from a byte stream produced by
pcre2_serialize_encode(). One new copy of the character tables is allocated and
shared by all the decoded patterns; each pattern is flagged with
PCRE2_DEREF_TABLES and the tables are followed by a count of the patterns that
refer to them.

If anything fails while the patterns are being decoded, everything allocated
by this call (the tables, the block being built, and all the patterns already
decoded) is released and the corresponding entries in codes[] are set to NULL,
so an error return never leaves memory behind.

Arguments:
  codes             vector that receives the decoded patterns
  number_of_codes   number of slots in the vector (must be > 0); if the stream
                      holds fewer patterns, only that many are decoded
  bytes             the serialized stream
  gcontext          general context for memory allocation, or NULL

Returns:            the number of patterns decoded on success
                    PCRE2_ERROR_NULL, PCRE2_ERROR_BADDATA,
                    PCRE2_ERROR_BADSERIALIZEDDATA, PCRE2_ERROR_BADMAGIC,
                    PCRE2_ERROR_BADMODE or PCRE2_ERROR_NOMEMORY on failure
*/

PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes,
   const uint8_t *bytes, pcre2_general_context *gcontext)
{
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
const pcre2_memctl *memctl = (gcontext != NULL) ?
  &gcontext->memctl : &PRIV(default_compile_context).memctl;

const uint8_t *src_bytes;
pcre2_real_code *dst_re;
uint8_t *tables;
int32_t i, j;
int32_t rc;

/* Sanity checks. */

if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL;
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA;
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;

if (number_of_codes > data->number_of_codes)
  number_of_codes = data->number_of_codes;

src_bytes = bytes + sizeof(pcre2_serialized_data);

/* Decode tables. The reference count for the tables is stored immediately
following them. */

tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data);
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;

memcpy(tables, src_bytes, tables_length);
*(PCRE2_SIZE *)(tables + tables_length) = number_of_codes;
src_bytes += tables_length;

/* Decode the byte stream. We must not try to read the size from the compiled
code block in the stream, because it might be unaligned, which causes errors on
hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type
of the blocksize field is given its own name to ensure that it is the same here
as in the block. */

for (i = 0; i < number_of_codes; i++)
  {
  CODE_BLOCKSIZE_TYPE blocksize;

  /* Nothing is owned by this iteration yet; the failure path relies on this
  to know whether there is a partially built block to release. */

  dst_re = NULL;

  memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize),
    sizeof(CODE_BLOCKSIZE_TYPE));
  if (blocksize <= sizeof(pcre2_real_code))
    {
    rc = PCRE2_ERROR_BADSERIALIZEDDATA;
    goto FAILED;
    }

  /* The allocator provided by gcontext replaces the original one. */

  dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize,
    (pcre2_memctl *)gcontext);
  if (dst_re == NULL)
    {
    rc = PCRE2_ERROR_NOMEMORY;
    goto FAILED;
    }

  /* The new allocator must be preserved, so the copy starts after the memory
  controller at the head of the block. */

  memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl),
    src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl));
  if (dst_re->magic_number != MAGIC_NUMBER ||
      dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 ||
      dst_re->name_count > MAX_NAME_COUNT)
    {
    rc = PCRE2_ERROR_BADSERIALIZEDDATA;
    goto FAILED;
    }

  /* At the moment only one table is supported. */

  dst_re->tables = tables;
  dst_re->executable_jit = NULL;
  dst_re->flags |= PCRE2_DEREF_TABLES;

  codes[i] = dst_re;
  src_bytes += blocksize;
  }

return number_of_codes;

/* Common failure exit for the decoding loop: release the block that was being
built (if any), the shared tables, and every pattern decoded so far. */

FAILED:
if (dst_re != NULL) memctl->free(dst_re, memctl->memory_data);
memctl->free(tables, memctl->memory_data);
for (j = 0; j < i; j++)
  {
  memctl->free(codes[j], memctl->memory_data);
  codes[j] = NULL;
  }
return rc;
}
/*************************************************
*    Get the number of serialized patterns       *
*************************************************/

/* Reads the header of a serialized stream and reports how many patterns it
contains, after checking that the stream was written by a compatible build.

Argument:   bytes   the serialized stream
Returns:    the number of patterns in the stream, or
            PCRE2_ERROR_NULL, PCRE2_ERROR_BADMAGIC or PCRE2_ERROR_BADMODE
*/

PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
pcre2_serialize_get_number_of_codes(const uint8_t *bytes)
{
const pcre2_serialized_data *header;

if (bytes == NULL) return PCRE2_ERROR_NULL;
header = (const pcre2_serialized_data *)bytes;

if (header->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;

/* A different version or a different configuration are both reported as a
mode error. */

if (header->version != SERIALIZED_DATA_VERSION ||
    header->config != SERIALIZED_DATA_CONFIG)
  return PCRE2_ERROR_BADMODE;

return header->number_of_codes;
}
/*************************************************
*            Free the allocated stream           *
*************************************************/

/* Releases a stream obtained from pcre2_serialize_encode(). The memory
controller that allocated the stream sits immediately in front of the pointer
that was handed to the caller, and marks the true start of the allocation.

Argument:   bytes   the serialized stream; NULL is ignored
Returns:    nothing
*/

PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_serialize_free(uint8_t *bytes)
{
pcre2_memctl *hidden;

if (bytes == NULL) return;

hidden = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl));
hidden->free(hidden, hidden->memory_data);
}
/* End of pcre2_serialize.c */

View file

@ -0,0 +1,201 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains internal functions for comparing and finding the length
of strings. These are used instead of strcmp() etc because the standard
functions work only on 8-bit data. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
*    Compare two zero-terminated PCRE2 strings   *
*************************************************/

/* Walks both strings in step until they differ or both have ended.

Arguments:
  str1        first string
  str2        second string

Returns:      0 if the strings are identical
              1 if str1 has the greater code unit at the first difference
             -1 otherwise
*/

int
PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2)
{
for (;;)
  {
  PCRE2_UCHAR left = *str1;
  PCRE2_UCHAR right = *str2;

  if (left == '\0' && right == '\0') return 0;
  if (left != right) return (left > right)? 1 : -1;

  str1++;
  str2++;
  }
}
/*************************************************
*  Compare zero-terminated PCRE2 & 8-bit strings *
*************************************************/

/* The second string is typed as const char * because it is almost always a
literal. Each of its characters is converted to a PCRE2 code unit before being
compared.

Arguments:
  str1        first string (PCRE2 code units)
  str2        second string (8-bit characters)

Returns:      0 if the strings are identical
              1 if str1 has the greater code unit at the first difference
             -1 otherwise
*/

int
PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
{
for (;;)
  {
  PCRE2_UCHAR left = *str1;
  PCRE2_UCHAR right = *str2;

  if (left == '\0' && right == '\0') return 0;
  if (left != right) return (left > right)? 1 : -1;

  str1++;
  str2++;
  }
}
/*************************************************
*    Compare two PCRE2 strings, given a length   *
*************************************************/

/* Compares exactly len code units; a zero code unit is not treated as a
terminator.

Arguments:
  str1        first string
  str2        second string
  len         the number of code units to compare

Returns:      0 if the first len code units are identical
              1 if str1 has the greater code unit at the first difference
             -1 otherwise
*/

int
PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
{
size_t pos;

for (pos = 0; pos < len; pos++)
  {
  PCRE2_UCHAR left = str1[pos];
  PCRE2_UCHAR right = str2[pos];
  if (left != right) return (left > right)? 1 : -1;
  }

return 0;
}
/*************************************************
* Compare PCRE2 string to 8-bit string by length *
*************************************************/

/* The second string is typed as const char * because it is almost always a
literal. Exactly len units are compared; a zero is not treated as a terminator.

Arguments:
  str1        first string (PCRE2 code units)
  str2        second string (8-bit characters)
  len         the number of units to compare

Returns:      0 if the first len units are identical
              1 if str1 has the greater code unit at the first difference
             -1 otherwise
*/

int
PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
{
size_t pos;

for (pos = 0; pos < len; pos++)
  {
  PCRE2_UCHAR left = str1[pos];
  PCRE2_UCHAR right = str2[pos];
  if (left != right) return (left > right)? 1 : -1;
  }

return 0;
}
/*************************************************
*        Find the length of a PCRE2 string       *
*************************************************/

/* Counts code units up to, but not including, the terminating zero.

Argument:    the string
Returns:     the length in code units
*/

PCRE2_SIZE
PRIV(strlen)(PCRE2_SPTR str)
{
PCRE2_SPTR scan = str;
while (*scan != 0) scan++;
return (PCRE2_SIZE)(scan - str);
}
/*************************************************
* Copy 8-bit 0-terminated string to PCRE2 string *
*************************************************/

/* Each 8-bit character is converted to one PCRE2 code unit, and a terminating
zero is added. The caller must supply a buffer that is big enough.

Arguments:
  str1     buffer to receive the string
  str2     8-bit string to be copied

Returns:   the number of code units used (excluding trailing zero)
*/

PCRE2_SIZE
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
{
PCRE2_SIZE used = 0;

for (; str2[used] != 0; used++)
  str1[used] = str2[used];

str1[used] = 0;
return used;
}
/* End of pcre2_string_utils.c */

1625
thirdparty/pcre2/src/pcre2_study.c vendored Normal file

File diff suppressed because it is too large Load diff

858
thirdparty/pcre2/src/pcre2_substitute.c vendored Normal file
View file

@ -0,0 +1,858 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
#define PTR_STACK_SIZE 20
#define SUBSTITUTE_OPTIONS \
(PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
PCRE2_SUBSTITUTE_UNSET_EMPTY)
/*************************************************
* Find end of substitute text *
*************************************************/
/* In extended mode, we recognize ${name:+set text:unset text} and similar
constructions. This requires the identification of unescaped : and }
characters. This function scans for such. It must deal with nested ${
constructions. The pointer to the text is updated, either to the required end
character, or to where an error was detected.
Arguments:
code points to the compiled expression (for options)
ptrptr points to the pointer to the start of the text (updated)
ptrend end of the whole string
last TRUE if the last expected string (only } recognized)
Returns: 0 on success
negative error code on failure
*/
static int
find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
BOOL last)
{
int rc = 0;
/* Depth of ${ constructions opened, and not yet closed, inside this text. */
uint32_t nestlevel = 0;
/* TRUE while between \Q and \E, where only \E is looked for. */
BOOL literal = FALSE;
PCRE2_SPTR ptr = *ptrptr;
for (; ptr < ptrend; ptr++)
{
if (literal)
{
if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
{
literal = FALSE;
ptr += 1;
}
}
/* A } at the outer level is the terminator we are looking for; otherwise it
closes a nested ${ construction. */
else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
{
if (nestlevel == 0) goto EXIT;
nestlevel--;
}
/* A colon at the outer level also terminates, unless this is the last text. */
else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
else if (*ptr == CHAR_DOLLAR_SIGN)
{
if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
{
nestlevel++;
ptr += 1;
}
}
else if (*ptr == CHAR_BACKSLASH)
{
int erc;
/* NOTE(review): errorcode is not initialized before being passed to
check_escape() and is tested afterwards; this presumably relies on
check_escape() always storing a value through the pointer - confirm. */
int errorcode;
uint32_t ch;
/* The case-forcing escapes are not passed to check_escape(); just step over
the letter. The "continue" applies to the enclosing "for" loop, whose
increment then moves past the letter. */
if (ptr < ptrend - 1) switch (ptr[1])
{
case CHAR_L:
case CHAR_l:
case CHAR_U:
case CHAR_u:
ptr += 1;
continue;
}
ptr += 1; /* Must point after \ */
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
code->overall_options, FALSE, NULL);
ptr -= 1; /* Back to last code unit of escape */
if (errorcode != 0)
{
rc = errorcode;
goto EXIT;
}
switch(erc)
{
case 0: /* Data character */
case ESC_E: /* Isolated \E is ignored */
break;
case ESC_Q:
literal = TRUE;
break;
default:
rc = PCRE2_ERROR_BADREPESCAPE;
goto EXIT;
}
}
}
/* Falling out of the loop means the end of the string was reached. */
rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */
/* On every exit path the caller's pointer is updated, either to the
terminator or to where the problem was detected. */
EXIT:
*ptrptr = ptr;
return rc;
}
/*************************************************
* Match and substitute *
*************************************************/
/* This function applies a compiled re to a subject string and creates a new
string with substitutions. The first 7 arguments are the same as for
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
Arguments:
code points to the compiled expression
subject points to the subject string
length length of subject string (may contain binary zeros)
start_offset where to start in the subject string
options option bits
match_data points to a match_data block, or is NULL
mcontext points to a PCRE2 match context
replacement points to the replacement string
rlength length of replacement string
buffer where to put the substituted string
blength points to length of buffer; updated to length of string
Returns: >= 0 number of substitutions made
< 0 an error code
PCRE2_ERROR_BADREPLACEMENT means invalid use of $
*/
/* This macro appends "length" code units, starting at "from", to the output
buffer, checking for space first. It is not self-contained: it uses the
variables overflowed, lengthleft, suboptions, extra_needed, buffer and
buff_offset, and the label NOROOM, all of which must exist where it is
expanded. On the first overflow it either jumps to NOROOM or, if
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set, stops copying and from then on only
accumulates in extra_needed the number of code units that did not fit. Note
that it expands to a bare if/else chain and that "length" is expanded more
than once without parentheses, so it should be given a simple expression that
has no side effects. */
#define CHECKMEMCPY(from,length) \
if (!overflowed && lengthleft < length) \
{ \
if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
overflowed = TRUE; \
extra_needed = length - lengthleft; \
} \
else if (overflowed) \
{ \
extra_needed += length; \
} \
else \
{ \
memcpy(buffer + buff_offset, from, CU2BYTES(length)); \
buff_offset += length; \
lengthleft -= length; \
}
/* Here's the function. In outline: validate the options, obtain a match data
block, copy the subject up to start_offset into the buffer, then repeatedly
(once only unless PCRE2_SUBSTITUTE_GLOBAL is set) call pcre2_match(), copy the
unmatched text that precedes the match, and interpret the replacement string
into the buffer. Finally the rest of the subject and a terminating zero are
copied. On entry *blength is the size of the buffer in code units. On success
it is set to the length of the result excluding the terminating zero; when
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set and the buffer is too small it is set
to the size that is needed; on a replacement syntax error it is set to the
offset in the replacement string where the error was detected; otherwise it is
left as PCRE2_UNSET. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
{
int rc;
int subs;
/* Case forcing: the value is tested with "> 0" to select upper case and is
set negative for lower case; zero means no forcing. After each character
forcecase is reloaded from forcecasereset, so \u and \l (reset value 0) affect
one character, whereas \U and \L persist. */
int forcecase = 0;
int forcecasereset = 0;
uint32_t ovector_count;
/* Extra match options used only for the retry after an empty match. */
uint32_t goptions = 0;
uint32_t suboptions;
BOOL match_data_created = FALSE;
BOOL literal = FALSE;
BOOL overflowed = FALSE;
#ifdef SUPPORT_UNICODE
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
#endif
/* Holds the code units of one output character before it is copied. */
PCRE2_UCHAR temp[6];
PCRE2_SPTR ptr;
PCRE2_SPTR repend;
PCRE2_SIZE extra_needed = 0;
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
PCRE2_SIZE *ovector;
buff_offset = 0;
lengthleft = buff_length = *blength;
*blength = PCRE2_UNSET;
/* Partial matching is not valid. */
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
return PCRE2_ERROR_BADOPTION;
/* If no match data block is provided, create one. */
if (match_data == NULL)
{
/* NOTE(review): the compiled code or the match context is passed where a
general context is expected; presumably this works because all three begin
with the same memory control block - confirm against pcre2_intmodedep.h. */
pcre2_general_context *gcontext = (mcontext == NULL)?
(pcre2_general_context *)code :
(pcre2_general_context *)mcontext;
match_data = pcre2_match_data_create_from_pattern(code, gcontext);
if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
match_data_created = TRUE;
}
ovector = pcre2_get_ovector_pointer(match_data);
ovector_count = pcre2_get_ovector_count(match_data);
/* Find lengths of zero-terminated strings and the end of the replacement. */
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
repend = replacement + rlength;
/* Check UTF replacement string if necessary. */
#ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
{
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
if (rc != 0)
{
match_data->leftchar = 0;
goto EXIT;
}
}
#endif /* SUPPORT_UNICODE */
/* Save the substitute options and remove them from the match options. */
suboptions = options & SUBSTITUTE_OPTIONS;
options &= ~SUBSTITUTE_OPTIONS;
/* Copy up to the start offset */
if (start_offset > length)
{
match_data->leftchar = 0;
rc = PCRE2_ERROR_BADOFFSET;
goto EXIT;
}
CHECKMEMCPY(subject, start_offset);
/* Loop for global substituting. */
subs = 0;
do
{
/* Stack of saved (ptr, repend) pairs for nested replacement texts. Entries
are pushed and popped two at a time. */
PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
uint32_t ptrstackptr = 0;
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
match_data, mcontext);
#ifdef SUPPORT_UNICODE
if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
#endif
/* Any error other than no match returns the error code. No match when not
doing the special after-empty-match global rematch, or when at the end of the
subject, breaks the global loop. Otherwise, advance the starting point by one
character, copying it to the output, and try again. */
if (rc < 0)
{
PCRE2_SIZE save_start;
if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
if (goptions == 0 || start_offset >= length) break;
/* Advance by one code point. Then, if CRLF is a valid newline sequence and
we have advanced into the middle of it, advance one more code point. In
other words, do not start in the middle of CRLF, even if CR and LF on their
own are valid newlines. */
save_start = start_offset++;
if (subject[start_offset-1] == CHAR_CR &&
code->newline_convention != PCRE2_NEWLINE_CR &&
code->newline_convention != PCRE2_NEWLINE_LF &&
start_offset < length &&
subject[start_offset] == CHAR_LF)
start_offset++;
/* Otherwise, in UTF mode, advance past any secondary code points. */
else if ((code->overall_options & PCRE2_UTF) != 0)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
start_offset++;
#elif PCRE2_CODE_UNIT_WIDTH == 16
while (start_offset < length &&
(subject[start_offset] & 0xfc00) == 0xdc00)
start_offset++;
#endif
}
/* Copy what we have advanced past, reset the special global options, and
continue to the next match. */
fraglength = start_offset - save_start;
CHECKMEMCPY(subject + save_start, fraglength);
goptions = 0;
continue;
}
/* Handle a successful match. Matches that use \K to end before they start
are not supported. */
if (ovector[1] < ovector[0])
{
rc = PCRE2_ERROR_BADSUBSPATTERN;
goto EXIT;
}
/* Count substitutions with a paranoid check for integer overflow; surely no
real call to this function would ever hit this! */
if (subs == INT_MAX)
{
rc = PCRE2_ERROR_TOOMANYREPLACE;
goto EXIT;
}
subs++;
/* Copy the text leading up to the match. */
if (rc == 0) rc = ovector_count;
fraglength = ovector[0] - start_offset;
CHECKMEMCPY(subject + start_offset, fraglength);
/* Process the replacement string. Literal mode is set by \Q, but only in
extended mode when backslashes are being interpreted. In extended mode we
must handle nested substrings that are to be reprocessed. */
ptr = replacement;
for (;;)
{
uint32_t ch;
unsigned int chlen;
/* If at the end of a nested substring, pop the stack. */
if (ptr >= repend)
{
if (ptrstackptr <= 0) break; /* End of replacement string */
repend = ptrstack[--ptrstackptr];
ptr = ptrstack[--ptrstackptr];
continue;
}
/* Handle the next character */
if (literal)
{
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
{
literal = FALSE;
ptr += 2;
continue;
}
goto LOADLITERAL;
}
/* Not in literal mode. */
if (*ptr == CHAR_DOLLAR_SIGN)
{
int group, n;
/* For ${name:+...} or ${name:-...} this holds the '+' or '-'; otherwise 0. */
uint32_t special = 0;
BOOL inparens;
BOOL star;
PCRE2_SIZE sublength;
PCRE2_SPTR text1_start = NULL;
PCRE2_SPTR text1_end = NULL;
PCRE2_SPTR text2_start = NULL;
PCRE2_SPTR text2_end = NULL;
PCRE2_UCHAR next;
/* Room for a name of up to 32 code units plus a terminating zero. */
PCRE2_UCHAR name[33];
if (++ptr >= repend) goto BAD;
/* $$ stands for a single literal dollar. */
if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
group = -1;
n = 0;
inparens = FALSE;
star = FALSE;
if (next == CHAR_LEFT_CURLY_BRACKET)
{
if (++ptr >= repend) goto BAD;
next = *ptr;
inparens = TRUE;
}
if (next == CHAR_ASTERISK)
{
if (++ptr >= repend) goto BAD;
next = *ptr;
star = TRUE;
}
if (!star && next >= CHAR_0 && next <= CHAR_9)
{
group = next - CHAR_0;
while (++ptr < repend)
{
next = *ptr;
if (next < CHAR_0 || next > CHAR_9) break;
group = group * 10 + next - CHAR_0;
/* A check for a number greater than the highest captured group
is sufficient here; no need for a separate overflow check. If unknown
groups are to be treated as unset, just skip over any remaining
digits and carry on. */
if (group > code->top_bracket)
{
if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
{
while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
break;
}
else
{
rc = PCRE2_ERROR_NOSUBSTRING;
goto PTREXIT;
}
}
}
}
else
{
/* Collect a name made of "word" characters, as defined by the pattern's
character tables. */
const uint8_t *ctypes = code->tables + ctypes_offset;
while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
{
name[n++] = next;
if (n > 32) goto BAD;
if (++ptr >= repend) break;
next = *ptr;
}
if (n == 0) goto BAD;
name[n] = 0;
}
/* In extended mode we recognize ${name:+set text:unset text} and
${name:-default text}. */
if (inparens)
{
if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
!star && ptr < repend - 2 && next == CHAR_COLON)
{
special = *(++ptr);
if (special != CHAR_PLUS && special != CHAR_MINUS)
{
rc = PCRE2_ERROR_BADSUBSTITUTION;
goto PTREXIT;
}
text1_start = ++ptr;
rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
if (rc != 0) goto PTREXIT;
text1_end = ptr;
if (special == CHAR_PLUS && *ptr == CHAR_COLON)
{
text2_start = ++ptr;
rc = find_text_end(code, &ptr, repend, TRUE);
if (rc != 0) goto PTREXIT;
text2_end = ptr;
}
}
else
{
if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
{
rc = PCRE2_ERROR_REPMISSINGBRACE;
goto PTREXIT;
}
}
/* Step past the closing curly bracket. */
ptr++;
}
/* Have found a syntactically correct group number or name, or *name.
Only *MARK is currently recognized. */
if (star)
{
if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
{
PCRE2_SPTR mark = pcre2_get_mark(match_data);
if (mark != NULL)
{
PCRE2_SPTR mark_start = mark;
while (*mark != 0) mark++;
fraglength = mark - mark_start;
CHECKMEMCPY(mark_start, fraglength);
}
}
else goto BAD;
}
/* Substitute the contents of a group. We don't use substring_copy
functions any more, in order to support case forcing. */
else
{
PCRE2_SPTR subptr, subptrend;
/* Find a number for a named group. In case there are duplicate names,
search for the first one that is set. If the name is not found when
PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set, set the group number to a
non-existent group. */
if (group < 0)
{
PCRE2_SPTR first, last, entry;
rc = pcre2_substring_nametable_scan(code, name, &first, &last);
if (rc == PCRE2_ERROR_NOSUBSTRING &&
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
{
group = code->top_bracket + 1;
}
else
{
if (rc < 0) goto PTREXIT;
/* A non-negative rc is used here as the step from one name table entry
to the next. */
for (entry = first; entry <= last; entry += rc)
{
uint32_t ng = GET2(entry, 0);
if (ng < ovector_count)
{
if (group < 0) group = ng; /* First in ovector */
if (ovector[ng*2] != PCRE2_UNSET)
{
group = ng; /* First that is set */
break;
}
}
}
/* If group is still negative, it means we did not find a group
that is in the ovector. Just set the first group. */
if (group < 0) group = GET2(first, 0);
}
}
/* We now have a group that is identified by number. Find the length of
the captured string. If a group in a non-special substitution is unset
when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
if (rc < 0)
{
if (rc == PCRE2_ERROR_NOSUBSTRING &&
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
{
rc = PCRE2_ERROR_UNSET;
}
if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */
if (special == 0) /* Plain substitution */
{
/* ptr is already past the group reference, so this just carries on with
the rest of the replacement string. */
if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
goto PTREXIT; /* Else error */
}
}
/* If special is '+' we have a 'set' and possibly an 'unset' text,
both of which are reprocessed when used. If special is '-' we have a
default text for when the group is unset; it must be reprocessed. */
if (special != 0)
{
if (special == CHAR_MINUS)
{
if (rc == 0) goto LITERAL_SUBSTITUTE;
text2_start = text1_start;
text2_end = text1_end;
}
/* Save the current position and end so that scanning resumes after the
nested text; two stack slots are used for each level of nesting. */
if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
ptrstack[ptrstackptr++] = ptr;
ptrstack[ptrstackptr++] = repend;
if (rc == 0)
{
ptr = text1_start;
repend = text1_end;
}
else
{
ptr = text2_start;
repend = text2_end;
}
continue;
}
/* Otherwise we have a literal substitution of a group's contents. */
LITERAL_SUBSTITUTE:
subptr = subject + ovector[group*2];
subptrend = subject + ovector[group*2 + 1];
/* Substitute a literal string, possibly forcing alphabetic case. */
while (subptr < subptrend)
{
GETCHARINCTEST(ch, subptr);
if (forcecase != 0)
{
#ifdef SUPPORT_UNICODE
if (utf)
{
uint32_t type = UCD_CHARTYPE(ch);
if (PRIV(ucp_gentype)[type] == ucp_L &&
type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
ch = UCD_OTHERCASE(ch);
}
else
#endif
{
/* If the character is not in the class bitmap for the wanted case, replace
it using the case-flipping table. */
if (((code->tables + cbits_offset +
((forcecase > 0)? cbit_upper:cbit_lower)
)[ch/8] & (1 << (ch%8))) == 0)
ch = (code->tables + fcc_offset)[ch];
}
forcecase = forcecasereset;
}
#ifdef SUPPORT_UNICODE
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
#endif
{
temp[0] = ch;
chlen = 1;
}
CHECKMEMCPY(temp, chlen);
}
}
}
/* Handle an escape sequence in extended mode. We can use check_escape()
to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
the case-forcing escapes are not supported in pcre2_compile() so must be
recognized here. */
else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
*ptr == CHAR_BACKSLASH)
{
/* NOTE(review): errorcode is not initialized before being passed to
check_escape() and is tested afterwards; this presumably relies on
check_escape() always storing a value through the pointer - confirm. */
int errorcode;
if (ptr < repend - 1) switch (ptr[1])
{
case CHAR_L:
forcecase = forcecasereset = -1;
ptr += 2;
continue;
case CHAR_l:
forcecase = -1;
forcecasereset = 0;
ptr += 2;
continue;
case CHAR_U:
forcecase = forcecasereset = 1;
ptr += 2;
continue;
case CHAR_u:
forcecase = 1;
forcecasereset = 0;
ptr += 2;
continue;
default:
break;
}
ptr++; /* Point after \ */
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
code->overall_options, FALSE, NULL);
if (errorcode != 0) goto BADESCAPE;
switch(rc)
{
case ESC_E:
forcecase = forcecasereset = 0;
continue;
case ESC_Q:
literal = TRUE;
continue;
case 0: /* Data character */
goto LITERAL;
default:
goto BADESCAPE;
}
}
/* Handle a literal code unit */
else
{
LOADLITERAL:
GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */
/* Entered directly, with ch already set, for a data character that came from
an escape sequence. */
LITERAL:
if (forcecase != 0)
{
#ifdef SUPPORT_UNICODE
if (utf)
{
uint32_t type = UCD_CHARTYPE(ch);
if (PRIV(ucp_gentype)[type] == ucp_L &&
type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
ch = UCD_OTHERCASE(ch);
}
else
#endif
{
if (((code->tables + cbits_offset +
((forcecase > 0)? cbit_upper:cbit_lower)
)[ch/8] & (1 << (ch%8))) == 0)
ch = (code->tables + fcc_offset)[ch];
}
forcecase = forcecasereset;
}
#ifdef SUPPORT_UNICODE
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
#endif
{
temp[0] = ch;
chlen = 1;
}
CHECKMEMCPY(temp, chlen);
} /* End handling a literal code unit */
} /* End of loop for scanning the replacement. */
/* The replacement has been copied to the output. Update the start offset to
point to the rest of the subject string. If we matched an empty string,
do the magic for global matches. */
start_offset = ovector[1];
goptions = (ovector[0] != ovector[1])? 0 :
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
/* Copy the rest of the subject. */
fraglength = length - start_offset;
CHECKMEMCPY(subject + start_offset, fraglength);
/* Add a terminating zero; it is counted in buff_offset or extra_needed. */
temp[0] = 0;
CHECKMEMCPY(temp , 1);
/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
and matching has carried on after a full buffer, in order to compute the length
needed. Otherwise, an overflow generates an immediate error return. */
if (overflowed)
{
rc = PCRE2_ERROR_NOMEMORY;
*blength = buff_length + extra_needed;
}
/* After a successful execution, return the number of substitutions and set the
length of buffer used, excluding the trailing zero. */
else
{
rc = subs;
*blength = buff_offset - 1;
}
/* Common exit: free the match data if it was created here; otherwise record
the result in the caller's block. */
EXIT:
if (match_data_created) pcre2_match_data_free(match_data);
else match_data->rc = rc;
return rc;
/* The buffer is too small and PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is not set. */
NOROOM:
rc = PCRE2_ERROR_NOMEMORY;
goto EXIT;
BAD:
rc = PCRE2_ERROR_BADREPLACEMENT;
goto PTREXIT;
BADESCAPE:
rc = PCRE2_ERROR_BADREPESCAPE;
/* For errors in the replacement string, report the offset at which the
problem was detected. */
PTREXIT:
*blength = (PCRE2_SIZE)(ptr - replacement);
goto EXIT;
}
/* End of pcre2_substitute.c */

542
thirdparty/pcre2/src/pcre2_substring.c vendored Normal file
View file

@ -0,0 +1,542 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
* Copy named captured string to given buffer *
*************************************************/
/* This function copies a single captured substring into a given buffer,
identifying it by name. If the regex permits duplicate names, the first
substring that is set is chosen.
Arguments:
match_data points to the match data
stringname the name of the required substring
buffer where to put the substring
sizeptr the size of the buffer, updated to the size of the substring
Returns: if successful: zero
if not successful, a negative error code:
(1) an error from nametable_scan()
(2) an error from copy_bynumber()
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
  PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
{
PCRE2_SPTR lo, hi, slot;
int step;
int result = PCRE2_ERROR_UNAVAILABLE;  /* Until a group inside ovector is seen */

/* Lookup by name is refused for results produced by pcre2_dfa_match(). */

if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
  return PCRE2_ERROR_DFA_UFUNC;

/* A non-negative return is the size of one name table entry, with lo and hi
bracketing every entry that carries this name. */

step = pcre2_substring_nametable_scan(match_data->code, stringname, &lo, &hi);
if (step < 0) return step;

/* Walk the candidate entries in table order; the first group that is both
inside the ovector and set supplies the answer. */

for (slot = lo; slot <= hi; slot += step)
  {
  uint32_t group = GET2(slot, 0);

  if (group >= match_data->oveccount) continue;   /* Not held in ovector */

  if (match_data->ovector[group*2] == PCRE2_UNSET)
    {
    result = PCRE2_ERROR_UNSET;                   /* In ovector, but unset */
    continue;
    }

  return pcre2_substring_copy_bynumber(match_data, group, buffer, sizeptr);
  }

return result;
}
/*************************************************
* Copy numbered captured string to given buffer *
*************************************************/
/* This function copies a single captured substring into a given buffer,
identifying it by number.
Arguments:
match_data points to the match data
stringnumber the number of the required substring
buffer where to put the substring
sizeptr the size of the buffer, updated to the size of the substring
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOMEMORY: buffer too small
PCRE2_ERROR_NOSUBSTRING: no such substring
PCRE2_ERROR_UNAVAILABLE: ovector too small
PCRE2_ERROR_UNSET: substring is not set
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
  uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
{
PCRE2_SIZE length;
int status;

/* The length lookup also performs all the validity checks on the group. */

status = pcre2_substring_length_bynumber(match_data, stringnumber, &length);
if (status < 0) return status;

/* The caller's buffer has to hold the substring plus a terminating zero. */

if (*sizeptr < length + 1) return PCRE2_ERROR_NOMEMORY;

memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
  CU2BYTES(length));
buffer[length] = 0;

/* Report the substring length, which excludes the terminating zero. */

*sizeptr = length;
return 0;
}
/*************************************************
* Extract named captured string *
*************************************************/
/* This function copies a single captured substring, identified by name, into
new memory. If the regex permits duplicate names, the first substring that is
set is chosen.
Arguments:
match_data pointer to match_data
stringname the name of the required substring
stringptr where to put the pointer to the new memory
sizeptr where to put the length of the substring
Returns: if successful: zero
if not successful, a negative value:
(1) an error from nametable_scan()
(2) an error from get_bynumber()
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_get_byname(pcre2_match_data *match_data,
  PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
{
PCRE2_SPTR head, tail, cursor;
int width;
int outcome;

/* Lookup by name is refused for results produced by pcre2_dfa_match(). */

if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
  return PCRE2_ERROR_DFA_UFUNC;

/* On success the scan yields the entry size and the first and last name table
entries for this name; a negative value is passed straight back. */

width = pcre2_substring_nametable_scan(match_data->code, stringname,
  &head, &tail);
if (width < 0) return width;

/* PCRE2_ERROR_UNAVAILABLE stands unless at least one candidate group lies
within the ovector, in which case the failure becomes PCRE2_ERROR_UNSET. */

outcome = PCRE2_ERROR_UNAVAILABLE;
for (cursor = head; cursor <= tail; cursor += width)
  {
  uint32_t number = GET2(cursor, 0);
  if (number >= match_data->oveccount) continue;
  if (match_data->ovector[number*2] != PCRE2_UNSET)
    return pcre2_substring_get_bynumber(match_data, number, stringptr, sizeptr);
  outcome = PCRE2_ERROR_UNSET;
  }

return outcome;
}
/*************************************************
* Extract captured string to new memory *
*************************************************/
/* This function copies a single captured substring into a piece of new
memory.
Arguments:
match_data points to match data
stringnumber the number of the required substring
stringptr where to put a pointer to the new memory
sizeptr where to put the size of the substring
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOMEMORY: failed to get memory
PCRE2_ERROR_NOSUBSTRING: no such substring
PCRE2_ERROR_UNAVAILABLE: ovector too small
PCRE2_ERROR_UNSET: substring is not set
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
  uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
{
int rc;
PCRE2_SIZE size;
PCRE2_UCHAR *yield;

/* The length lookup validates the group number and rejects unset groups. */

rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
if (rc < 0) return rc;

/* Get a pcre2_memctl header followed by room for the substring and its
terminating zero. The string part is sized in bytes with CU2BYTES(), matching
the memcpy() below; PCRE2_CODE_UNIT_WIDTH is a width in bits, so multiplying by
it would request eight times the memory that is needed. */

yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) + CU2BYTES(size + 1),
  (pcre2_memctl *)match_data);
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;

/* The caller is given a pointer just past the header; pcre2_substring_free()
steps back by the same amount to find it again. */

yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
  CU2BYTES(size));
yield[size] = 0;
*stringptr = yield;
*sizeptr = size;
return 0;
}
/*************************************************
* Free memory obtained by get_substring *
*************************************************/
/*
Argument: the result of a previous pcre2_substring_get_byxxx()
Returns: nothing
*/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_substring_free(PCRE2_UCHAR *string)
{
pcre2_memctl *header;

if (string == NULL) return;   /* Nothing to release */

/* The pcre2_memctl block sits immediately in front of the string, and it
holds the free function and the memory data to hand to it. */

header = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
header->free(header, header->memory_data);
}
/*************************************************
* Get length of a named substring *
*************************************************/
/* This function returns the length of a named captured substring. If the regex
permits duplicate names, the first substring that is set is chosen.
Arguments:
match_data pointer to match data
stringname the name of the required substring
sizeptr where to put the length
Returns: 0 if successful, else a negative error number
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_length_byname(pcre2_match_data *match_data,
  PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
{
PCRE2_SPTR begin, end, item;
int stride;
int fallback = PCRE2_ERROR_UNAVAILABLE;  /* No candidate inside ovector yet */

/* Lookup by name is refused for results produced by pcre2_dfa_match(). */

if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
  return PCRE2_ERROR_DFA_UFUNC;

stride = pcre2_substring_nametable_scan(match_data->code, stringname,
  &begin, &end);
if (stride < 0) return stride;

/* Try every group that shares the name, in table order, and take the first
one that is set. */

for (item = begin; item <= end; item += stride)
  {
  uint32_t idx = GET2(item, 0);

  if (idx < match_data->oveccount)
    {
    if (match_data->ovector[idx*2] == PCRE2_UNSET)
      fallback = PCRE2_ERROR_UNSET;
    else
      return pcre2_substring_length_bynumber(match_data, idx, sizeptr);
    }
  }

return fallback;
}
/*************************************************
* Get length of a numbered substring *
*************************************************/
/* This function returns the length of a captured substring. If the start is
beyond the end (which can happen when \K is used in an assertion), it sets the
length to zero.
Arguments:
match_data pointer to match data
stringnumber the number of the required substring
sizeptr where to put the length, if not NULL
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOSUBSTRING: no such substring
PCRE2_ERROR_UNAVAILABLE: ovector is too small
PCRE2_ERROR_UNSET: substring is not set
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
  uint32_t stringnumber, PCRE2_SIZE *sizeptr)
{
PCRE2_SIZE start, finish;
int rc = match_data->rc;

/* After a partial match only group 0 can be queried; any other failure code
from the match is handed straight back. */

if (rc == PCRE2_ERROR_PARTIAL)
  {
  if (stringnumber != 0) return PCRE2_ERROR_PARTIAL;
  rc = 0;
  }
else if (rc < 0) return rc;   /* Match failed */

if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
  {
  /* Matched using pcre2_dfa_match(): only the ovector size and the match
  return count are consulted. */

  if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
  if (rc != 0 && stringnumber >= (uint32_t)rc) return PCRE2_ERROR_UNSET;
  }
else
  {
  /* The group must exist in the pattern, fit in the ovector, and be set. */

  if (stringnumber > match_data->code->top_bracket)
    return PCRE2_ERROR_NOSUBSTRING;
  if (stringnumber >= match_data->oveccount)
    return PCRE2_ERROR_UNAVAILABLE;
  if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
    return PCRE2_ERROR_UNSET;
  }

start = match_data->ovector[stringnumber*2];
finish = match_data->ovector[stringnumber*2+1];

/* A start that lies beyond the end is reported as a zero length. */

if (sizeptr != NULL)
  {
  if (start > finish) *sizeptr = 0;
  else *sizeptr = finish - start;
  }

return 0;
}
/*************************************************
* Extract all captured strings to new memory *
*************************************************/
/* This function gets one chunk of memory and builds a list of pointers and all
the captured substrings in it. A NULL pointer is put on the end of the list.
The substrings are zero-terminated. In addition, if the final argument is
non-NULL, a list of their lengths is returned, which allows binary data to be
handled.
Arguments:
match_data points to the match data
listptr set to point to the list of pointers
lengthsptr set to point to the list of lengths (may be NULL)
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOMEMORY: failed to get memory,
or a match failure code
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
  PCRE2_SIZE **lengthsptr)
{
int i, count, count2;
PCRE2_SIZE size;
PCRE2_SIZE *lensp;
pcre2_memctl *memp;
PCRE2_UCHAR **listp;
PCRE2_UCHAR *sp;
PCRE2_SIZE *ovector;

if ((count = match_data->rc) < 0) return count;   /* Match failed */
if (count == 0) count = match_data->oveccount;    /* Ovector too small */

count2 = 2*count;
ovector = match_data->ovector;

/* First pass: add up the size of the single block that holds everything. Each
substring costs one list pointer plus its code units plus a terminating zero. */

size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *);      /* For final NULL */
if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count;  /* For lengths */

for (i = 0; i < count2; i += 2)
  {
  size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
  if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
  }

memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;

/* Carve up the block in this order: the pcre2_memctl header, count+1 list
pointers, the optional lengths, and finally the string data. */

*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));

if (lengthsptr == NULL)
  {
  sp = (PCRE2_UCHAR *)lensp;   /* No lengths: strings follow the pointers */
  lensp = NULL;
  }
else
  {
  *lengthsptr = lensp;
  sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
  }

/* Second pass: copy each substring, record its pointer and (optionally) its
length, and terminate it with a zero. */

for (i = 0; i < count2; i += 2)
  {
  size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;

  /* A zero size includes the case of an unset group, where ovector[i] is
  PCRE2_UNSET. Skip memcpy() then, so that PCRE2_UNSET is never added to the
  subject pointer: forming that address is invalid even for a zero-length
  copy. */

  if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
  *listp++ = sp;
  if (lensp != NULL) *lensp++ = size;
  sp += size;
  *sp++ = 0;
  }

*listp = NULL;
return 0;
}
/*************************************************
* Free memory obtained by substring_list_get *
*************************************************/
/*
Argument: the result of a previous pcre2_substring_list_get()
Returns: nothing
*/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_substring_list_free(PCRE2_SPTR *list)
{
pcre2_memctl *header;

if (list == NULL) return;   /* Nothing to release */

/* The pcre2_memctl block lies directly in front of the list; it carries the
free function and the memory data to pass to it. */

header = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
header->free(header, header->memory_data);
}
/*************************************************
* Find (multiple) entries for named string *
*************************************************/
/* This function scans the nametable for a given name, using binary chop. It
returns either two pointers to the entries in the table, or, if no pointers are
given, the number of a unique group with the given name. If duplicate names are
permitted, and the name is not unique, an error is generated.
Arguments:
code the compiled regex
stringname the name whose entries are required
firstptr where to put the pointer to the first entry
lastptr where to put the pointer to the last entry
Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
otherwise, if firstptr and lastptr are NULL:
a group number for a unique substring
else PCRE2_ERROR_NOUNIQUESUBSTRING
otherwise:
the length of each entry, having set firstptr and lastptr
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
  PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
{
uint16_t lo = 0;
uint16_t hi = code->name_count;
uint16_t stride = code->name_entry_size;
PCRE2_SPTR table = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
PCRE2_SPTR hit = NULL;
PCRE2_SPTR first, last, final;

/* Binary chop over the name table. In each entry the name starts IMM2_SIZE
code units in, after the group number. */

while (hi > lo)
  {
  uint16_t probe = (hi + lo) / 2;
  PCRE2_SPTR candidate = table + stride*probe;
  int order = PRIV(strcmp)(stringname, candidate + IMM2_SIZE);

  if (order == 0)
    {
    hit = candidate;
    break;
    }
  if (order > 0) lo = probe + 1; else hi = probe;
  }

if (hit == NULL) return PCRE2_ERROR_NOSUBSTRING;

/* Entries with the same name are adjacent, so widen the hit in both directions
while the neighbouring entry still carries the name. */

final = table + stride * (code->name_count - 1);
first = last = hit;

while (first > table &&
       PRIV(strcmp)(stringname, (first - stride + IMM2_SIZE)) == 0)
  first -= stride;

while (last < final &&
       PRIV(strcmp)(stringname, (last + stride + IMM2_SIZE)) == 0)
  last += stride;

/* With no output pointers the caller wants one group number, which is only
available when the name is unique. */

if (firstptr == NULL)
  return (first == last)? (int)GET2(hit, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;

*firstptr = first;
*lastptr = last;
return stride;
}
/*************************************************
* Find number for named string *
*************************************************/
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
when it is known that names are unique. If the name is duplicated, the scan
reports PCRE2_ERROR_NOUNIQUESUBSTRING rather than returning a group number.
Arguments:
code the compiled regex
stringname the name whose number is required
Returns: the number of the named parenthesis, or a negative number
PCRE2_ERROR_NOSUBSTRING if not found
PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_number_from_name(const pcre2_code *code,
  PCRE2_SPTR stringname)
{
/* Passing NULL for both entry pointers makes the scan return a group number
(or a negative error code) instead of an entry size. */

int group = pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
return group;
}
/* End of pcre2_substring.c */

765
thirdparty/pcre2/src/pcre2_tables.c vendored Normal file
View file

@ -0,0 +1,765 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains some fixed tables that are used by more than one of the
PCRE code modules. The tables are also #included by the pcre2test program,
which uses macros to change their names from _pcre2_xxx to xxxx, thereby
avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
defined. */
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
#endif /* PCRE2_PCRE2TEST */
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre2_internal.h.
This is mode-dependent, so is skipped when this file is included by pcre2test. */
#ifndef PCRE2_PCRE2TEST
/* The entire initializer is supplied by the OP_LENGTHS macro. */
const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
#endif
/* Tables of horizontal and vertical whitespace characters, suitable for
adding to classes. */
/* The contents come from the HSPACE_LIST and VSPACE_LIST macros respectively. */
const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST };
const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
/* These tables are the pairs of delimiters that are valid for callout string
arguments. For each starting delimiter there must be a matching ending
delimiter, which in fact is different only for bracket-like delimiters. */
const uint32_t PRIV(callout_start_delims)[] = {
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 };
const uint32_t PRIV(callout_end_delims[]) = {
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 };
/*************************************************
* Tables for UTF-8 support *
*************************************************/
/* These tables are required by pcre2test in 16- or 32-bit mode, as well
as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
handling wide characters. */
#if defined PCRE2_PCRE2TEST || \
(defined SUPPORT_UNICODE && \
defined PCRE2_CODE_UNIT_WIDTH && \
PCRE2_CODE_UNIT_WIDTH == 8)
/* These are the breakpoints for different numbers of bytes in a UTF-8
character. */
const int PRIV(utf8_table1)[] = {
  0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff };

/* Number of entries in the breakpoint table, derived from the array itself. */

const int PRIV(utf8_table1_size) =
  sizeof(PRIV(utf8_table1)) / sizeof(PRIV(utf8_table1)[0]);

/* Both tables are indexed by the number of additional bytes. The first gives
the indicator bits to set in the first byte of a character, the second the mask
for the data bits in that byte. */

const int PRIV(utf8_table2)[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };

const int PRIV(utf8_table3)[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01 };

/* Number of extra bytes, indexed by the first byte masked with 0x3f. The
highest number for a valid UTF-8 first byte is in fact 0x3d. */

const uint8_t PRIV(utf8_table4)[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
#endif /* UTF-8 support needed */
#ifdef SUPPORT_UNICODE
/* Table to translate from particular type value to the general value. It is
indexed by the particular type and yields the general category.
NOTE(review): the labels on the L and P rows are listed in the order implied
by the first/last pairs in PRIV(ucp_typerange) (Ll..Lu and Pc..Ps) - confirm
against the enum in pcre2_ucp.h. Every row repeats a single value, so the
label order does not affect the table contents. */
const uint32_t PRIV(ucp_gentype)[] = {
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lm, Lo, Lt, Lu */
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
ucp_P, ucp_P, /* Po, Ps */
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
};
/* This table encodes the rules for finding the end of an extended grapheme
cluster. Every code point has a grapheme break property which is one of the
ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by
the properties of two adjacent code points. The left property selects a word
from the table, and the right property selects a bit from that word like this:
PRIV(ucp_gbtable)[left-property] & (1 << right-property)
The value is non-zero if a grapheme break is NOT permitted between the relevant
two code points. The breaking rules are as follows:
1. Break at the start and end of text (pretty obviously).
2. Do not break between a CR and LF; otherwise, break before and after
controls.
3. Do not break Hangul syllable sequences, the rules for which are:
L may be followed by L, V, LV or LVT
LV or V may be followed by V or T
LVT or T may be followed by T
4. Do not break before extending characters.
The next two rules are only for extended grapheme clusters (but that's what we
are implementing).
5. Do not break before SpacingMarks.
6. Do not break after Prepend characters.
7. Otherwise, break everywhere.
*/
/* One word per left-hand grapheme break property, in the order given by the
numbered labels. A set bit means that no break is permitted before a code point
with the corresponding right-hand property. */

const uint32_t PRIV(ucp_gbtable)[] = {
  (1<<ucp_gbLF),                                           /*  0 CR */
  0,                                                       /*  1 LF */
  0,                                                       /*  2 Control */
  (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark),                /*  3 Extend */
  (1<<ucp_gbExtend)|(1<<ucp_gbPrepend)|                    /*  4 Prepend */
    (1<<ucp_gbSpacingMark)|(1<<ucp_gbL)|
    (1<<ucp_gbV)|(1<<ucp_gbT)|(1<<ucp_gbLV)|
    (1<<ucp_gbLVT)|(1<<ucp_gbOther),
  (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark),                /*  5 SpacingMark */

  /* L may be followed by L, V, LV or LVT. Each of those bits appears once;
  ORing the L bit in a second time added nothing to the value. */

  (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)|   /*  6 L */
    (1<<ucp_gbV)|(1<<ucp_gbLV)|(1<<ucp_gbLVT),
  (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)|   /*  7 V */
    (1<<ucp_gbT),
  (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT),   /*  8 T */
  (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)|   /*  9 LV */
    (1<<ucp_gbT),
  (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT),   /* 10 LVT */
  (1<<ucp_gbRegionalIndicator),                            /* 11 RegionalIndicator */
  (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)                 /* 12 Other */
};
#ifdef SUPPORT_JIT
/* This table reverses PRIV(ucp_gentype). We can save the cost
of a memory load. The entries are laid out as pairs, one pair per row.
NOTE(review): each pair appears to be the first and last particular type of
one general category, in the order C, L, M, N, P, S, Z - confirm against the
enum in pcre2_ucp.h and the JIT code that reads this table. */
const int PRIV(ucp_typerange)[] = {
ucp_Cc, ucp_Cs,
ucp_Ll, ucp_Lu,
ucp_Mc, ucp_Mn,
ucp_Nd, ucp_No,
ucp_Pc, ucp_Ps,
ucp_Sc, ucp_So,
ucp_Zl, ucp_Zs,
};
#endif /* SUPPORT_JIT */
/* The PRIV(utt)[] table below translates Unicode property names into type and
code values. It is searched by binary chop, so must be in collating sequence of
name. Originally, the table contained pointers to the name strings in the first
field of each entry. However, that leads to a large number of relocations when
a shared library is dynamically loaded. A significant reduction is made by
putting all the names into a single, large string and then using offsets in the
table itself. Maintenance is more error-prone, but frequent changes to this
data are unlikely.
July 2008: There is now a script called maint/GenerateUtt.py that can be used
to generate this data automatically instead of maintaining it by hand.
The script was updated in March 2009 to generate a new EBCDIC-compliant
version. Like all other character and string literals that are compared against
the regular expression pattern, we must use STR_ macros instead of literal
strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Ahom0 STR_A STR_h STR_o STR_m "\0"
#define STRING_Anatolian_Hieroglyphs0 STR_A STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Any0 STR_A STR_n STR_y "\0"
#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0"
#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0"
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0"
#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
#define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"
#define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0"
#define STRING_C0 STR_C "\0"
#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
#define STRING_Caucasian_Albanian0 STR_C STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0"
#define STRING_Cc0 STR_C STR_c "\0"
#define STRING_Cf0 STR_C STR_f "\0"
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
#define STRING_Cn0 STR_C STR_n "\0"
#define STRING_Co0 STR_C STR_o "\0"
#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0"
#define STRING_Coptic0 STR_C STR_o STR_p STR_t STR_i STR_c "\0"
#define STRING_Cs0 STR_C STR_s "\0"
#define STRING_Cuneiform0 STR_C STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0"
#define STRING_Cypriot0 STR_C STR_y STR_p STR_r STR_i STR_o STR_t "\0"
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
#define STRING_Han0 STR_H STR_a STR_n "\0"
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0"
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"
#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0"
#define STRING_Inherited0 STR_I STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0"
#define STRING_Inscriptional_Pahlavi0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
#define STRING_Inscriptional_Parthian0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0"
#define STRING_Javanese0 STR_J STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0"
#define STRING_Kaithi0 STR_K STR_a STR_i STR_t STR_h STR_i "\0"
#define STRING_Kannada0 STR_K STR_a STR_n STR_n STR_a STR_d STR_a "\0"
#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
#define STRING_L0 STR_L "\0"
#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
#define STRING_Lao0 STR_L STR_a STR_o "\0"
#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
#define STRING_Linear_A0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_A "\0"
#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
#define STRING_Ll0 STR_L STR_l "\0"
#define STRING_Lm0 STR_L STR_m "\0"
#define STRING_Lo0 STR_L STR_o "\0"
#define STRING_Lt0 STR_L STR_t "\0"
#define STRING_Lu0 STR_L STR_u "\0"
#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
#define STRING_M0 STR_M "\0"
#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
#define STRING_Mc0 STR_M STR_c "\0"
#define STRING_Me0 STR_M STR_e "\0"
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
#define STRING_Mn0 STR_M STR_n "\0"
#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0"
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
#define STRING_Mro0 STR_M STR_r STR_o "\0"
#define STRING_Multani0 STR_M STR_u STR_l STR_t STR_a STR_n STR_i "\0"
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
#define STRING_N0 STR_N "\0"
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
#define STRING_Nd0 STR_N STR_d "\0"
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
#define STRING_Nko0 STR_N STR_k STR_o "\0"
#define STRING_Nl0 STR_N STR_l "\0"
#define STRING_No0 STR_N STR_o "\0"
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
#define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0"
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
#define STRING_P0 STR_P "\0"
#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
#define STRING_Palmyrene0 STR_P STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0"
#define STRING_Pau_Cin_Hau0 STR_P STR_a STR_u STR_UNDERSCORE STR_C STR_i STR_n STR_UNDERSCORE STR_H STR_a STR_u "\0"
#define STRING_Pc0 STR_P STR_c "\0"
#define STRING_Pd0 STR_P STR_d "\0"
#define STRING_Pe0 STR_P STR_e "\0"
#define STRING_Pf0 STR_P STR_f "\0"
#define STRING_Phags_Pa0 STR_P STR_h STR_a STR_g STR_s STR_UNDERSCORE STR_P STR_a "\0"
#define STRING_Phoenician0 STR_P STR_h STR_o STR_e STR_n STR_i STR_c STR_i STR_a STR_n "\0"
#define STRING_Pi0 STR_P STR_i "\0"
#define STRING_Po0 STR_P STR_o "\0"
#define STRING_Ps0 STR_P STR_s "\0"
#define STRING_Psalter_Pahlavi0 STR_P STR_s STR_a STR_l STR_t STR_e STR_r STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
#define STRING_S0 STR_S "\0"
#define STRING_Samaritan0 STR_S STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0"
#define STRING_Saurashtra0 STR_S STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0"
#define STRING_Sc0 STR_S STR_c "\0"
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0"
#define STRING_SignWriting0 STR_S STR_i STR_g STR_n STR_W STR_r STR_i STR_t STR_i STR_n STR_g "\0"
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
#define STRING_Sk0 STR_S STR_k "\0"
#define STRING_Sm0 STR_S STR_m "\0"
#define STRING_So0 STR_S STR_o "\0"
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
#define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0"
#define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0"
#define STRING_Tagalog0 STR_T STR_a STR_g STR_a STR_l STR_o STR_g "\0"
#define STRING_Tagbanwa0 STR_T STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0"
#define STRING_Tai_Le0 STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_e "\0"
#define STRING_Tai_Tham0 STR_T STR_a STR_i STR_UNDERSCORE STR_T STR_h STR_a STR_m "\0"
#define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0"
#define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0"
#define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0"
#define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0"
#define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0"
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
#define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0"
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
#define STRING_Vai0 STR_V STR_a STR_i "\0"
#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
#define STRING_Xan0 STR_X STR_a STR_n "\0"
#define STRING_Xps0 STR_X STR_p STR_s "\0"
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
#define STRING_Xuc0 STR_X STR_u STR_c "\0"
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
#define STRING_Yi0 STR_Y STR_i "\0"
#define STRING_Z0 STR_Z "\0"
#define STRING_Zl0 STR_Z STR_l "\0"
#define STRING_Zp0 STR_Z STR_p "\0"
#define STRING_Zs0 STR_Z STR_s "\0"
/* All the Unicode property names packed into one string. Each STRING_xxx0
macro used below expands to the name followed by an embedded "\0" (see the
macro definitions above), so adjacent string literal concatenation yields the
names laid out back to back, each NUL-terminated. The numeric offsets in the
first column of PRIV(utt) below index into this string, so the order of the
names here and the offsets there must be kept in step: adding, removing or
renaming a name shifts the offset of every name after it. */
const char PRIV(utt_names)[] =
STRING_Ahom0
STRING_Anatolian_Hieroglyphs0
STRING_Any0
STRING_Arabic0
STRING_Armenian0
STRING_Avestan0
STRING_Balinese0
STRING_Bamum0
STRING_Bassa_Vah0
STRING_Batak0
STRING_Bengali0
STRING_Bopomofo0
STRING_Brahmi0
STRING_Braille0
STRING_Buginese0
STRING_Buhid0
STRING_C0
STRING_Canadian_Aboriginal0
STRING_Carian0
STRING_Caucasian_Albanian0
STRING_Cc0
STRING_Cf0
STRING_Chakma0
STRING_Cham0
STRING_Cherokee0
STRING_Cn0
STRING_Co0
STRING_Common0
STRING_Coptic0
STRING_Cs0
STRING_Cuneiform0
STRING_Cypriot0
STRING_Cyrillic0
STRING_Deseret0
STRING_Devanagari0
STRING_Duployan0
STRING_Egyptian_Hieroglyphs0
STRING_Elbasan0
STRING_Ethiopic0
STRING_Georgian0
STRING_Glagolitic0
STRING_Gothic0
STRING_Grantha0
STRING_Greek0
STRING_Gujarati0
STRING_Gurmukhi0
STRING_Han0
STRING_Hangul0
STRING_Hanunoo0
STRING_Hatran0
STRING_Hebrew0
STRING_Hiragana0
STRING_Imperial_Aramaic0
STRING_Inherited0
STRING_Inscriptional_Pahlavi0
STRING_Inscriptional_Parthian0
STRING_Javanese0
STRING_Kaithi0
STRING_Kannada0
STRING_Katakana0
STRING_Kayah_Li0
STRING_Kharoshthi0
STRING_Khmer0
STRING_Khojki0
STRING_Khudawadi0
STRING_L0
STRING_L_AMPERSAND0
STRING_Lao0
STRING_Latin0
STRING_Lepcha0
STRING_Limbu0
STRING_Linear_A0
STRING_Linear_B0
STRING_Lisu0
STRING_Ll0
STRING_Lm0
STRING_Lo0
STRING_Lt0
STRING_Lu0
STRING_Lycian0
STRING_Lydian0
STRING_M0
STRING_Mahajani0
STRING_Malayalam0
STRING_Mandaic0
STRING_Manichaean0
STRING_Mc0
STRING_Me0
STRING_Meetei_Mayek0
STRING_Mende_Kikakui0
STRING_Meroitic_Cursive0
STRING_Meroitic_Hieroglyphs0
STRING_Miao0
STRING_Mn0
STRING_Modi0
STRING_Mongolian0
STRING_Mro0
STRING_Multani0
STRING_Myanmar0
STRING_N0
STRING_Nabataean0
STRING_Nd0
STRING_New_Tai_Lue0
STRING_Nko0
STRING_Nl0
STRING_No0
STRING_Ogham0
STRING_Ol_Chiki0
STRING_Old_Hungarian0
STRING_Old_Italic0
STRING_Old_North_Arabian0
STRING_Old_Permic0
STRING_Old_Persian0
STRING_Old_South_Arabian0
STRING_Old_Turkic0
STRING_Oriya0
STRING_Osmanya0
STRING_P0
STRING_Pahawh_Hmong0
STRING_Palmyrene0
STRING_Pau_Cin_Hau0
STRING_Pc0
STRING_Pd0
STRING_Pe0
STRING_Pf0
STRING_Phags_Pa0
STRING_Phoenician0
STRING_Pi0
STRING_Po0
STRING_Ps0
STRING_Psalter_Pahlavi0
STRING_Rejang0
STRING_Runic0
STRING_S0
STRING_Samaritan0
STRING_Saurashtra0
STRING_Sc0
STRING_Sharada0
STRING_Shavian0
STRING_Siddham0
STRING_SignWriting0
STRING_Sinhala0
STRING_Sk0
STRING_Sm0
STRING_So0
STRING_Sora_Sompeng0
STRING_Sundanese0
STRING_Syloti_Nagri0
STRING_Syriac0
STRING_Tagalog0
STRING_Tagbanwa0
STRING_Tai_Le0
STRING_Tai_Tham0
STRING_Tai_Viet0
STRING_Takri0
STRING_Tamil0
STRING_Telugu0
STRING_Thaana0
STRING_Thai0
STRING_Tibetan0
STRING_Tifinagh0
STRING_Tirhuta0
STRING_Ugaritic0
STRING_Vai0
STRING_Warang_Citi0
STRING_Xan0
STRING_Xps0
STRING_Xsp0
STRING_Xuc0
STRING_Xwd0
STRING_Yi0
STRING_Z0
STRING_Zl0
STRING_Zp0
STRING_Zs0;
/* Table describing each Unicode property name, one entry per name in
PRIV(utt_names) and in the same order. Each entry holds three values:

  1. the offset at which the name starts within PRIV(utt_names) (0 for the
     first name, then the running total of each earlier name's length plus
     its terminating NUL);
  2. a PT_xxx code giving the kind of property (PT_SC script, PT_GC general
     category, PT_PC particular category, or one of the special kinds);
  3. the ucp_xxx value for that property, or 0 for the special kinds
     (PT_ANY, PT_LAMP, PT_ALNUM, PT_PXSPACE, PT_SPACE, PT_UCNC, PT_WORD).

The offsets are literal numbers, not computed, so any change to the names
string requires every later offset to be recalculated by hand or by the
generating script.

NOTE(review): the entries are in ascending byte order of name, presumably so
that the table can be binary-searched by the pattern compiler - confirm
against the lookup code before relying on it. The member names of
ucp_type_table are not visible in this file. */
const ucp_type_table PRIV(utt)[] = {
{ 0, PT_SC, ucp_Ahom },
{ 5, PT_SC, ucp_Anatolian_Hieroglyphs },
{ 27, PT_ANY, 0 },
{ 31, PT_SC, ucp_Arabic },
{ 38, PT_SC, ucp_Armenian },
{ 47, PT_SC, ucp_Avestan },
{ 55, PT_SC, ucp_Balinese },
{ 64, PT_SC, ucp_Bamum },
{ 70, PT_SC, ucp_Bassa_Vah },
{ 80, PT_SC, ucp_Batak },
{ 86, PT_SC, ucp_Bengali },
{ 94, PT_SC, ucp_Bopomofo },
{ 103, PT_SC, ucp_Brahmi },
{ 110, PT_SC, ucp_Braille },
{ 118, PT_SC, ucp_Buginese },
{ 127, PT_SC, ucp_Buhid },
{ 133, PT_GC, ucp_C },
{ 135, PT_SC, ucp_Canadian_Aboriginal },
{ 155, PT_SC, ucp_Carian },
{ 162, PT_SC, ucp_Caucasian_Albanian },
{ 181, PT_PC, ucp_Cc },
{ 184, PT_PC, ucp_Cf },
{ 187, PT_SC, ucp_Chakma },
{ 194, PT_SC, ucp_Cham },
{ 199, PT_SC, ucp_Cherokee },
{ 208, PT_PC, ucp_Cn },
{ 211, PT_PC, ucp_Co },
{ 214, PT_SC, ucp_Common },
{ 221, PT_SC, ucp_Coptic },
{ 228, PT_PC, ucp_Cs },
{ 231, PT_SC, ucp_Cuneiform },
{ 241, PT_SC, ucp_Cypriot },
{ 249, PT_SC, ucp_Cyrillic },
{ 258, PT_SC, ucp_Deseret },
{ 266, PT_SC, ucp_Devanagari },
{ 277, PT_SC, ucp_Duployan },
{ 286, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 307, PT_SC, ucp_Elbasan },
{ 315, PT_SC, ucp_Ethiopic },
{ 324, PT_SC, ucp_Georgian },
{ 333, PT_SC, ucp_Glagolitic },
{ 344, PT_SC, ucp_Gothic },
{ 351, PT_SC, ucp_Grantha },
{ 359, PT_SC, ucp_Greek },
{ 365, PT_SC, ucp_Gujarati },
{ 374, PT_SC, ucp_Gurmukhi },
{ 383, PT_SC, ucp_Han },
{ 387, PT_SC, ucp_Hangul },
{ 394, PT_SC, ucp_Hanunoo },
{ 402, PT_SC, ucp_Hatran },
{ 409, PT_SC, ucp_Hebrew },
{ 416, PT_SC, ucp_Hiragana },
{ 425, PT_SC, ucp_Imperial_Aramaic },
{ 442, PT_SC, ucp_Inherited },
{ 452, PT_SC, ucp_Inscriptional_Pahlavi },
{ 474, PT_SC, ucp_Inscriptional_Parthian },
{ 497, PT_SC, ucp_Javanese },
{ 506, PT_SC, ucp_Kaithi },
{ 513, PT_SC, ucp_Kannada },
{ 521, PT_SC, ucp_Katakana },
{ 530, PT_SC, ucp_Kayah_Li },
{ 539, PT_SC, ucp_Kharoshthi },
{ 550, PT_SC, ucp_Khmer },
{ 556, PT_SC, ucp_Khojki },
{ 563, PT_SC, ucp_Khudawadi },
{ 573, PT_GC, ucp_L },
{ 575, PT_LAMP, 0 },
{ 578, PT_SC, ucp_Lao },
{ 582, PT_SC, ucp_Latin },
{ 588, PT_SC, ucp_Lepcha },
{ 595, PT_SC, ucp_Limbu },
{ 601, PT_SC, ucp_Linear_A },
{ 610, PT_SC, ucp_Linear_B },
{ 619, PT_SC, ucp_Lisu },
{ 624, PT_PC, ucp_Ll },
{ 627, PT_PC, ucp_Lm },
{ 630, PT_PC, ucp_Lo },
{ 633, PT_PC, ucp_Lt },
{ 636, PT_PC, ucp_Lu },
{ 639, PT_SC, ucp_Lycian },
{ 646, PT_SC, ucp_Lydian },
{ 653, PT_GC, ucp_M },
{ 655, PT_SC, ucp_Mahajani },
{ 664, PT_SC, ucp_Malayalam },
{ 674, PT_SC, ucp_Mandaic },
{ 682, PT_SC, ucp_Manichaean },
{ 693, PT_PC, ucp_Mc },
{ 696, PT_PC, ucp_Me },
{ 699, PT_SC, ucp_Meetei_Mayek },
{ 712, PT_SC, ucp_Mende_Kikakui },
{ 726, PT_SC, ucp_Meroitic_Cursive },
{ 743, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 764, PT_SC, ucp_Miao },
{ 769, PT_PC, ucp_Mn },
{ 772, PT_SC, ucp_Modi },
{ 777, PT_SC, ucp_Mongolian },
{ 787, PT_SC, ucp_Mro },
{ 791, PT_SC, ucp_Multani },
{ 799, PT_SC, ucp_Myanmar },
{ 807, PT_GC, ucp_N },
{ 809, PT_SC, ucp_Nabataean },
{ 819, PT_PC, ucp_Nd },
{ 822, PT_SC, ucp_New_Tai_Lue },
{ 834, PT_SC, ucp_Nko },
{ 838, PT_PC, ucp_Nl },
{ 841, PT_PC, ucp_No },
{ 844, PT_SC, ucp_Ogham },
{ 850, PT_SC, ucp_Ol_Chiki },
{ 859, PT_SC, ucp_Old_Hungarian },
{ 873, PT_SC, ucp_Old_Italic },
{ 884, PT_SC, ucp_Old_North_Arabian },
{ 902, PT_SC, ucp_Old_Permic },
{ 913, PT_SC, ucp_Old_Persian },
{ 925, PT_SC, ucp_Old_South_Arabian },
{ 943, PT_SC, ucp_Old_Turkic },
{ 954, PT_SC, ucp_Oriya },
{ 960, PT_SC, ucp_Osmanya },
{ 968, PT_GC, ucp_P },
{ 970, PT_SC, ucp_Pahawh_Hmong },
{ 983, PT_SC, ucp_Palmyrene },
{ 993, PT_SC, ucp_Pau_Cin_Hau },
{ 1005, PT_PC, ucp_Pc },
{ 1008, PT_PC, ucp_Pd },
{ 1011, PT_PC, ucp_Pe },
{ 1014, PT_PC, ucp_Pf },
{ 1017, PT_SC, ucp_Phags_Pa },
{ 1026, PT_SC, ucp_Phoenician },
{ 1037, PT_PC, ucp_Pi },
{ 1040, PT_PC, ucp_Po },
{ 1043, PT_PC, ucp_Ps },
{ 1046, PT_SC, ucp_Psalter_Pahlavi },
{ 1062, PT_SC, ucp_Rejang },
{ 1069, PT_SC, ucp_Runic },
{ 1075, PT_GC, ucp_S },
{ 1077, PT_SC, ucp_Samaritan },
{ 1087, PT_SC, ucp_Saurashtra },
{ 1098, PT_PC, ucp_Sc },
{ 1101, PT_SC, ucp_Sharada },
{ 1109, PT_SC, ucp_Shavian },
{ 1117, PT_SC, ucp_Siddham },
{ 1125, PT_SC, ucp_SignWriting },
{ 1137, PT_SC, ucp_Sinhala },
{ 1145, PT_PC, ucp_Sk },
{ 1148, PT_PC, ucp_Sm },
{ 1151, PT_PC, ucp_So },
{ 1154, PT_SC, ucp_Sora_Sompeng },
{ 1167, PT_SC, ucp_Sundanese },
{ 1177, PT_SC, ucp_Syloti_Nagri },
{ 1190, PT_SC, ucp_Syriac },
{ 1197, PT_SC, ucp_Tagalog },
{ 1205, PT_SC, ucp_Tagbanwa },
{ 1214, PT_SC, ucp_Tai_Le },
{ 1221, PT_SC, ucp_Tai_Tham },
{ 1230, PT_SC, ucp_Tai_Viet },
{ 1239, PT_SC, ucp_Takri },
{ 1245, PT_SC, ucp_Tamil },
{ 1251, PT_SC, ucp_Telugu },
{ 1258, PT_SC, ucp_Thaana },
{ 1265, PT_SC, ucp_Thai },
{ 1270, PT_SC, ucp_Tibetan },
{ 1278, PT_SC, ucp_Tifinagh },
{ 1287, PT_SC, ucp_Tirhuta },
{ 1295, PT_SC, ucp_Ugaritic },
{ 1304, PT_SC, ucp_Vai },
{ 1308, PT_SC, ucp_Warang_Citi },
{ 1320, PT_ALNUM, 0 },
{ 1324, PT_PXSPACE, 0 },
{ 1328, PT_SPACE, 0 },
{ 1332, PT_UCNC, 0 },
{ 1336, PT_WORD, 0 },
{ 1340, PT_SC, ucp_Yi },
{ 1343, PT_GC, ucp_Z },
{ 1345, PT_PC, ucp_Zl },
{ 1348, PT_PC, ucp_Zp },
{ 1351, PT_PC, ucp_Zs }
};
/* Number of entries in PRIV(utt), derived from the array itself so that it
follows the table automatically when entries are added or removed. */
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(PRIV(utt)[0]);
#endif /* SUPPORT_UNICODE */
/* End of pcre2_tables.c */

3747
thirdparty/pcre2/src/pcre2_ucd.c vendored Normal file

File diff suppressed because it is too large Load diff

268
thirdparty/pcre2/src/pcre2_ucp.h vendored Normal file
View file

@ -0,0 +1,268 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.
IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.
ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre2_compile.c. */
/* These are the general character categories (the one-letter major classes).
No explicit values are given, so the enumerators number upwards from zero in
the order written. As noted at the top of this file, those numeric values must
match the ones generated by maint/MultiStage2.py and are assumed by the
catposstab table in pcre2_compile.c, so existing entries must not be
reordered. */
enum {
ucp_C, /* Other */
ucp_L, /* Letter */
ucp_M, /* Mark */
ucp_N, /* Number */
ucp_P, /* Punctuation */
ucp_S, /* Symbol */
ucp_Z /* Separator */
};
/* These are the particular character categories (the two-letter classes; the
first letter of each name is the general category it belongs to). The
enumerators take implicit values starting at zero in the order written. As
noted at the top of this file, those values must match the ones generated by
maint/MultiStage2.py and are assumed by the catposstab table in
pcre2_compile.c, so existing entries must not be reordered. */
enum {
ucp_Cc, /* Control */
ucp_Cf, /* Format */
ucp_Cn, /* Unassigned */
ucp_Co, /* Private use */
ucp_Cs, /* Surrogate */
ucp_Ll, /* Lower case letter */
ucp_Lm, /* Modifier letter */
ucp_Lo, /* Other letter */
ucp_Lt, /* Title case letter */
ucp_Lu, /* Upper case letter */
ucp_Mc, /* Spacing mark */
ucp_Me, /* Enclosing mark */
ucp_Mn, /* Non-spacing mark */
ucp_Nd, /* Decimal number */
ucp_Nl, /* Letter number */
ucp_No, /* Other number */
ucp_Pc, /* Connector punctuation */
ucp_Pd, /* Dash punctuation */
ucp_Pe, /* Close punctuation */
ucp_Pf, /* Final punctuation */
ucp_Pi, /* Initial punctuation */
ucp_Po, /* Other punctuation */
ucp_Ps, /* Open punctuation */
ucp_Sc, /* Currency symbol */
ucp_Sk, /* Modifier symbol */
ucp_Sm, /* Mathematical symbol */
ucp_So, /* Other symbol */
ucp_Zl, /* Line separator */
ucp_Zp, /* Paragraph separator */
ucp_Zs /* Space separator */
};
/* These are the grapheme break properties. The enumerators take implicit
values starting at zero; the trailing comments record each resulting value.
Note that the code for processing them assumes that the values are less than
16 (there are currently 13, numbered 0 to 12). If more values are added that
take the number to 16 or more, the code will have to be rewritten. */
enum {
ucp_gbCR, /* 0 */
ucp_gbLF, /* 1 */
ucp_gbControl, /* 2 */
ucp_gbExtend, /* 3 */
ucp_gbPrepend, /* 4 */
ucp_gbSpacingMark, /* 5 */
ucp_gbL, /* 6 Hangul syllable type L */
ucp_gbV, /* 7 Hangul syllable type V */
ucp_gbT, /* 8 Hangul syllable type T */
ucp_gbLV, /* 9 Hangul syllable type LV */
ucp_gbLVT, /* 10 Hangul syllable type LVT */
ucp_gbRegionalIndicator, /* 11 */
ucp_gbOther /* 12 */
};
/* These are the script identifications. The enumerators take implicit values
starting at zero in the order written, and (as noted at the top of this file)
those values must match the ones generated by maint/MultiStage2.py. The first
group is alphabetical; after that, scripts are appended in the order of the
Unicode release that introduced them rather than being merged alphabetically,
so that the values of existing scripts never change. New scripts must
therefore always be added at the end. */
enum {
ucp_Arabic,
ucp_Armenian,
ucp_Bengali,
ucp_Bopomofo,
ucp_Braille,
ucp_Buginese,
ucp_Buhid,
ucp_Canadian_Aboriginal,
ucp_Cherokee,
ucp_Common,
ucp_Coptic,
ucp_Cypriot,
ucp_Cyrillic,
ucp_Deseret,
ucp_Devanagari,
ucp_Ethiopic,
ucp_Georgian,
ucp_Glagolitic,
ucp_Gothic,
ucp_Greek,
ucp_Gujarati,
ucp_Gurmukhi,
ucp_Han,
ucp_Hangul,
ucp_Hanunoo,
ucp_Hebrew,
ucp_Hiragana,
ucp_Inherited,
ucp_Kannada,
ucp_Katakana,
ucp_Kharoshthi,
ucp_Khmer,
ucp_Lao,
ucp_Latin,
ucp_Limbu,
ucp_Linear_B,
ucp_Malayalam,
ucp_Mongolian,
ucp_Myanmar,
ucp_New_Tai_Lue,
ucp_Ogham,
ucp_Old_Italic,
ucp_Old_Persian,
ucp_Oriya,
ucp_Osmanya,
ucp_Runic,
ucp_Shavian,
ucp_Sinhala,
ucp_Syloti_Nagri,
ucp_Syriac,
ucp_Tagalog,
ucp_Tagbanwa,
ucp_Tai_Le,
ucp_Tamil,
ucp_Telugu,
ucp_Thaana,
ucp_Thai,
ucp_Tibetan,
ucp_Tifinagh,
ucp_Ugaritic,
ucp_Yi,
/* New for Unicode 5.0: */
ucp_Balinese,
ucp_Cuneiform,
ucp_Nko,
ucp_Phags_Pa,
ucp_Phoenician,
/* New for Unicode 5.1: */
ucp_Carian,
ucp_Cham,
ucp_Kayah_Li,
ucp_Lepcha,
ucp_Lycian,
ucp_Lydian,
ucp_Ol_Chiki,
ucp_Rejang,
ucp_Saurashtra,
ucp_Sundanese,
ucp_Vai,
/* New for Unicode 5.2: */
ucp_Avestan,
ucp_Bamum,
ucp_Egyptian_Hieroglyphs,
ucp_Imperial_Aramaic,
ucp_Inscriptional_Pahlavi,
ucp_Inscriptional_Parthian,
ucp_Javanese,
ucp_Kaithi,
ucp_Lisu,
ucp_Meetei_Mayek,
ucp_Old_South_Arabian,
ucp_Old_Turkic,
ucp_Samaritan,
ucp_Tai_Tham,
ucp_Tai_Viet,
/* New for Unicode 6.0.0: */
ucp_Batak,
ucp_Brahmi,
ucp_Mandaic,
/* New for Unicode 6.1.0: */
ucp_Chakma,
ucp_Meroitic_Cursive,
ucp_Meroitic_Hieroglyphs,
ucp_Miao,
ucp_Sharada,
ucp_Sora_Sompeng,
ucp_Takri,
/* New for Unicode 7.0.0: */
ucp_Bassa_Vah,
ucp_Caucasian_Albanian,
ucp_Duployan,
ucp_Elbasan,
ucp_Grantha,
ucp_Khojki,
ucp_Khudawadi,
ucp_Linear_A,
ucp_Mahajani,
ucp_Manichaean,
ucp_Mende_Kikakui,
ucp_Modi,
ucp_Mro,
ucp_Nabataean,
ucp_Old_North_Arabian,
ucp_Old_Permic,
ucp_Pahawh_Hmong,
ucp_Palmyrene,
ucp_Psalter_Pahlavi,
ucp_Pau_Cin_Hau,
ucp_Siddham,
ucp_Tirhuta,
ucp_Warang_Citi,
/* New for Unicode 8.0.0: */
ucp_Ahom,
ucp_Anatolian_Hieroglyphs,
ucp_Hatran,
ucp_Multani,
ucp_Old_Hungarian,
ucp_SignWriting
};
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
/* End of pcre2_ucp.h */

398
thirdparty/pcre2/src/pcre2_valid_utf.c vendored Normal file
View file

@ -0,0 +1,398 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function for validating UTF character
strings. This file is also #included by the pcre2test program, which uses
macros to change names from _pcre2_xxx to xxxx, thereby avoiding name clashes
with the library. In this case, PCRE2_PCRE2TEST is defined. */
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
#endif /* PCRE2_PCRE2TEST */
#ifndef SUPPORT_UNICODE
/*************************************************
* Dummy function when Unicode is not supported *
*************************************************/
/* Stub used when the library is built without SUPPORT_UNICODE. It should
never be called in such a build; it exists so that the symbol is always
defined. It inspects nothing, leaves *erroroffset untouched, and reports
success (0). */

int
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
{
/* The arguments are referenced only to suppress unused-parameter warnings. */
(void)erroroffset;
(void)length;
(void)string;

return 0;
}
#else /* UTF is supported */
/*************************************************
* Validate a UTF string *
*************************************************/
/* This function is called (optionally) at the start of compile or match, to
check that a supposed UTF string is actually valid. The early check means
that subsequent code can assume it is dealing with a valid string. The check
can be turned off for maximum performance, but the consequences of supplying an
invalid string are then undefined.
Arguments:
string points to the string
length length of string
errp pointer to an error position offset variable
Returns: == 0 if the string is a valid UTF string
!= 0 otherwise, setting the offset of the bad character
*/
int
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
{
PCRE2_SPTR p;
uint32_t c;
/* ----------------- Check a UTF-8 string ----------------- */
#if PCRE2_CODE_UNIT_WIDTH == 8
/* Originally, this function checked according to RFC 2279, allowing for values
in the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were
in the canonical format. Once somebody had pointed out RFC 3629 to me (it
obsoletes 2279), additional restrictions were applied. The values are now
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
characters is still checked. Error returns are as follows:
PCRE2_ERROR_UTF8_ERR1 Missing 1 byte at the end of the string
PCRE2_ERROR_UTF8_ERR2 Missing 2 bytes at the end of the string
PCRE2_ERROR_UTF8_ERR3 Missing 3 bytes at the end of the string
PCRE2_ERROR_UTF8_ERR4 Missing 4 bytes at the end of the string
PCRE2_ERROR_UTF8_ERR5 Missing 5 bytes at the end of the string
PCRE2_ERROR_UTF8_ERR6 2nd-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR7 3rd-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR8 4th-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR9 5th-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR10 6th-byte's two top bits are not 0x80
PCRE2_ERROR_UTF8_ERR11 5-byte character is not permitted by RFC 3629
PCRE2_ERROR_UTF8_ERR12 6-byte character is not permitted by RFC 3629
PCRE2_ERROR_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence
PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence
PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence
PCRE2_ERROR_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
PCRE2_ERROR_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
PCRE2_ERROR_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
*/
for (p = string; length > 0; p++)
{
uint32_t ab, d;
c = *p;
length--;
if (c < 128) continue; /* ASCII character */
if (c < 0xc0) /* Isolated 10xx xxxx byte */
{
*erroroffset = (int)(p - string);
return PCRE2_ERROR_UTF8_ERR20;
}
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
{
*erroroffset = (int)(p - string);
return PCRE2_ERROR_UTF8_ERR21;
}
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */
if (length < ab) /* Missing bytes */
{
*erroroffset = (int)(p - string);
switch(ab - length)
{
case 1: return PCRE2_ERROR_UTF8_ERR1;
case 2: return PCRE2_ERROR_UTF8_ERR2;
case 3: return PCRE2_ERROR_UTF8_ERR3;
case 4: return PCRE2_ERROR_UTF8_ERR4;
case 5: return PCRE2_ERROR_UTF8_ERR5;
}
}
length -= ab; /* Length remaining */
/* Check top bits in the second byte */
if (((d = *(++p)) & 0xc0) != 0x80)
{
*erroroffset = (int)(p - string) - 1;
return PCRE2_ERROR_UTF8_ERR6;
}
/* For each length, check that the remaining bytes start with the 0x80 bit
set and not the 0x40 bit. Then check for an overlong sequence, and for the
excluded range 0xd800 to 0xdfff. */
switch (ab)
{
/* 2-byte character. No further bytes to check for 0x80. Check first byte
for for xx00 000x (overlong sequence). */
case 1: if ((c & 0x3e) == 0)
{
*erroroffset = (int)(p - string) - 1;
return PCRE2_ERROR_UTF8_ERR15;
}
break;
/* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
for 1110 0000, xx0x xxxx (overlong sequence) or
1110 1101, 1010 xxxx (0xd800 - 0xdfff) */
case 2:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR7;
}
if (c == 0xe0 && (d & 0x20) == 0)
{
*erroroffset = (int)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR16;
}
if (c == 0xed && d >= 0xa0)
{
*erroroffset = (int)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR14;
}
break;
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
bytes for for 1111 0000, xx00 xxxx (overlong sequence), then check for a
character greater than 0x0010ffff (f4 8f bf bf) */
case 3:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (int)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR8;
}
if (c == 0xf0 && (d & 0x30) == 0)
{
*erroroffset = (int)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR17;
}
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
{
*erroroffset = (int)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR13;
}
break;
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
rejected by the length test below. However, we do the appropriate tests
here so that overlong sequences get diagnosed, and also in case there is
ever an option for handling these larger code points. */
/* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
1111 1000, xx00 0xxx */
case 4:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (int)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR8;
}
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
{
*erroroffset = (int)(p - string) - 4;
return PCRE2_ERROR_UTF8_ERR9;
}
if (c == 0xf8 && (d & 0x38) == 0)
{
*erroroffset = (int)(p - string) - 4;
return PCRE2_ERROR_UTF8_ERR18;
}
break;
/* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
1111 1100, xx00 00xx. */
case 5:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (int)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR8;
}
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
{
*erroroffset = (int)(p - string) - 4;
return PCRE2_ERROR_UTF8_ERR9;
}
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
{
*erroroffset = (int)(p - string) - 5;
return PCRE2_ERROR_UTF8_ERR10;
}
if (c == 0xfc && (d & 0x3c) == 0)
{
*erroroffset = (int)(p - string) - 5;
return PCRE2_ERROR_UTF8_ERR19;
}
break;
}
/* Character is valid under RFC 2279, but 4-byte and 5-byte characters are
excluded by RFC 3629. The pointer p is currently at the last byte of the
character. */
if (ab > 3)
{
*erroroffset = (int)(p - string) - ab;
return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
}
}
return 0;
/* ----------------- Check a UTF-16 string ----------------- */
#elif PCRE2_CODE_UNIT_WIDTH == 16
/* There's not so much work, nor so many errors, for UTF-16.
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string
PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
*/
for (p = string; length > 0; p++)
{
c = *p;
length--;
if ((c & 0xf800) != 0xd800)
{
/* Normal UTF-16 code point. Neither high nor low surrogate. */
}
else if ((c & 0x0400) == 0)
{
/* High surrogate. Must be followed by a low surrogate. */
if (length == 0)
{
*erroroffset = p - string;
return PCRE2_ERROR_UTF16_ERR1;
}
p++;
length--;
if ((*p & 0xfc00) != 0xdc00)
{
*erroroffset = p - string;
return PCRE2_ERROR_UTF16_ERR2;
}
}
else
{
/* Isolated low surrogate. Always an error. */
*erroroffset = p - string;
return PCRE2_ERROR_UTF16_ERR3;
}
}
return 0;
/* ----------------- Check a UTF-32 string ----------------- */
#else
/* There is very little to do for a UTF-32 string.
PCRE2_ERROR_UTF32_ERR1 Surrogate character
PCRE2_ERROR_UTF32_ERR2 Character > 0x10ffff
*/
for (p = string; length > 0; length--, p++)
{
c = *p;
if ((c & 0xfffff800u) != 0xd800u)
{
/* Normal UTF-32 code point. Neither high nor low surrogate. */
if (c > 0x10ffffu)
{
*erroroffset = p - string;
return PCRE2_ERROR_UTF32_ERR2;
}
}
else
{
/* A surrogate */
*erroroffset = p - string;
return PCRE2_ERROR_UTF32_ERR1;
}
}
return 0;
#endif /* CODE_UNIT_WIDTH */
}
#endif /* SUPPORT_UNICODE */
/* End of pcre2_valid_utf.c */

271
thirdparty/pcre2/src/pcre2_xclass.c vendored Normal file
View file

@ -0,0 +1,271 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains an internal function that is used to match an extended
class. It is used by pcre2_auto_possessify() and by both pcre2_match() and
pcre2_dfa_match(). */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "pcre2_internal.h"
/*************************************************
* Match character against an XCLASS *
*************************************************/
/* This function is called to match a character against an extended class that
might contain codepoints above 255 and/or Unicode properties.

Layout of the class data as it is read here: a flag code unit (tested for
XCL_NOT, XCL_MAP and XCL_HASPROP), then, when XCL_MAP is set, a 32-byte bitmap
covering code points below 256, then a list of XCL_SINGLE, XCL_RANGE, XCL_PROP
or XCL_NOTPROP items terminated by XCL_END.

Arguments:
c the character
data points to the flag code unit of the XCLASS data
utf TRUE if in UTF mode
Returns: TRUE if character matches, else FALSE
*/
BOOL
PRIV(xclass)(uint32_t c, PCRE2_SPTR data, BOOL utf)
{
PCRE2_UCHAR t;
/* A hit in the item list returns !negated, so for a class flagged XCL_NOT a
hit means "no match" and falling off the end of the list means "match". */
BOOL negated = (*data & XCL_NOT) != 0;
#if PCRE2_CODE_UNIT_WIDTH == 8
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
utf = TRUE;
#endif
/* Code points < 256 are matched against a bitmap, if one is present. If not,
we still carry on, because there may be ranges that start below 256 in the
additional data. */
if (c < 256)
{
/* With no property items the answer for c < 256 is final: "negated" when
there is no bitmap, otherwise the raw bitmap bit. Note that the bit is
returned without applying "negated"; presumably the compiled map already
reflects the negation in this case - confirm against the compiler. */
if ((*data & XCL_HASPROP) == 0)
{
if ((*data & XCL_MAP) == 0) return negated;
return (((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0;
}
/* Property items are present: a set bitmap bit is a hit, but a clear bit is
not decisive, so fall through to the item list. */
if ((*data & XCL_MAP) != 0 &&
(((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0)
return !negated; /* char found */
}
/* First skip the bit map if present. Then match against the list of Unicode
properties or large chars or ranges that end with a large char. We won't ever
encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. */
/* The post-increment steps over the flag unit; the bitmap is 32 bytes, i.e.
32 / sizeof(PCRE2_UCHAR) code units. */
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(PCRE2_UCHAR);
/* Each iteration consumes one item type code unit plus its operands. */
while ((t = *data++) != XCL_END)
{
uint32_t x, y;
if (t == XCL_SINGLE)
{
/* One operand: the single character to compare with c. */
#ifdef SUPPORT_UNICODE
if (utf)
{
GETCHARINC(x, data); /* macro generates multiple statements */
}
else
#endif
x = *data++;
if (c == x) return !negated;
}
else if (t == XCL_RANGE)
{
/* Two operands: the inclusive lower and upper bounds of the range. */
#ifdef SUPPORT_UNICODE
if (utf)
{
GETCHARINC(x, data); /* macro generates multiple statements */
GETCHARINC(y, data); /* macro generates multiple statements */
}
else
#endif
{
x = *data++;
y = *data++;
}
if (c >= x && c <= y) return !negated;
}
#ifdef SUPPORT_UNICODE
else /* XCL_PROP & XCL_NOTPROP */
{
/* data[0] is the property type and data[1] its value. isprop is TRUE for
XCL_PROP and FALSE for XCL_NOTPROP, so every test below is written as
"(character has the property) == isprop". */
const ucd_record *prop = GET_UCD(c);
BOOL isprop = t == XCL_PROP;
switch(*data)
{
case PT_ANY:
if (isprop) return !negated;
break;
case PT_LAMP:
if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt) == isprop) return !negated;
break;
case PT_GC:
if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
return !negated;
break;
case PT_PC:
if ((data[1] == prop->chartype) == isprop) return !negated;
break;
case PT_SC:
if ((data[1] == prop->script) == isprop) return !negated;
break;
case PT_ALNUM:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
return !negated;
break;
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
switch(c)
{
/* HSPACE_CASES and VSPACE_CASES are macros that supply the case labels
here; presumably the horizontal and vertical white space code points -
see their definitions in the internal header. */
HSPACE_CASES:
VSPACE_CASES:
if (isprop) return !negated;
break;
default:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
return !negated;
break;
}
break;
case PT_WORD:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
== isprop)
return !negated;
break;
case PT_UCNC:
/* Below 0xa0 only '$', '@' and '`' have the property; from 0xa0 upwards
everything except the surrogate range 0xd800-0xdfff has it. */
if (c < 0xa0)
{
if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT) == isprop)
return !negated;
}
else
{
if ((c < 0xd800 || c > 0xdfff) == isprop)
return !negated;
}
break;
/* The following three properties can occur only in an XCLASS, as there
is no \p or \P coding for them. */
/* Graphic character. Implement this as not Z (space or separator) and
not C (other), except for Cf (format) with a few exceptions. This seems
to be what Perl does. The exceptional characters are:
U+061C Arabic Letter Mark
U+180E Mongolian Vowel Separator
U+2066 - U+2069 Various "isolate"s
*/
case PT_PXGRAPH:
if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
(PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
(prop->chartype == ucp_Cf &&
c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
)) == isprop)
return !negated;
break;
/* Printable character: same as graphic, with the addition of Zs, i.e.
not Zl and not Zp, and U+180E. */
case PT_PXPRINT:
if ((prop->chartype != ucp_Zl &&
prop->chartype != ucp_Zp &&
(PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
(prop->chartype == ucp_Cf &&
c != 0x061c && (c < 0x2066 || c > 0x2069))
)) == isprop)
return !negated;
break;
/* Punctuation: all Unicode punctuation, plus ASCII characters that
Unicode treats as symbols rather than punctuation, for Perl
compatibility (these are $+<=>^`|~). */
case PT_PXPUNCT:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
(c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
return !negated;
break;
/* This should never occur, but compilers may mutter if there is no
default. */
default:
return FALSE;
}
/* No hit: step over the property type and value code units. */
data += 2;
}
#else
(void)utf; /* Avoid compiler warning */
#endif /* SUPPORT_UNICODE */
}
return negated; /* char did not match */
}
/* End of pcre2_xclass.c */

145
thirdparty/pcre2/src/sljit/sljitConfig.h vendored Normal file
View file

@ -0,0 +1,145 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _SLJIT_CONFIG_H_
#define _SLJIT_CONFIG_H_

/* --------------------------------------------------------------------- */
/* Custom defines */
/* --------------------------------------------------------------------- */

/* Local, project-specific defines go here. The section is deliberately
   kept empty so that patches against this file continue to apply. */

/* --------------------------------------------------------------------- */
/* Architecture */
/* --------------------------------------------------------------------- */

/* Target selection. Uncomment one line (or define the macro on the compiler
   command line); sljitConfigInternal.h rejects builds that select no target
   or more than one. */
/* #define SLJIT_CONFIG_X86_32 1 */
/* #define SLJIT_CONFIG_X86_64 1 */
/* #define SLJIT_CONFIG_ARM_V5 1 */
/* #define SLJIT_CONFIG_ARM_V7 1 */
/* #define SLJIT_CONFIG_ARM_THUMB2 1 */
/* #define SLJIT_CONFIG_ARM_64 1 */
/* #define SLJIT_CONFIG_PPC_32 1 */
/* #define SLJIT_CONFIG_PPC_64 1 */
/* #define SLJIT_CONFIG_MIPS_32 1 */
/* #define SLJIT_CONFIG_MIPS_64 1 */
/* #define SLJIT_CONFIG_SPARC_32 1 */
/* #define SLJIT_CONFIG_TILEGX 1 */

/* #define SLJIT_CONFIG_AUTO 1 */
/* #define SLJIT_CONFIG_UNSUPPORTED 1 */

/* --------------------------------------------------------------------- */
/* Utilities */
/* --------------------------------------------------------------------- */

/* Global lock, useful for thread-safe compiling of global functions.
   Default: enabled. */
#if !defined(SLJIT_UTIL_GLOBAL_LOCK)
#define SLJIT_UTIL_GLOBAL_LOCK 1
#endif

/* Stack-like data structure built on mmap / VirtualAlloc.
   Default: enabled. */
#if !defined(SLJIT_UTIL_STACK)
#define SLJIT_UTIL_STACK 1
#endif

/* Single threaded application, which does not require any locks.
   Default: disabled. */
#if !defined(SLJIT_SINGLE_THREADED)
#define SLJIT_SINGLE_THREADED 0
#endif

/* --------------------------------------------------------------------- */
/* Configuration */
/* --------------------------------------------------------------------- */

/* When SLJIT_STD_MACROS_DEFINED is left undefined by the application, the
   application should define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMCPY, and NULL.
   Default: disabled. */
#if !defined(SLJIT_STD_MACROS_DEFINED)
#define SLJIT_STD_MACROS_DEFINED 0
#endif

/* Executable code allocation. When SLJIT_EXECUTABLE_ALLOCATOR is left
   undefined by the application, the application should define
   SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_EXEC_OFFSET.
   Default: enabled. */
#if !defined(SLJIT_EXECUTABLE_ALLOCATOR)
#define SLJIT_EXECUTABLE_ALLOCATOR 1

/* With SLJIT_PROT_EXECUTABLE_ALLOCATOR enabled, SLJIT uses an allocator
   which never sets the writable and executable permission flags at the same
   time. The trade-off is increased memory consumption and disabled dynamic
   code modifications. This default is only supplied when the allocator
   default above is supplied as well.
   Default: disabled. */
#if !defined(SLJIT_PROT_EXECUTABLE_ALLOCATOR)
#define SLJIT_PROT_EXECUTABLE_ALLOCATOR 0
#endif

#endif /* !SLJIT_EXECUTABLE_ALLOCATOR */

/* Force the cdecl calling convention even if the C compiler supports a
   better one (e.g. fastcall). When enabled, C functions without SLJIT_CALL
   can also be called from JIT code.
   Default: disabled. */
#if !defined(SLJIT_USE_CDECL_CALLING_CONVENTION)
#define SLJIT_USE_CDECL_CALLING_CONVENTION 0
#endif

/* Return with an error when an invalid argument is passed.
   Default: disabled. */
#if !defined(SLJIT_ARGUMENT_CHECKS)
#define SLJIT_ARGUMENT_CHECKS 0
#endif

/* Debug checks (assertions, etc.).
   Default: enabled. */
#if !defined(SLJIT_DEBUG)
#define SLJIT_DEBUG 1
#endif

/* Verbose operations.
   Default: enabled. */
#if !defined(SLJIT_VERBOSE)
#define SLJIT_VERBOSE 1
#endif

/*
SLJIT_IS_FPU_AVAILABLE
The availability of the FPU can be controlled by SLJIT_IS_FPU_AVAILABLE.
zero value - FPU is NOT present.
nonzero value - FPU is present.
*/

/* For further configurations, see the beginning of sljitConfigInternal.h */

#endif /* _SLJIT_CONFIG_H_ */

View file

@ -0,0 +1,724 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _SLJIT_CONFIG_INTERNAL_H_
#define _SLJIT_CONFIG_INTERNAL_H_
/*
SLJIT defines the following architecture dependent types and macros:
Types:
sljit_s8, sljit_u8 : signed and unsigned 8 bit integer type
sljit_s16, sljit_u16 : signed and unsigned 16 bit integer type
sljit_s32, sljit_u32 : signed and unsigned 32 bit integer type
sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer
sljit_p : unsigned pointer value (usually the same as sljit_uw, but
some 64 bit ABIs may use 32 bit pointers)
sljit_f32 : 32 bit single precision floating point value
sljit_f64 : 64 bit double precision floating point value
Macros for feature detection (boolean):
SLJIT_32BIT_ARCHITECTURE : 32 bit architecture
SLJIT_64BIT_ARCHITECTURE : 64 bit architecture
SLJIT_LITTLE_ENDIAN : little endian architecture
SLJIT_BIG_ENDIAN : big endian architecture
SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information
Constants:
SLJIT_NUMBER_OF_REGISTERS : number of available registers
SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers
SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers
SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers
SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers
SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers
SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index
SLJIT_F32_SHIFT : the shift required to apply when accessing
a single precision floating point array by index
SLJIT_F64_SHIFT : the shift required to apply when accessing
a double precision floating point array by index
SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET)
SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
Other macros:
SLJIT_CALL : C calling convention define for both calling JIT from C and C callbacks for JIT
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
*/
/*****************/
/* Sanity check. */
/*****************/
/* A selector counts as "chosen" when it is defined with a non-zero value.
   First check: fail when not a single selector is chosen. */
#if !(defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32) \
	&& !(defined(SLJIT_CONFIG_X86_64) && SLJIT_CONFIG_X86_64) \
	&& !(defined(SLJIT_CONFIG_ARM_V5) && SLJIT_CONFIG_ARM_V5) \
	&& !(defined(SLJIT_CONFIG_ARM_V7) && SLJIT_CONFIG_ARM_V7) \
	&& !(defined(SLJIT_CONFIG_ARM_THUMB2) && SLJIT_CONFIG_ARM_THUMB2) \
	&& !(defined(SLJIT_CONFIG_ARM_64) && SLJIT_CONFIG_ARM_64) \
	&& !(defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32) \
	&& !(defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) \
	&& !(defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32) \
	&& !(defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64) \
	&& !(defined(SLJIT_CONFIG_SPARC_32) && SLJIT_CONFIG_SPARC_32) \
	&& !(defined(SLJIT_CONFIG_TILEGX) && SLJIT_CONFIG_TILEGX) \
	&& !(defined(SLJIT_CONFIG_AUTO) && SLJIT_CONFIG_AUTO) \
	&& !(defined(SLJIT_CONFIG_UNSUPPORTED) && SLJIT_CONFIG_UNSUPPORTED)
#error "An architecture must be selected"
#endif

/* Second check: every chosen selector contributes 1 to the sum, so a total
   above 1 means that conflicting targets were requested. */
#if ((defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32) \
	+ (defined(SLJIT_CONFIG_X86_64) && SLJIT_CONFIG_X86_64) \
	+ (defined(SLJIT_CONFIG_ARM_V5) && SLJIT_CONFIG_ARM_V5) \
	+ (defined(SLJIT_CONFIG_ARM_V7) && SLJIT_CONFIG_ARM_V7) \
	+ (defined(SLJIT_CONFIG_ARM_THUMB2) && SLJIT_CONFIG_ARM_THUMB2) \
	+ (defined(SLJIT_CONFIG_ARM_64) && SLJIT_CONFIG_ARM_64) \
	+ (defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32) \
	+ (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) \
	+ (defined(SLJIT_CONFIG_TILEGX) && SLJIT_CONFIG_TILEGX) \
	+ (defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32) \
	+ (defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64) \
	+ (defined(SLJIT_CONFIG_SPARC_32) && SLJIT_CONFIG_SPARC_32) \
	+ (defined(SLJIT_CONFIG_AUTO) && SLJIT_CONFIG_AUTO) \
	+ (defined(SLJIT_CONFIG_UNSUPPORTED) && SLJIT_CONFIG_UNSUPPORTED)) > 1
#error "Multiple architectures are selected"
#endif
/********************************************************/
/* Automatic CPU detection (requires compiler support). */
/********************************************************/
/* When SLJIT_CONFIG_AUTO is selected, derive a concrete SLJIT_CONFIG_* target
   from the compiler's predefined macros. Within each chain the first test that
   succeeds decides, so the order of the #elif lines is significant. */
#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO)
#ifndef _WIN32
/* Non-Windows toolchains. */
#if defined(__i386__) || defined(__i386)
#define SLJIT_CONFIG_X86_32 1
#elif defined(__x86_64__)
#define SLJIT_CONFIG_X86_64 1
#elif defined(__arm__) || defined(__ARM__)
/* 32-bit ARM: Thumb-2 is preferred, then ARMv7, with ARMv5 as the fallback. */
#ifdef __thumb2__
#define SLJIT_CONFIG_ARM_THUMB2 1
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__)
#define SLJIT_CONFIG_ARM_V7 1
#else
#define SLJIT_CONFIG_ARM_V5 1
#endif
#elif defined (__aarch64__)
#define SLJIT_CONFIG_ARM_64 1
/* The 64-bit PowerPC test must come before the 32-bit one, because the
   32-bit test also accepts macros such as _POWER that the 64-bit test uses. */
#elif defined(__ppc64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) || (defined(_POWER) && defined(__64BIT__))
#define SLJIT_CONFIG_PPC_64 1
#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER)
#define SLJIT_CONFIG_PPC_32 1
#elif defined(__mips__) && !defined(_LP64)
#define SLJIT_CONFIG_MIPS_32 1
#elif defined(__mips64)
#define SLJIT_CONFIG_MIPS_64 1
#elif defined(__sparc__) || defined(__sparc)
#define SLJIT_CONFIG_SPARC_32 1
#elif defined(__tilegx__)
#define SLJIT_CONFIG_TILEGX 1
#else
/* Unsupported architecture */
#define SLJIT_CONFIG_UNSUPPORTED 1
#endif
#else /* _WIN32 */
/* Windows toolchains: anything that is neither x86-64 nor _ARM_ is treated as
   32-bit x86; there is no "unsupported" fallback in this branch. */
#if defined(_M_X64) || defined(__x86_64__)
#define SLJIT_CONFIG_X86_64 1
#elif defined(_ARM_)
#define SLJIT_CONFIG_ARM_V5 1
#else
#define SLJIT_CONFIG_X86_32 1
#endif
#endif /* !_WIN32 */
#endif /* SLJIT_CONFIG_AUTO */
/* Without a supported target the executable allocator selection is removed,
   whether it came from sljitConfig.h or from the build. */
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
#undef SLJIT_EXECUTABLE_ALLOCATOR
#endif
/******************************/
/* CPU family type detection. */
/******************************/
/* Any of the three 32-bit ARM flavours selects the common ARM_32 marker. */
#if (defined(SLJIT_CONFIG_ARM_V5) && SLJIT_CONFIG_ARM_V5) \
	|| (defined(SLJIT_CONFIG_ARM_V7) && SLJIT_CONFIG_ARM_V7) \
	|| (defined(SLJIT_CONFIG_ARM_THUMB2) && SLJIT_CONFIG_ARM_THUMB2)
#define SLJIT_CONFIG_ARM_32 1
#endif

/* Family marker shared by the 32-bit and 64-bit variant of each CPU. At most
   one family is defined: the first matching test in the chain wins. */
#if (defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32) \
	|| (defined(SLJIT_CONFIG_X86_64) && SLJIT_CONFIG_X86_64)
#define SLJIT_CONFIG_X86 1
#elif (defined(SLJIT_CONFIG_ARM_32) && SLJIT_CONFIG_ARM_32) \
	|| (defined(SLJIT_CONFIG_ARM_64) && SLJIT_CONFIG_ARM_64)
#define SLJIT_CONFIG_ARM 1
#elif (defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32) \
	|| (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64)
#define SLJIT_CONFIG_PPC 1
#elif (defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32) \
	|| (defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64)
#define SLJIT_CONFIG_MIPS 1
#elif (defined(SLJIT_CONFIG_SPARC_32) && SLJIT_CONFIG_SPARC_32) \
	|| (defined(SLJIT_CONFIG_SPARC_64) && SLJIT_CONFIG_SPARC_64)
#define SLJIT_CONFIG_SPARC 1
#endif
/**********************************/
/* External function definitions. */
/**********************************/
/* General macros.
   Note: SLJIT is designed to be as independent from external functions as
   possible. In release mode (SLJIT_DEBUG is not defined) only the functions
   used by the defaults below are needed. Each default applies only when the
   application has not supplied its own macro. */

/* Allocation; the allocator_data argument is ignored by the default. */
#if !defined(SLJIT_MALLOC)
#define SLJIT_MALLOC(size, allocator_data) malloc(size)
#endif

/* Release; the allocator_data argument is ignored by the default. */
#if !defined(SLJIT_FREE)
#define SLJIT_FREE(ptr, allocator_data) free(ptr)
#endif

/* Copy len bytes from src to dest. */
#if !defined(SLJIT_MEMCPY)
#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len)
#endif

/* Fill len bytes at dest with zero. */
#if !defined(SLJIT_ZEROMEM)
#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len)
#endif
/***************************/
/* Compiler helper macros. */
/***************************/
/* Branch hints. They are supplied only when the application has defined
   neither of the two; GCC-compatible compilers from version 3 on map them to
   __builtin_expect, every other compiler gets the plain expression. */
#if !(defined(SLJIT_LIKELY) || defined(SLJIT_UNLIKELY))
#if !defined(__GNUC__) || (__GNUC__ < 3)
#define SLJIT_LIKELY(x) (x)
#define SLJIT_UNLIKELY(x) (x)
#else
#define SLJIT_LIKELY(x) __builtin_expect((x), 1)
#define SLJIT_UNLIKELY(x) __builtin_expect((x), 0)
#endif
#endif /* !(defined(SLJIT_LIKELY) || defined(SLJIT_UNLIKELY)) */

/* Inline functions. Some old compilers do not support them: Sun C up to
   version 0x510 gets an empty definition. */
#if !defined(SLJIT_INLINE)
#if !defined(__SUNPRO_C) || (__SUNPRO_C > 0x510)
#define SLJIT_INLINE __inline
#else
#define SLJIT_INLINE
#endif
#endif /* !SLJIT_INLINE */

/* Functions that must not be inlined. Only expressed for GCC-compatible
   compilers; empty elsewhere. */
#if !defined(SLJIT_NOINLINE)
#if !defined(__GNUC__)
#define SLJIT_NOINLINE
#else
#define SLJIT_NOINLINE __attribute__ ((noinline))
#endif
#endif /* !SLJIT_NOINLINE */

/* Marks a function argument as intentionally unused. */
#if !defined(SLJIT_UNUSED_ARG)
#define SLJIT_UNUSED_ARG(arg) (void)arg
#endif
/*********************************/
/* Type of public API functions. */
/*********************************/
/* Linkage prefix placed in front of every public API function. It is empty
   unless SLJIT_CONFIG_STATIC is enabled (all-in-one programs), in which case
   the functions become static; GCC-compatible compilers additionally get the
   "unused" attribute so that unreferenced API functions do not warn. */
#if !(defined(SLJIT_CONFIG_STATIC) && SLJIT_CONFIG_STATIC)
#define SLJIT_API_FUNC_ATTRIBUTE
#elif defined(__GNUC__)
#define SLJIT_API_FUNC_ATTRIBUTE static __attribute__((unused))
#else
#define SLJIT_API_FUNC_ATTRIBUTE static
#endif /* !(defined(SLJIT_CONFIG_STATIC) && SLJIT_CONFIG_STATIC) */
/****************************/
/* Instruction cache flush. */
/****************************/
/* SLJIT_CACHE_FLUSH(from, to) synchronises the instruction cache for the code
   range delimited by 'from' and 'to' after machine code has been written. A
   definition supplied by the application always takes precedence.

   Every expansion parenthesises both arguments before casting. Without the
   parentheses an argument such as 'inst + 4' would expand to
   '(char*)inst + 4': the cast binds to 'inst' alone, the addition becomes
   byte arithmetic, and the flushed range ends too early. */
#if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin)
/* __has_builtin is queried in a nested #if because it cannot appear in an
   expression that is evaluated by a compiler which does not provide it. */
#if __has_builtin(__builtin___clear_cache)
#define SLJIT_CACHE_FLUSH(from, to) \
	__builtin___clear_cache((char*)(from), (char*)(to))
#endif /* __has_builtin(__builtin___clear_cache) */
#endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */
#ifndef SLJIT_CACHE_FLUSH
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
/* Not required to implement on archs with unified caches. */
#define SLJIT_CACHE_FLUSH(from, to)
#elif defined __APPLE__
/* Supported by all macs since Mac OS 10.5.
However, it does not work on non-jailbroken iOS devices,
although the compilation is successful. */
#define SLJIT_CACHE_FLUSH(from, to) \
	sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from))
#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
#define SLJIT_CACHE_FLUSH(from, to) \
	__builtin___clear_cache((char*)(from), (char*)(to))
#elif defined __ANDROID__
/* Android lacks __clear_cache; instead, cacheflush should be used. */
#define SLJIT_CACHE_FLUSH(from, to) \
	cacheflush((long)(from), (long)(to), 0)
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */
#define SLJIT_CACHE_FLUSH(from, to) \
	ppc_cache_flush((from), (to))
/* Tells the rest of sljit that the flush routine named above must be
   provided by sljit itself. */
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */
#define SLJIT_CACHE_FLUSH(from, to) \
	sparc_cache_flush((from), (to))
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
#else
/* Calls __ARM_NR_cacheflush on ARM-Linux. */
#define SLJIT_CACHE_FLUSH(from, to) \
	__clear_cache((char*)(from), (char*)(to))
#endif
#endif /* !SLJIT_CACHE_FLUSH */
/******************************************************/
/* Integer and floating point type definitions. */
/******************************************************/
/* Fixed-width integers: 8 bit bytes, 16 bit half-words, 32 bit integers. */
typedef unsigned char sljit_u8;
typedef signed char sljit_s8;
typedef unsigned short sljit_u16;
typedef signed short sljit_s16;
typedef unsigned int sljit_u32;
typedef signed int sljit_s32;

/* Machine word, wide enough to store a pointer: 32 bit on 32 bit targets and
   64 bit on 64 bit targets. SLJIT_WORD_SHIFT is the shift that converts an
   index into a byte offset for an array of machine words. */
#if (defined(SLJIT_CONFIG_UNSUPPORTED) && SLJIT_CONFIG_UNSUPPORTED)
/* No real target: the definitions exist only so that the code compiles. */
#define SLJIT_WORD_SHIFT 0
typedef unsigned long sljit_uw;
typedef long sljit_sw;
#elif (defined(SLJIT_CONFIG_X86_64) && SLJIT_CONFIG_X86_64) \
	|| (defined(SLJIT_CONFIG_ARM_64) && SLJIT_CONFIG_ARM_64) \
	|| (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) \
	|| (defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64) \
	|| (defined(SLJIT_CONFIG_TILEGX) && SLJIT_CONFIG_TILEGX)
#define SLJIT_64BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 3
#ifndef _WIN32
typedef unsigned long sljit_uw;
typedef long sljit_sw;
#else
/* On Windows the 64 bit word is spelled with __int64 instead of long. */
typedef unsigned __int64 sljit_uw;
typedef __int64 sljit_sw;
#endif
#else
/* Every remaining target is a 32 bit architecture. */
#define SLJIT_32BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 2
typedef unsigned int sljit_uw;
typedef int sljit_sw;
#endif

/* Unsigned pointer value; here always the machine word. */
typedef sljit_uw sljit_p;

/* Single and double precision floating point values. */
typedef float sljit_f32;
typedef double sljit_f64;

/* Index shifts for pointer sized, single precision and double precision
   data respectively. */
#define SLJIT_POINTER_SHIFT SLJIT_WORD_SHIFT
#define SLJIT_F32_SHIFT 2
#define SLJIT_F64_SHIFT 3

/* SLJIT_W(number) spells a machine word constant: the number gets an "ll"
   suffix on 64 bit architectures and is left unchanged otherwise. */
#if !defined(SLJIT_W)
#if !(defined(SLJIT_64BIT_ARCHITECTURE) && SLJIT_64BIT_ARCHITECTURE)
#define SLJIT_W(w) (w)
#else
#define SLJIT_W(w) (w##ll)
#endif
#endif /* !SLJIT_W */
/*************************/
/* Endianness detection. */
/*************************/
/* Byte order. These macros are mostly useful for the applications. Detection
   runs only when the application has defined neither macro itself. */
#if !(defined(SLJIT_BIG_ENDIAN) || defined(SLJIT_LITTLE_ENDIAN))
#if (defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32) \
	|| (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64)
/* PowerPC: big endian unless the compiler announces little endian. */
#ifndef __LITTLE_ENDIAN__
#define SLJIT_BIG_ENDIAN 1
#else
#define SLJIT_LITTLE_ENDIAN 1
#endif
#elif (defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32) \
	|| (defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64)
/* MIPS: big endian unless the compiler announces a little endian target. */
#ifndef __MIPSEL__
#define SLJIT_BIG_ENDIAN 1
#else
#define SLJIT_LITTLE_ENDIAN 1
#endif
#elif (defined(SLJIT_CONFIG_SPARC_32) && SLJIT_CONFIG_SPARC_32)
#define SLJIT_BIG_ENDIAN 1
#else
/* All remaining targets are treated as little endian. */
#define SLJIT_LITTLE_ENDIAN 1
#endif
#endif /* !(defined(SLJIT_BIG_ENDIAN) || defined(SLJIT_LITTLE_ENDIAN)) */

/* Sanity check. Each side evaluates to 1 when that byte order is enabled and
   to 0 otherwise, so equal values mean that both or neither are enabled. */
#if (defined(SLJIT_BIG_ENDIAN) && SLJIT_BIG_ENDIAN) \
	== (defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN)
#error "Exactly one endianness must be selected"
#endif
/* SLJIT_UNALIGNED: unaligned memory accesses are allowed for non-fpu
   operations. Set for the x86, ARMv7, Thumb-2, ARM-64 and PowerPC targets
   unless the application has already defined the macro. */
#if !defined(SLJIT_UNALIGNED)
#if (defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32) \
	|| (defined(SLJIT_CONFIG_X86_64) && SLJIT_CONFIG_X86_64) \
	|| (defined(SLJIT_CONFIG_ARM_V7) && SLJIT_CONFIG_ARM_V7) \
	|| (defined(SLJIT_CONFIG_ARM_THUMB2) && SLJIT_CONFIG_ARM_THUMB2) \
	|| (defined(SLJIT_CONFIG_ARM_64) && SLJIT_CONFIG_ARM_64) \
	|| (defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32) \
	|| (defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64)
#define SLJIT_UNALIGNED 1
#endif
#endif /* !SLJIT_UNALIGNED */

/* Only 32 bit x86 auto detects SSE2 support using CPUID; on 64 bit x86 cpus
   sse2 must be present, so no detection is requested there. */
#if (defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32)
#define SLJIT_DETECT_SSE2 1
#endif
/*****************************************************************************************/
/* Calling convention of functions generated by SLJIT or called from the generated code. */
/*****************************************************************************************/
/* SLJIT_CALL is the calling convention keyword used for functions generated
   by SLJIT and for C functions called from the generated code. It is empty
   (the compiler default, cdecl) unless the target is 32 bit x86 without
   forced cdecl and the compiler has a known fastcall spelling; in that case
   SLJIT_X86_32_FASTCALL is defined as well. */
#if !defined(SLJIT_CALL)
#if (defined(SLJIT_USE_CDECL_CALLING_CONVENTION) && SLJIT_USE_CDECL_CALLING_CONVENTION) \
	|| !(defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32)
/* Forced cdecl, or a non x86-32 architecture. */
#define SLJIT_CALL
#elif defined(__GNUC__) && !defined(__APPLE__)
#define SLJIT_X86_32_FASTCALL 1
#define SLJIT_CALL __attribute__ ((fastcall))
#elif defined(_MSC_VER)
#define SLJIT_X86_32_FASTCALL 1
#define SLJIT_CALL __fastcall
#elif defined(__BORLANDC__)
#define SLJIT_X86_32_FASTCALL 1
#define SLJIT_CALL __msfastcall
#else
/* Unknown compiler: the cdecl attribute is the default. */
#define SLJIT_CALL
#endif
#endif /* !SLJIT_CALL */
/* It seems certain ppc compilers use an indirect addressing for functions
   which makes things complicated. The marker is set for big endian PowerPC-64
   and for PowerPC-32 on AIX, unless the application defined it already. */
#if !defined(SLJIT_INDIRECT_CALL)
#if ((defined(SLJIT_CONFIG_PPC_64) && SLJIT_CONFIG_PPC_64) \
		&& (defined(SLJIT_BIG_ENDIAN) && SLJIT_BIG_ENDIAN)) \
	|| ((defined(SLJIT_CONFIG_PPC_32) && SLJIT_CONFIG_PPC_32) && defined(_AIX))
#define SLJIT_INDIRECT_CALL 1
#endif
#endif /* !SLJIT_INDIRECT_CALL */

/* Offset between the return address and the instruction that is executed
   next after a return: 8 on SPARC-32 and 0 on every other target. */
#if !defined(SLJIT_RETURN_ADDRESS_OFFSET)
#if !(defined(SLJIT_CONFIG_SPARC_32) && SLJIT_CONFIG_SPARC_32)
#define SLJIT_RETURN_ADDRESS_OFFSET 0
#else
#define SLJIT_RETURN_ADDRESS_OFFSET 8
#endif
#endif /* !SLJIT_RETURN_ADDRESS_OFFSET */
/***************************************************/
/* Functions of the built-in executable allocator. */
/***************************************************/
/* With the built-in executable allocator enabled, the SLJIT_*_EXEC macros
   forward to the allocator functions declared here. The functions are
   implemented elsewhere in sljit. */
#if defined(SLJIT_EXECUTABLE_ALLOCATOR) && SLJIT_EXECUTABLE_ALLOCATOR

#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size)
#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr)

SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size);
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr);
/* NOTE(review): presumably releases executable memory that the allocator
   keeps but no longer uses - confirm against the allocator source. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);

/* SLJIT_EXEC_OFFSET(ptr) is the constant 0 unless the allocator variant
   selected by SLJIT_PROT_EXECUTABLE_ALLOCATOR is in use, in which case the
   value is obtained from sljit_exec_offset(). */
#if !(defined(SLJIT_PROT_EXECUTABLE_ALLOCATOR) && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
#define SLJIT_EXEC_OFFSET(ptr) 0
#else
SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_EXEC_OFFSET(ptr) sljit_exec_offset(ptr)
#endif

#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
/**********************************************/
/* Registers and locals offset determination. */
/**********************************************/
/* Per-architecture register budget and stack layout:
     SLJIT_NUMBER_OF_REGISTERS        - total registers
     SLJIT_NUMBER_OF_SAVED_REGISTERS  - how many of those are saved registers
     SLJIT_LOCALS_OFFSET_BASE         - base offset of the locals area, in bytes
   Exactly one branch of the chain below is expected to match. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define SLJIT_NUMBER_OF_REGISTERS 10
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
/* The locals offset depends on whether the fastcall convention was selected
   for SLJIT_CALL. */
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
#define SLJIT_LOCALS_OFFSET_BASE ((2 + 4) * sizeof(sljit_sw))
#else
/* Maximum 3 arguments are passed on the stack, +1 for double alignment. */
#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1 + 4) * sizeof(sljit_sw))
#endif /* SLJIT_X86_32_FASTCALL */
#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Win64 differs from the other x86-64 targets in the number of saved
   registers and in the locals offset. */
#ifndef _WIN64
#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6
#define SLJIT_LOCALS_OFFSET_BASE (sizeof(sljit_sw))
#else
#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE ((4 + 2) * sizeof(sljit_sw))
#endif /* _WIN64 */
#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
#define SLJIT_NUMBER_OF_REGISTERS 11
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
#define SLJIT_NUMBER_OF_REGISTERS 11
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
#define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
#define SLJIT_NUMBER_OF_REGISTERS 25
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw))
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
#define SLJIT_NUMBER_OF_REGISTERS 22
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX)
#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * sizeof(sljit_sw))
#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
/* Add +1 for double alignment. */
#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * sizeof(sljit_sw))
#else
#define SLJIT_LOCALS_OFFSET_BASE (3 * sizeof(sljit_sw))
#endif /* SLJIT_CONFIG_PPC_64 || _AIX */
#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
#define SLJIT_NUMBER_OF_REGISTERS 17
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw))
#else
#define SLJIT_LOCALS_OFFSET_BASE 0
#endif
#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC)
#define SLJIT_NUMBER_OF_REGISTERS 18
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14
/* NOTE(review): SLJIT_LOCALS_OFFSET_BASE is only defined for SPARC_32 in this
   branch; any other SPARC configuration would leave it undefined. */
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
/* Add +1 for double alignment. */
#define SLJIT_LOCALS_OFFSET_BASE ((23 + 1) * sizeof(sljit_sw))
#endif
#elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
#define SLJIT_NUMBER_OF_REGISTERS 10
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 5
#define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
#define SLJIT_NUMBER_OF_REGISTERS 0
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0
#define SLJIT_LOCALS_OFFSET_BASE 0
#endif
/* Derived values shared by all architectures. */
#define SLJIT_LOCALS_OFFSET (SLJIT_LOCALS_OFFSET_BASE)
/* Scratch registers are whatever is left after the saved registers. */
#define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \
(SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS)
/* Floating point registers: 6 on every target; one of them is a saved
   register on Win64 only. */
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64)
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1
#else
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
#endif
#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \
(SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS)
/*************************************/
/* Debug and verbose related macros. */
/*************************************/
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
#include <stdio.h>
#endif
/* Assertion macros.
   With SLJIT_DEBUG enabled, SLJIT_ASSERT(x) and SLJIT_ASSERT_STOP() print a
   message containing __FILE__ and __LINE__ with printf and then invoke
   SLJIT_HALT_PROCESS(); each of the three macros may be pre-defined by the
   embedder and is then left alone.
   With SLJIT_DEBUG disabled, any existing SLJIT_ASSERT / SLJIT_ASSERT_STOP
   definitions are discarded and replaced by empty statements, so the
   asserted expression is not evaluated at all. */
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
#if !defined(SLJIT_ASSERT) || !defined(SLJIT_ASSERT_STOP)
/* SLJIT_HALT_PROCESS must halt the process. */
#ifndef SLJIT_HALT_PROCESS
#include <stdlib.h>
/* NOTE(review): the default expansion already ends in ';', so the
   "SLJIT_HALT_PROCESS();" uses below expand to "abort();;" (an extra empty
   statement). */
#define SLJIT_HALT_PROCESS() \
abort();
#endif /* !SLJIT_HALT_PROCESS */
#include <stdio.h>
#endif /* !SLJIT_ASSERT || !SLJIT_ASSERT_STOP */
/* Feel free to redefine these two macros. */
#ifndef SLJIT_ASSERT
#define SLJIT_ASSERT(x) \
do { \
if (SLJIT_UNLIKELY(!(x))) { \
printf("Assertion failed at " __FILE__ ":%d\n", __LINE__); \
SLJIT_HALT_PROCESS(); \
} \
} while (0)
#endif /* !SLJIT_ASSERT */
#ifndef SLJIT_ASSERT_STOP
#define SLJIT_ASSERT_STOP() \
do { \
printf("Should never been reached " __FILE__ ":%d\n", __LINE__); \
SLJIT_HALT_PROCESS(); \
} while (0)
#endif /* !SLJIT_ASSERT_STOP */
#else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */
/* Forcing empty, but valid statements. */
#undef SLJIT_ASSERT
#undef SLJIT_ASSERT_STOP
#define SLJIT_ASSERT(x) \
do { } while (0)
#define SLJIT_ASSERT_STOP() \
do { } while (0)
#endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */
/* SLJIT_COMPILE_ASSERT(x, description): despite the name, the default
   definition forwards to the run-time SLJIT_ASSERT(x); the description
   argument is not used. A definition supplied earlier takes precedence. */
#ifndef SLJIT_COMPILE_ASSERT
/* Should be improved eventually. */
#define SLJIT_COMPILE_ASSERT(x, description) \
SLJIT_ASSERT(x)
#endif /* !SLJIT_COMPILE_ASSERT */
#endif

View file

@ -0,0 +1,312 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
This file contains a simple executable memory allocator
It is assumed, that executable code blocks are usually medium (or sometimes
large) memory blocks, and the allocator is not too frequently called (less
optimized than other allocators). Thus, using it as a generic allocator is
not suggested.
How does it work:
Memory is allocated in continuous memory areas called chunks by alloc_chunk()
Chunk format:
[ block ][ block ] ... [ block ][ block terminator ]
Every block, and the block terminator as well, starts with a block_header.
The header holds the size of the block it introduces (size) and the size of
the block before it (prev_size). These fields can also hold special values.
Block size:
0 - The block is a free_block, with a different size member.
1 - The block is a block terminator.
n - The block is used at the moment, and the value contains its size.
Previous block size:
0 - This is the first block of the memory chunk.
n - The size of the previous block.
Using these size values we can go forward or backward on the block chain.
The unused blocks are kept in a doubly linked list whose head is free_blocks.
This list is searched for a suitable memory area when the allocator is
called.
When a block is freed, the new free block is connected to its adjacent free
blocks if possible.
[ free block ][ used block ][ free block ]
and "used block" is freed, the three blocks are connected together:
[ one big free block ]
*/
/* --------------------------------------------------------------------- */
/* System (OS) functions */
/* --------------------------------------------------------------------- */
/* 64 KByte. */
#define CHUNK_SIZE 0x10000
/*
alloc_chunk / free_chunk :
* allocate executable system memory chunks
* the size is always divisible by CHUNK_SIZE
allocator_grab_lock / allocator_release_lock :
* make the allocator thread safe
* can be empty if the OS (or the application) does not support threading
* only the allocator requires this lock, sljit is fully thread safe
as it only uses local variables
*/
#ifdef _WIN32
/* Windows variant: reserves and commits `size` bytes of memory that is
   readable, writable and executable. The VirtualAlloc result is returned
   unchanged. */
static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
{
	void *chunk = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);

	return chunk;
}
/* Windows variant: releases a chunk obtained from alloc_chunk(). The region
   is released as a whole (length 0 with MEM_RELEASE), so `size` is not
   needed and is only marked as unused. */
static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
{
	VirtualFree(chunk, 0, MEM_RELEASE);
	SLJIT_UNUSED_ARG(size);
}
#else
/* Non-Windows variant: maps `size` bytes of private memory that is readable,
   writable and executable. Uses an anonymous mapping when MAP_ANON exists,
   otherwise maps the dev_zero descriptor (opened on first use through
   open_dev_zero()). Returns NULL when the descriptor cannot be opened or
   when mmap fails. */
static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
{
	void *mapping;

#ifdef MAP_ANON
	mapping = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
#else
	/* A non-zero result from open_dev_zero() is treated as failure. */
	if (dev_zero < 0 && open_dev_zero())
		return NULL;
	mapping = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, dev_zero, 0);
#endif

	/* Translate mmap's failure sentinel into the NULL callers test for. */
	if (mapping == MAP_FAILED)
		return NULL;
	return mapping;
}
/* Non-Windows variant: unmaps the `size` bytes starting at `chunk`.
   The result of munmap is not checked. */
static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
{
	(void)munmap(chunk, size);
}
#endif
/* --------------------------------------------------------------------- */
/* Common functions */
/* --------------------------------------------------------------------- */
/* Mask that clears the bits below CHUNK_SIZE; used to round sizes to a
   multiple of CHUNK_SIZE. */
#define CHUNK_MASK (~(CHUNK_SIZE - 1))
/* Header that precedes every block and the chunk terminator. */
struct block_header {
/* 0: free block, 1: block terminator, otherwise the size of this used block. */
sljit_uw size;
/* 0: first block of the chunk, otherwise the size of the previous block. */
sljit_uw prev_size;
};
/* Layout of a free block: the common header (with header.size == 0) followed
   by the links of the free list and the real size of the block. */
struct free_block {
struct block_header header;
struct free_block *next;
struct free_block *prev;
sljit_uw size;
};
/* Reinterpret the address `base + offset` (byte offset) as a block header or
   as a free block. NOTE(review): neither argument is parenthesised inside
   the expansion, so callers should pass simple expressions. */
#define AS_BLOCK_HEADER(base, offset) \
((struct block_header*)(((sljit_u8*)base) + offset))
#define AS_FREE_BLOCK(base, offset) \
((struct free_block*)(((sljit_u8*)base) + offset))
/* Address of the usable memory that follows a block header. */
#define MEM_START(base) ((void*)(((sljit_u8*)base) + sizeof(struct block_header)))
/* Adds room for the block header and rounds the total up to a multiple of 8. */
#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7)
/* Head of the doubly linked list of free blocks. */
static struct free_block* free_blocks;
/* Running sum of the sizes of the blocks currently handed out. */
static sljit_uw allocated_size;
/* Running sum of the chunk sizes, each counted without its terminator header. */
static sljit_uw total_size;
/* Marks `free_block` as free with the given size and pushes it onto the
   front of the global free list. */
static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size)
{
	struct free_block *old_head = free_blocks;

	/* A zero header size is what identifies a free block; the real size
	   lives in the separate size member. */
	free_block->header.size = 0;
	free_block->size = size;

	/* Link in as the new list head. */
	free_block->prev = NULL;
	free_block->next = old_head;
	if (old_head != NULL)
		old_head->prev = free_block;
	free_blocks = free_block;
}
/* Unlinks `free_block` from the global free list. The block's own next/prev
   members are left as they were. */
static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block)
{
	struct free_block *after = free_block->next;
	struct free_block *before = free_block->prev;

	if (after)
		after->prev = before;

	if (!before) {
		/* No predecessor: the block has to be the current list head. */
		SLJIT_ASSERT(free_blocks == free_block);
		free_blocks = after;
		return;
	}

	before->next = after;
}
/* Allocates executable memory.
   size - requested number of usable bytes.
   Returns a pointer to the usable area just after the block header, or NULL
   when a new chunk is needed and alloc_chunk() fails.
   The whole operation runs between allocator_grab_lock() and
   allocator_release_lock(). The free list is searched first (first fit);
   only when no free block is large enough is a new chunk requested.
   NOTE(review): no overflow check is performed on the size arithmetic
   (ALIGN_SIZE and the chunk rounding below). */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
{
struct block_header *header;
struct block_header *next_header;
struct free_block *free_block;
sljit_uw chunk_size;
allocator_grab_lock();
/* Enforce a minimum request so that a block, header included, is at least
   64 bytes. */
if (size < (64 - sizeof(struct block_header)))
size = (64 - sizeof(struct block_header));
/* From here on `size` includes the block header and is a multiple of 8. */
size = ALIGN_SIZE(size);
/* First fit search in the free list. */
free_block = free_blocks;
while (free_block) {
if (free_block->size >= size) {
chunk_size = free_block->size;
/* Only split when the remainder stays larger than 64 bytes. */
if (chunk_size > size + 64) {
/* We just cut a block from the end of the free block. */
chunk_size -= size;
free_block->size = chunk_size;
header = AS_BLOCK_HEADER(free_block, chunk_size);
header->prev_size = chunk_size;
/* The block that follows the new used block must see its new size. */
AS_BLOCK_HEADER(header, size)->prev_size = size;
}
else {
/* Hand out the whole free block; the allocation grows to its size. */
sljit_remove_free_block(free_block);
header = (struct block_header*)free_block;
size = chunk_size;
}
allocated_size += size;
header->size = size;
allocator_release_lock();
return MEM_START(header);
}
free_block = free_block->next;
}
/* No suitable free block: request a new chunk, large enough for the block
   plus the terminator header, rounded up to a multiple of CHUNK_SIZE. */
chunk_size = (size + sizeof(struct block_header) + CHUNK_SIZE - 1) & CHUNK_MASK;
header = (struct block_header*)alloc_chunk(chunk_size);
if (!header) {
allocator_release_lock();
return NULL;
}
/* Set aside the space of the terminator header; it is not counted in
   total_size. */
chunk_size -= sizeof(struct block_header);
total_size += chunk_size;
/* prev_size == 0 marks the first block of a chunk. */
header->prev_size = 0;
if (chunk_size > size + 64) {
/* Cut the allocated space into a free and a used block. */
allocated_size += size;
header->size = size;
chunk_size -= size;
free_block = AS_FREE_BLOCK(header, size);
free_block->header.prev_size = size;
sljit_insert_free_block(free_block, chunk_size);
next_header = AS_BLOCK_HEADER(free_block, chunk_size);
}
else {
/* All space belongs to this allocation. */
allocated_size += chunk_size;
header->size = chunk_size;
next_header = AS_BLOCK_HEADER(header, chunk_size);
}
/* Write the block terminator (size == 1) at the end of the chunk. */
next_header->size = 1;
next_header->prev_size = chunk_size;
allocator_release_lock();
return MEM_START(header);
}
/* Returns a block obtained from sljit_malloc_exec() to the allocator.
   ptr - pointer previously returned by sljit_malloc_exec(); it is used
         without any validation (the block header is read from just before
         it).
   The freed block is merged with a free neighbour on either side, and when
   the resulting free block spans its whole chunk the chunk may be handed
   back to the system. Runs between allocator_grab_lock() and
   allocator_release_lock(). */
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
{
struct block_header *header;
struct free_block* free_block;
allocator_grab_lock();
/* Step back from the user pointer to the block header. */
header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header));
allocated_size -= header->size;
/* Connecting free blocks together if possible. */
/* If header->prev_size == 0, free_block will equal to header.
In this case, free_block->header.size will be > 0. */
free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size);
if (SLJIT_UNLIKELY(!free_block->header.size)) {
/* The previous block is free: grow it over the block being freed and
   update the prev_size of the block that follows. */
free_block->size += header->size;
header = AS_BLOCK_HEADER(free_block, free_block->size);
header->prev_size = free_block->size;
}
else {
/* Otherwise the freed block becomes a new entry of the free list. */
free_block = (struct free_block*)header;
sljit_insert_free_block(free_block, header->size);
}
/* Look at the block that follows; merge it as well when it is free. */
header = AS_BLOCK_HEADER(free_block, free_block->size);
if (SLJIT_UNLIKELY(!header->size)) {
free_block->size += ((struct free_block*)header)->size;
sljit_remove_free_block((struct free_block*)header);
header = AS_BLOCK_HEADER(free_block, free_block->size);
header->prev_size = free_block->size;
}
/* The whole chunk is free. */
if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) {
/* If this block is freed, we still have (allocated_size / 2) free space. */
if (total_size - free_block->size > (allocated_size * 3 / 2)) {
total_size -= free_block->size;
sljit_remove_free_block(free_block);
/* The terminator header is part of the chunk but not of free_block->size. */
free_chunk(free_block, free_block->size + sizeof(struct block_header));
}
}
allocator_release_lock();
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
{
struct free_block* free_block;
struct free_block* next_free_block;
allocator_grab_lock();
free_block = free_blocks;
while (free_block) {
next_free_block = free_block->next;
if (!free_block->header.prev_size &&
AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
total_size -= free_block->size;
sljit_remove_free_block(free_block);
free_chunk(free_block, free_block->size + sizeof(struct block_header));
}
free_block = next_free_block;
}
SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
allocator_release_lock();
}

2067
thirdparty/pcre2/src/sljit/sljitLir.c vendored Normal file

File diff suppressed because it is too large Load diff

1269
thirdparty/pcre2/src/sljit/sljitLir.h vendored Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,368 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* mips 32-bit arch dependent functions. */
/* Emits the shortest instruction sequence that loads the constant `imm` into
   the register `dst_ar`:
     - one ORI when no bit above the low 16 is set,
     - one ADDIU when imm is negative and not below SIMM_MIN,
     - otherwise LUI with the upper half, followed by ORI with the lower half
       only when that half is non-zero.
   Returns the push_inst result of the last emitted instruction, or
   SLJIT_SUCCESS when the trailing ORI is skipped. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm)
{
	if ((imm & ~0xffff) == 0)
		return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);

	if (imm < 0 && imm >= SIMM_MIN)
		return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);

	/* General case: upper 16 bits first. */
	FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar));

	if ((imm & 0xffff) == 0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
}
/* EMIT_LOGICAL(op_imm, op_norm): emits a bitwise operation.
   The expansion refers to the names compiler, op, flags, dst, src1 and src2
   of the enclosing function. When SRC2_IMM is set in flags, op_imm is used
   with src2 as an immediate; otherwise op_norm is used with src2 as a
   register. If SLJIT_SET_E is requested, the result is additionally computed
   into EQUAL_FLAG; the write to dst is guarded by CHECK_FLAGS(SLJIT_SET_E).
   NOTE(review): the expansion is a bare if/else (no do { } while (0)
   wrapper), so it has to be used as a complete statement. */
#define EMIT_LOGICAL(op_imm, op_norm) \
if (flags & SRC2_IMM) { \
if (op & SLJIT_SET_E) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
if (CHECK_FLAGS(SLJIT_SET_E)) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
} \
else { \
if (op & SLJIT_SET_E) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
if (CHECK_FLAGS(SLJIT_SET_E)) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
}
/* EMIT_SHIFT(op_imm, op_v): emits a shift operation.
   The expansion refers to the names compiler, op, flags, dst, src1 and src2
   of the enclosing function. When SRC2_IMM is set in flags, op_imm is used
   with the shift amount encoded through SH_IMM(src2); otherwise op_v is used
   with the amount taken from register src2. In both forms the shifted value
   is src1. If SLJIT_SET_E is requested, the result is additionally computed
   into EQUAL_FLAG; the write to dst is guarded by CHECK_FLAGS(SLJIT_SET_E).
   NOTE(review): the expansion is a bare if/else (no do { } while (0)
   wrapper), so it has to be used as a complete statement. */
#define EMIT_SHIFT(op_imm, op_v) \
if (flags & SRC2_IMM) { \
if (op & SLJIT_SET_E) \
FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
if (CHECK_FLAGS(SLJIT_SET_E)) \
FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
} \
else { \
if (op & SLJIT_SET_E) \
FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
if (CHECK_FLAGS(SLJIT_SET_E)) \
FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \
}
/* Emits the 32-bit MIPS instruction sequence for a single sljit operation.
   compiler - compiler instance that receives the instructions (push_inst).
   op       - sljit opcode combined with SLJIT_SET_* flag request bits; the
              opcode part is extracted with GET_OPCODE.
   flags    - internal flags; this function tests SRC2_IMM, REG_DEST,
              REG2_SOURCE and UNUSED_DEST.
   dst      - destination register.
   src1     - first source register.
   src2     - second source register, or an immediate when SRC2_IMM is set.
   Returns SLJIT_SUCCESS or the result of the last push_inst call; FAIL_IF
   (defined elsewhere) presumably returns early when an emit fails.
   Requested status flags are computed into dedicated registers (EQUAL_FLAG,
   ULESS_FLAG, UGREATER_FLAG, LESS_FLAG, GREATER_FLAG, OVERFLOW_FLAG).
   CHECK_FLAGS is defined elsewhere; it guards the instruction that writes the
   real destination. */
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
switch (GET_OPCODE(op)) {
/* Word sized moves: the value is in src2; an ADDU with absolute register 0
   copies it, and nothing is emitted when dst already is src2. */
case SLJIT_MOV:
case SLJIT_MOV_U32:
case SLJIT_MOV_S32:
case SLJIT_MOV_P:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if (dst != src2)
return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst));
return SLJIT_SUCCESS;
/* 8 bit moves: an explicit extension is emitted only for register to
   register moves. */
case SLJIT_MOV_U8:
case SLJIT_MOV_S8:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
/* NOTE(review): this compares the full op value rather than GET_OPCODE(op). */
if (op == SLJIT_MOV_S8) {
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
#else
/* Sign extension by shifting left and arithmetically right by 24 bits. */
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
#endif
}
/* Zero extension by masking with 0xff. */
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
}
else if (dst != src2)
SLJIT_ASSERT_STOP();
return SLJIT_SUCCESS;
/* 16 bit moves: same structure as the 8 bit case, with 16 bit shifts and a
   0xffff mask. */
case SLJIT_MOV_U16:
case SLJIT_MOV_S16:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
/* NOTE(review): this compares the full op value rather than GET_OPCODE(op). */
if (op == SLJIT_MOV_S16) {
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
#else
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
#endif
}
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
}
else if (dst != src2)
SLJIT_ASSERT_STOP();
return SLJIT_SUCCESS;
/* Bitwise NOT: NOR of src2 with itself. */
case SLJIT_NOT:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if (op & SLJIT_SET_E)
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (CHECK_FLAGS(SLJIT_SET_E))
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
return SLJIT_SUCCESS;
/* Count leading zeros: a single CLZ instruction when SLJIT_MIPS_R1 is set,
   otherwise a hand written loop. */
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
if (op & SLJIT_SET_E)
FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (CHECK_FLAGS(SLJIT_SET_E))
FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
#else
/* Only the flag is needed: EQUAL_FLAG becomes the inverted top bit of src2
   (0 when the top bit is set, 1 otherwise). */
if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
}
/* Nearly all instructions are unmovable in the following sequence. */
/* The value is copied to TMP_REG1, which the loop shifts left bit by bit. */
FAIL_IF(push_inst(compiler, ADDU | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
/* Check zero. */
FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(dst) | IMM(-1), DR(dst)));
/* Loop for searching the highest bit. */
FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst)));
FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
/* The equal flag is a copy of the computed count. */
if (op & SLJIT_SET_E)
return push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
#endif
return SLJIT_SUCCESS;
/* Addition with optional equal, carry and overflow flags. */
case SLJIT_ADD:
if (flags & SRC2_IMM) {
/* Overflow preparation: OVERFLOW_FLAG receives src1 (immediate >= 0) or
   its complement (immediate < 0). */
if (op & SLJIT_SET_O) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
else
FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
}
if (op & SLJIT_SET_E)
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
/* ULESS_FLAG temporarily holds src1 | src2 for the carry test below. */
if (op & (SLJIT_SET_C | SLJIT_SET_O)) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
else {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
}
}
/* dst may be the same as src1 or src2. */
if (CHECK_FLAGS(SLJIT_SET_E))
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
}
else {
/* Register form: same steps, with XOR of the operands as the overflow
   preparation. */
if (op & SLJIT_SET_O)
FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
if (op & SLJIT_SET_E)
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (op & (SLJIT_SET_C | SLJIT_SET_O))
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
/* dst may be the same as src1 or src2. */
if (CHECK_FLAGS(SLJIT_SET_E))
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
}
/* a + b >= a | b (otherwise, the carry should be set to 1). */
if (op & (SLJIT_SET_C | SLJIT_SET_O))
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
if (!(op & SLJIT_SET_O))
return SLJIT_SUCCESS;
/* Overflow: the carry is moved to bit 31 and combined by XOR with the
   prepared value and with the result. */
FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
/* NOTE(review): the final shift is SLL here while the otherwise identical
   tail of SLJIT_SUB uses SRL - confirm the difference is intended. */
return push_inst(compiler, SLL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
/* Addition with carry in: the incoming carry is read from ULESS_FLAG and the
   outgoing carry is written back to ULESS_FLAG. */
case SLJIT_ADDC:
if (flags & SRC2_IMM) {
if (op & SLJIT_SET_C) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
else {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
}
}
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
} else {
if (op & SLJIT_SET_C)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
}
/* OVERFLOW_FLAG is used as scratch for the carry of the first addition. */
if (op & SLJIT_SET_C)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
/* Add the incoming carry. */
FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
if (!(op & SLJIT_SET_C))
return SLJIT_SUCCESS;
/* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
/* Set carry flag. */
return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG);
/* Subtraction with optional equal, unsigned, signed, carry and overflow
   flags. */
case SLJIT_SUB:
/* The immediate form is not used when unsigned/signed comparison flags are
   requested or when the immediate equals SIMM_MIN (the code below negates
   it); the value is loaded into TMP_REG2 instead. */
if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
if (flags & SRC2_IMM) {
if (op & SLJIT_SET_O) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
else
FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
}
/* Subtraction of an immediate is emitted as addition of its negation. */
if (op & SLJIT_SET_E)
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
if (op & (SLJIT_SET_C | SLJIT_SET_O))
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
/* dst may be the same as src1 or src2. */
if (CHECK_FLAGS(SLJIT_SET_E))
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
if (op & SLJIT_SET_O)
FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
if (op & SLJIT_SET_E)
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* Unsigned comparisons in both directions. */
if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
if (op & SLJIT_SET_U)
FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG));
/* Signed comparisons in both directions. */
if (op & SLJIT_SET_S) {
FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG));
FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG));
}
/* dst may be the same as src1 or src2. */
if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
}
if (!(op & SLJIT_SET_O))
return SLJIT_SUCCESS;
/* Overflow: the borrow is moved to bit 31 and combined by XOR with the
   prepared value and with the result; the final SRL moves bit 31 to bit 0. */
FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
return push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
/* Subtraction with borrow in: the incoming borrow is read from ULESS_FLAG and
   the outgoing borrow is written back to ULESS_FLAG. */
case SLJIT_SUBC:
/* SIMM_MIN is loaded into TMP_REG2 because the immediate form negates it. */
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
if (flags & SRC2_IMM) {
if (op & SLJIT_SET_C)
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
if (op & SLJIT_SET_C)
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
}
/* LESS_FLAG is used as scratch: borrow caused by subtracting the incoming
   borrow. */
if (op & SLJIT_SET_C)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG));
FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
/* The outgoing borrow is the OR of the two partial borrows. */
return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;
/* Multiplication; an immediate second operand is not accepted. */
case SLJIT_MUL:
SLJIT_ASSERT(!(flags & SRC2_IMM));
if (!(op & SLJIT_SET_O)) {
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
#else
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
#endif
}
/* Overflow detection: OVERFLOW_FLAG = HI - (LO >> 31, arithmetic), using
   ULESS_FLAG and UGREATER_FLAG as temporaries. */
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG));
FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG));
return push_inst(compiler, SUBU | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
/* Bitwise and shift operations share the EMIT_LOGICAL / EMIT_SHIFT macros. */
case SLJIT_AND:
EMIT_LOGICAL(ANDI, AND);
return SLJIT_SUCCESS;
case SLJIT_OR:
EMIT_LOGICAL(ORI, OR);
return SLJIT_SUCCESS;
case SLJIT_XOR:
EMIT_LOGICAL(XORI, XOR);
return SLJIT_SUCCESS;
case SLJIT_SHL:
EMIT_SHIFT(SLL, SLLV);
return SLJIT_SUCCESS;
case SLJIT_LSHR:
EMIT_SHIFT(SRL, SRLV);
return SLJIT_SUCCESS;
case SLJIT_ASHR:
EMIT_SHIFT(SRA, SRAV);
return SLJIT_SUCCESS;
}
/* Reached only for an opcode that has no case above. */
SLJIT_ASSERT_STOP();
return SLJIT_SUCCESS;
}
/* Emits a constant load into `dst` as a fixed pair of instructions: LUI with
   bits 16..31 of init_value, then ORI with the low 16 bits. Both
   instructions are always emitted, even when one half is zero. Returns the
   result of the second push_inst. */
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
{
	sljit_sw upper_half = init_value >> 16;

	FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(upper_half), DR(dst)));

	return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst));
}
/* Patches the jump target stored in the two instructions at `addr`.
   The low 16 bits of the first instruction receive bits 16..31 of
   new_target and the low 16 bits of the second instruction receive bits
   0..15; the upper halves of both instruction words are preserved.
   Afterwards the instruction cache is flushed for the two words at the
   address adjusted by executable_offset. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_ins *patch = (sljit_ins *)addr;
	sljit_uw target_high = (new_target >> 16) & 0xffff;
	sljit_uw target_low = new_target & 0xffff;

	patch[0] = (patch[0] & 0xffff0000) | target_high;
	patch[1] = (patch[1] & 0xffff0000) | target_low;

	/* The flush is done on the executable view of the patched words. */
	patch = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(patch, executable_offset);
	SLJIT_CACHE_FLUSH(patch, patch + 2);
}
/* Patches the constant stored in the two instructions at `addr`.
   The low 16 bits of the first instruction receive bits 16..31 of
   new_constant and the low 16 bits of the second instruction receive bits
   0..15; the upper halves of both instruction words are preserved.
   Afterwards the instruction cache is flushed for the two words at the
   address adjusted by executable_offset. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_ins *patch = (sljit_ins *)addr;
	sljit_sw const_high = (new_constant >> 16) & 0xffff;
	sljit_sw const_low = new_constant & 0xffff;

	patch[0] = (patch[0] & 0xffff0000) | const_high;
	patch[1] = (patch[1] & 0xffff0000) | const_low;

	/* The flush is done on the executable view of the patched words. */
	patch = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(patch, executable_offset);
	SLJIT_CACHE_FLUSH(patch, patch + 2);
}

View file

@ -0,0 +1,471 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* mips 64-bit arch dependent functions. */
/*
 * Emits instructions that load the 64-bit immediate imm into dst_ar.
 *
 * compiler - compiler instance the instructions are pushed to.
 * dst_ar   - destination, passed to the TA()/DA()/SA() field macros and as
 *            the second argument of push_inst().
 * imm      - value to materialise.
 *
 * Short forms are tried first (one ORI, one ADDIU, or LUI [+ ORI] for values
 * that fit in 32 signed bits). Larger values are built from a 32-bit leading
 * part followed by one or two shift/ORI steps.
 *
 * Returns SLJIT_SUCCESS or the result of the last push_inst(); failures of
 * intermediate push_inst() calls are handled by FAIL_IF (defined elsewhere).
 */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm)
{
sljit_s32 shift = 32;
sljit_s32 shift2;
sljit_s32 inv = 0;
sljit_ins ins;
sljit_uw uimm;
/* No bit set above the low 16: a single ORI with register 0 as source. */
if (!(imm & ~0xffff))
return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);
/* Negative value not below SIMM_MIN: a single ADDIU with register 0 as source. */
if (imm < 0 && imm >= SIMM_MIN)
return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar);
/* Fits in 32 signed bits: LUI for the upper half, ORI only if the lower half is non-zero. */
if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar));
return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS;
}
/* Work on a non-negative bit pattern: negative values are inverted here and
   inverted back (see inv) once the pattern has been normalised. */
uimm = imm;
if (imm < 0) {
uimm = ~imm;
inv = 1;
}
/* Shift the pattern left until its two top bits are 01 (asserted below),
   reducing shift by the number of positions moved. */
while (!(uimm & 0xff00000000000000l)) {
shift -= 8;
uimm <<= 8;
}
if (!(uimm & 0xf000000000000000l)) {
shift -= 4;
uimm <<= 4;
}
if (!(uimm & 0xc000000000000000l)) {
shift -= 2;
uimm <<= 2;
}
/* The top bit ended up set: step back by one position. */
if ((sljit_sw)uimm < 0) {
uimm >>= 1;
shift += 1;
}
SLJIT_ASSERT(((uimm & 0xc000000000000000l) == 0x4000000000000000l) && (shift > 0) && (shift <= 32));
if (inv)
uimm = ~uimm;
/* Load the top 32 bits of the normalised pattern: LUI, plus ORI when the
   second 16-bit chunk is non-zero. */
FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(uimm >> 48), dst_ar));
if (uimm & 0x0000ffff00000000l)
FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 32), dst_ar));
/* Keep only the low `shift` bits of the original value, which are still missing. */
imm &= (1l << shift) - 1;
if (!(imm & ~0xffff)) {
/* The remainder fits in 16 bits: one shift (DSLL32 when shift is exactly 32,
   otherwise DSLL with the amount encoded) and an optional ORI. */
ins = (shift == 32) ? DSLL32 : DSLL;
if (shift < 32)
ins |= SH_IMM(shift);
FAIL_IF(push_inst(compiler, ins | TA(dst_ar) | DA(dst_ar), dst_ar));
return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
}
/* Two shifts need to be performed. Drop the 32 bits already loaded and
   normalise what is left so that its top bit is set (asserted below);
   shift2 becomes the amount of the second shift. */
uimm <<= 32;
shift2 = shift - 16;
while (!(uimm & 0xf000000000000000l)) {
shift2 -= 4;
uimm <<= 4;
}
if (!(uimm & 0xc000000000000000l)) {
shift2 -= 2;
uimm <<= 2;
}
if (!(uimm & 0x8000000000000000l)) {
shift2--;
uimm <<= 1;
}
SLJIT_ASSERT((uimm & 0x8000000000000000l) && (shift2 > 0) && (shift2 <= 16));
/* First shift by (shift - shift2), OR in the next 16-bit chunk, then shift by shift2. */
FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift - shift2), dst_ar));
FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 48), dst_ar));
FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift2), dst_ar));
/* Finally OR in the low shift2 bits of the original value, if any are set. */
imm &= (1l << shift2) - 1;
return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
}
/* Selects between two opcodes: yields a unless SLJIT_I32_OP is set in the
   variable `op` of the enclosing scope, in which case it yields b. */
#define SELECT_OP(a, b) \
(!(op & SLJIT_I32_OP) ? a : b)
/* Emits a logical operation using compiler, op, flags, dst, src1 and src2
   from the enclosing scope.
   op_imm is used (with IMM(src2)) when SRC2_IMM is set in flags, op_norm
   (with src2 as a register) otherwise. When SLJIT_SET_E is set in op, the
   result is additionally computed into EQUAL_FLAG. The write to dst is
   guarded by CHECK_FLAGS(SLJIT_SET_E), which is defined elsewhere.
   FAIL_IF is used on every push_inst(), so the macro may leave the enclosing
   function early. */
#define EMIT_LOGICAL(op_imm, op_norm) \
if (flags & SRC2_IMM) { \
if (op & SLJIT_SET_E) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
if (CHECK_FLAGS(SLJIT_SET_E)) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
} \
else { \
if (op & SLJIT_SET_E) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
if (CHECK_FLAGS(SLJIT_SET_E)) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
}
/* Emits a shift using compiler, op, flags, dst, src1, src2 and the local
   `ins` from the enclosing scope.
   Immediate shift amount (SRC2_IMM set):
     - src2 >= 32: op_dimm32 is used and src2 is reduced by 32 (this modifies
       src2 in the enclosing scope); asserted not to be a 32-bit operation.
     - otherwise: op_imm for SLJIT_I32_OP operations, op_dimm for the rest.
   Register shift amount: op_v for SLJIT_I32_OP operations, op_dv otherwise.
   As in EMIT_LOGICAL, SLJIT_SET_E adds a copy of the result in EQUAL_FLAG and
   the write to dst is guarded by CHECK_FLAGS(SLJIT_SET_E). */
#define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \
if (flags & SRC2_IMM) { \
if (src2 >= 32) { \
SLJIT_ASSERT(!(op & SLJIT_I32_OP)); \
ins = op_dimm32; \
src2 -= 32; \
} \
else \
ins = (op & SLJIT_I32_OP) ? op_imm : op_dimm; \
if (op & SLJIT_SET_E) \
FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
if (CHECK_FLAGS(SLJIT_SET_E)) \
FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
} \
else { \
ins = (op & SLJIT_I32_OP) ? op_v : op_dv; \
if (op & SLJIT_SET_E) \
FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
if (CHECK_FLAGS(SLJIT_SET_E)) \
FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \
}
/*
 * Emits the MIPS64 instruction sequence for a single SLJIT operation.
 *
 * compiler - compiler instance the instructions are pushed to.
 * op       - SLJIT opcode plus modifier bits. GET_OPCODE(op) selects the case,
 *            SLJIT_I32_OP selects the 32-bit instruction variants (through
 *            SELECT_OP) and the SLJIT_SET_* bits request the emulated status
 *            flags, which are computed into the *_FLAG registers.
 * flags    - operand description bits tested below (SRC2_IMM, REG_DEST,
 *            REG2_SOURCE, UNUSED_DEST).
 * dst      - destination register.
 * src1     - first source register; the move and unary cases assert that it
 *            is TMP_REG1.
 * src2     - second source register, or an immediate value when SRC2_IMM is
 *            set in flags.
 *
 * Returns SLJIT_SUCCESS or the result of the last push_inst(); failures of
 * intermediate push_inst() calls are handled by FAIL_IF (defined elsewhere).
 */
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
	/* Written by the EMIT_SHIFT macro. */
	sljit_ins ins;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
	case SLJIT_MOV_P:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
		/* Nothing to emit when source and destination are the same register. */
		if (dst != src2)
			return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst));
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U8:
	case SLJIT_MOV_S8:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
			if (op == SLJIT_MOV_S8) {
				/* Sign extension: shift left by 32 + 24, then arithmetic shift right by the same amount. */
				FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
				return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst));
			}
			/* Zero extension: keep the low 8 bits. */
			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
		}
		else if (dst != src2)
			SLJIT_ASSERT_STOP();
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U16:
	case SLJIT_MOV_S16:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
			if (op == SLJIT_MOV_S16) {
				/* Sign extension: shift left by 32 + 16, then arithmetic shift right by the same amount. */
				FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
				return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst));
			}
			/* Zero extension: keep the low 16 bits. */
			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
		}
		else if (dst != src2)
			SLJIT_ASSERT_STOP();
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U32:
		SLJIT_ASSERT(!(op & SLJIT_I32_OP));
		/* Zero extension: shift left by 32, then logical shift right by 32. */
		FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst)));
		return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst));

	case SLJIT_MOV_S32:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
		/* A 32-bit shift by zero. */
		return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst));

	case SLJIT_NOT:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
		/* NOR of a register with itself is its bitwise complement. */
		if (op & SLJIT_SET_E)
			FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
		if (CHECK_FLAGS(SLJIT_SET_E))
			FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
		return SLJIT_SUCCESS;

	case SLJIT_CLZ:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
		if (op & SLJIT_SET_E)
			FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
		if (CHECK_FLAGS(SLJIT_SET_E))
			FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst)));
#else
		if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
			/* Only the flag is needed: take the top bit of the source and invert it. */
			FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
			return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
		}
		/* Nearly all instructions are unmovable in the following sequence. */
		FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
		/* Check zero. */
		FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
		FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_I32_OP) ? 32 : 64), UNMOVABLE_INS));
		FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst)));
		/* Loop for searching the highest bit. */
		FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst)));
		FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
		FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
		if (op & SLJIT_SET_E)
			return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
#endif
		return SLJIT_SUCCESS;

	case SLJIT_ADD:
		if (flags & SRC2_IMM) {
			if (op & SLJIT_SET_O) {
				/* Seed OVERFLOW_FLAG with src1, or with its complement for a negative immediate. */
				if (src2 >= 0)
					FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
				else
					FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
			}
			if (op & SLJIT_SET_E)
				FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
			if (op & (SLJIT_SET_C | SLJIT_SET_O)) {
				/* ULESS_FLAG temporarily holds src1 | src2 for the carry test below. */
				if (src2 >= 0)
					FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
				else {
					FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
					FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
				}
			}
			/* dst may be the same as src1 or src2. */
			if (CHECK_FLAGS(SLJIT_SET_E))
				FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
		}
		else {
			if (op & SLJIT_SET_O)
				FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
			if (op & SLJIT_SET_E)
				FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
			if (op & (SLJIT_SET_C | SLJIT_SET_O))
				FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
			/* dst may be the same as src1 or src2. */
			if (CHECK_FLAGS(SLJIT_SET_E))
				FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
		}

		/* a + b >= a | b (otherwise, the carry should be set to 1). */
		if (op & (SLJIT_SET_C | SLJIT_SET_O))
			FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
		if (!(op & SLJIT_SET_O))
			return SLJIT_SUCCESS;
		/* Move the carry to the sign position and combine it with the operand
		   and result bits; the overflow indicator ends up in the top bit
		   (bit 63, or bit 31 for 32-bit operations). */
		FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
		FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
		FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
		/* Bring that top bit down to bit 0 with a logical right shift, exactly
		   as SLJIT_SUB does. A left shift would discard it in the 32-bit case. */
		return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);

	case SLJIT_ADDC:
		if (flags & SRC2_IMM) {
			if (op & SLJIT_SET_C) {
				/* OVERFLOW_FLAG temporarily holds src1 | src2 for the carry test below. */
				if (src2 >= 0)
					FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
				else {
					FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
					FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
				}
			}
			FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
		} else {
			if (op & SLJIT_SET_C)
				FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
			/* dst may be the same as src1 or src2. */
			FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
		}
		if (op & SLJIT_SET_C)
			FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));

		/* Add the incoming carry, which is read from ULESS_FLAG. */
		FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
		if (!(op & SLJIT_SET_C))
			return SLJIT_SUCCESS;

		/* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */
		FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
		/* Set carry flag. */
		return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG);

	case SLJIT_SUB:
		/* The immediate form cannot be used when unsigned/signed comparison
		   flags are requested or when the immediate is SIMM_MIN (it is
		   negated below), so the immediate is first loaded into TMP_REG2. */
		if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
			FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
			src2 = TMP_REG2;
			flags &= ~SRC2_IMM;
		}

		if (flags & SRC2_IMM) {
			if (op & SLJIT_SET_O) {
				if (src2 >= 0)
					FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
				else
					FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
			}
			/* Subtraction of an immediate is emitted as addition of its negation. */
			if (op & SLJIT_SET_E)
				FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
			if (op & (SLJIT_SET_C | SLJIT_SET_O))
				FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
			/* dst may be the same as src1 or src2. */
			if (CHECK_FLAGS(SLJIT_SET_E))
				FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
		}
		else {
			if (op & SLJIT_SET_O)
				FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
			if (op & SLJIT_SET_E)
				FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
			if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
				FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
			if (op & SLJIT_SET_U)
				FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG));
			if (op & SLJIT_SET_S) {
				FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG));
				FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG));
			}
			/* dst may be the same as src1 or src2. */
			if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
				FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
		}

		if (!(op & SLJIT_SET_O))
			return SLJIT_SUCCESS;
		/* Same overflow extraction as in SLJIT_ADD, using the borrow in ULESS_FLAG. */
		FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
		FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
		FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
		return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);

	case SLJIT_SUBC:
		/* SIMM_MIN cannot be negated into an immediate: load it into TMP_REG2. */
		if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
			FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
			src2 = TMP_REG2;
			flags &= ~SRC2_IMM;
		}

		if (flags & SRC2_IMM) {
			if (op & SLJIT_SET_C)
				FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
			/* dst may be the same as src1 or src2. */
			FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
		}
		else {
			if (op & SLJIT_SET_C)
				FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
			/* dst may be the same as src1 or src2. */
			FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
		}

		if (op & SLJIT_SET_C)
			FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG));

		/* Subtract the incoming borrow, which is read from ULESS_FLAG. */
		FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
		/* The outgoing borrow is the OR of the two partial borrows. */
		return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;

	case SLJIT_MUL:
		SLJIT_ASSERT(!(flags & SRC2_IMM));
		if (!(op & SLJIT_SET_O)) {
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
			if (op & SLJIT_I32_OP)
				return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
			FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS));
			return push_inst(compiler, MFLO | D(dst), DR(dst));
#else
			FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
			return push_inst(compiler, MFLO | D(dst), DR(dst));
#endif
		}
		/* Overflow requested: OVERFLOW_FLAG becomes the difference between the
		   high half of the product and the sign fill of the low half. */
		FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
		FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG));
		FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
		FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG));
		return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);

	case SLJIT_AND:
		EMIT_LOGICAL(ANDI, AND);
		return SLJIT_SUCCESS;

	case SLJIT_OR:
		EMIT_LOGICAL(ORI, OR);
		return SLJIT_SUCCESS;

	case SLJIT_XOR:
		EMIT_LOGICAL(XORI, XOR);
		return SLJIT_SUCCESS;

	case SLJIT_SHL:
		EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV);
		return SLJIT_SUCCESS;

	case SLJIT_LSHR:
		EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV);
		return SLJIT_SUCCESS;

	case SLJIT_ASHR:
		EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV);
		return SLJIT_SUCCESS;
	}

	/* Unknown opcode. */
	SLJIT_ASSERT_STOP();
	return SLJIT_SUCCESS;
}
/* Emits a fixed six-instruction sequence that loads the 64-bit init_value
   into dst: LUI, ORI, DSLL 16, ORI, DSLL 16, ORI. The value is consumed as
   four 16-bit slices, most significant first. sljit_set_const() and
   sljit_set_jump_addr() below patch instructions 0, 1, 3 and 5 of a sequence
   with this layout. */
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
{
	const sljit_sw slice_63_48 = init_value >> 48;
	const sljit_sw slice_47_32 = init_value >> 32;
	const sljit_sw slice_31_16 = init_value >> 16;

	/* Upper 32 bits. */
	FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(slice_63_48), DR(dst)));
	FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(slice_47_32), DR(dst)));

	/* Make room for, then merge, bits 31..16. */
	FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst)));
	FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(slice_31_16), DR(dst)));

	/* Make room for, then merge, bits 15..0. */
	FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst)));
	return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst));
}
/* Rewrites the 64-bit target stored in the six-instruction sequence at addr.
   Words 0, 1, 3 and 5 each receive one 16-bit slice of new_target in their
   low 16 bits (most significant slice first); their upper 16 bits, and words
   2 and 4, are left unchanged. The six words are then flushed from the cache
   at the address obtained by applying executable_offset. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_ins *seq = (sljit_ins *)addr;
	sljit_ins *exec_seq;

	seq[0] = ((new_target >> 48) & 0xffff) | (seq[0] & 0xffff0000);
	seq[1] = ((new_target >> 32) & 0xffff) | (seq[1] & 0xffff0000);
	seq[3] = ((new_target >> 16) & 0xffff) | (seq[3] & 0xffff0000);
	seq[5] = (new_target & 0xffff) | (seq[5] & 0xffff0000);

	exec_seq = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(seq, executable_offset);
	SLJIT_CACHE_FLUSH(exec_seq, exec_seq + 6);
}
/* Rewrites the 64-bit constant stored in the six-instruction sequence at
   addr. Words 0, 1, 3 and 5 each receive one 16-bit slice of new_constant in
   their low 16 bits (most significant slice first); their upper 16 bits, and
   words 2 and 4, are left unchanged. The six words are then flushed from the
   cache at the address obtained by applying executable_offset. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_ins *seq = (sljit_ins *)addr;
	sljit_ins *exec_seq;

	seq[0] = ((new_constant >> 48) & 0xffff) | (seq[0] & 0xffff0000);
	seq[1] = ((new_constant >> 32) & 0xffff) | (seq[1] & 0xffff0000);
	seq[3] = ((new_constant >> 16) & 0xffff) | (seq[3] & 0xffff0000);
	seq[5] = (new_constant & 0xffff) | (seq[5] & 0xffff0000);

	exec_seq = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(seq, executable_offset);
	SLJIT_CACHE_FLUSH(exec_seq, exec_seq + 6);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,271 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* ppc 32-bit arch dependent functions. */
/* Emits the instructions that load the 32-bit immediate imm into reg.
   One instruction is used when imm lies within [SIMM_MIN, SIMM_MAX] (ADDI)
   or has no bit set above the low 16 (ORI from TMP_ZERO); otherwise ADDIS
   loads the upper half and an ORI follows only when the lower half is
   non-zero. Returns the push_inst() result or SLJIT_SUCCESS. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	if (imm >= SIMM_MIN && imm <= SIMM_MAX)
		return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm));

	if ((imm & ~0xffff) == 0)
		return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm));

	FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16)));

	/* The lower half is already zero after ADDIS. */
	if (!(imm & 0xffff))
		return SLJIT_SUCCESS;

	return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm));
}
/* Builds an RLWINM instruction word with source src and destination dst,
   placing `from` in the field at bit 6 and 31 in the field at bit 1.
   emit_single_op() uses it with from = 24 and from = 16 for the unsigned
   8-bit and 16-bit moves.
   NOTE(review): presumably the "clrlwi" simplified mnemonic (clear the
   leftmost `from` bits) - confirm against the RLWINM encoding. */
#define INS_CLEAR_LEFT(dst, src, from) \
(RLWINM | S(src) | A(dst) | ((from) << 6) | (31 << 1))
/*
 * Emits the PPC 32-bit instruction(s) for a single SLJIT operation.
 *
 * compiler - compiler instance the instructions are pushed to; the
 *            immediate forms read their operand from compiler->imm.
 * op       - SLJIT opcode selecting the case below.
 * flags    - ALT_FORM1..ALT_FORM6 select alternative encodings of the
 *            operation, ALT_SET_FLAGS requests the flag-setting variants,
 *            REG_DEST / REG2_SOURCE describe the operands; the value is also
 *            passed to the RC() and OERC() macros (defined elsewhere).
 * dst      - destination register.
 * src1     - first source register; the move and unary cases assert that it
 *            is TMP_REG1.
 * src2     - second source register; the immediate forms assert that it is
 *            TMP_REG2.
 *
 * Returns SLJIT_SUCCESS or the result of the last push_inst(); failures of
 * intermediate push_inst() calls are handled by FAIL_IF (defined elsewhere).
 */
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
{
switch (op) {
case SLJIT_MOV:
case SLJIT_MOV_U32:
case SLJIT_MOV_S32:
case SLJIT_MOV_P:
SLJIT_ASSERT(src1 == TMP_REG1);
/* Register copy (OR of src2 with itself); nothing is emitted when dst is src2. */
if (dst != src2)
return push_inst(compiler, OR | S(src2) | A(dst) | B(src2));
return SLJIT_SUCCESS;
case SLJIT_MOV_U8:
case SLJIT_MOV_S8:
SLJIT_ASSERT(src1 == TMP_REG1);
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
/* Register to register: EXTSB for the signed move, INS_CLEAR_LEFT(.., 24) for the unsigned one. */
if (op == SLJIT_MOV_S8)
return push_inst(compiler, EXTSB | S(src2) | A(dst));
return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
}
/* Signed move into a register destination from a non-register source still gets EXTSB. */
else if ((flags & REG_DEST) && op == SLJIT_MOV_S8)
return push_inst(compiler, EXTSB | S(src2) | A(dst));
else {
SLJIT_ASSERT(dst == src2);
}
return SLJIT_SUCCESS;
case SLJIT_MOV_U16:
case SLJIT_MOV_S16:
SLJIT_ASSERT(src1 == TMP_REG1);
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
/* Register to register: EXTSH for the signed move, INS_CLEAR_LEFT(.., 16) for the unsigned one. */
if (op == SLJIT_MOV_S16)
return push_inst(compiler, EXTSH | S(src2) | A(dst));
return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
}
else {
SLJIT_ASSERT(dst == src2);
}
return SLJIT_SUCCESS;
case SLJIT_NOT:
SLJIT_ASSERT(src1 == TMP_REG1);
/* NOR of src2 with itself. */
return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
case SLJIT_NEG:
SLJIT_ASSERT(src1 == TMP_REG1);
return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1);
return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
case SLJIT_ADD:
/* ALT_FORM1..ALT_FORM4 are immediate forms taking the operand from compiler->imm. */
if (flags & ALT_FORM1) {
/* Flags are not set: BIN_IMM_EXTS is unnecessary. */
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm);
}
if (flags & ALT_FORM2) {
/* Flags are not set: BIN_IMM_EXTS is unnecessary. */
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
}
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm);
}
if (flags & ALT_FORM4) {
/* Flags are not set: BIN_IMM_EXTS is unnecessary. */
/* Two-step add: ADDI with the low 16 bits, then ADDIS with the high 16
   bits plus bit 15 of the immediate.
   NOTE(review): the added bit 15 presumably compensates for ADDI
   sign-extending its 16-bit operand - confirm. */
FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)));
return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)));
}
/* Register forms: plain ADD, or ADDC with OERC(ALT_SET_FLAGS) when flags are requested. */
if (!(flags & ALT_SET_FLAGS))
return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2));
return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
case SLJIT_ADDC:
if (flags & ALT_FORM1) {
/* ADDE bracketed by MFXER into / MTXER from register 0.
   NOTE(review): presumably this restores XER so that the carry is left
   unchanged by the operation - confirm. */
FAIL_IF(push_inst(compiler, MFXER | D(0)));
FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)));
return push_inst(compiler, MTXER | S(0));
}
return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2));
case SLJIT_SUB:
if (flags & ALT_FORM1) {
/* Flags are not set: BIN_IMM_EXTS is unnecessary. */
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
}
/* ALT_FORM2 / ALT_FORM3: compare src1 with the immediate only (CMPI into
   CRD(0), CMPLI into CRD(4)); dst is not written. */
if (flags & (ALT_FORM2 | ALT_FORM3)) {
SLJIT_ASSERT(src2 == TMP_REG2);
if (flags & ALT_FORM2)
FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm));
if (flags & ALT_FORM3)
return push_inst(compiler, CMPLI | CRD(4) | A(src1) | compiler->imm);
return SLJIT_SUCCESS;
}
/* ALT_FORM4 / ALT_FORM5: compare the two registers only (CMPL into CRD(4),
   CMP into CRD(0)); dst is not written. */
if (flags & (ALT_FORM4 | ALT_FORM5)) {
if (flags & ALT_FORM4)
FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2)));
if (flags & ALT_FORM5)
FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2)));
return SLJIT_SUCCESS;
}
/* The subtract-from instructions take src2 in the A field and src1 in the B field. */
if (!(flags & ALT_SET_FLAGS))
return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
/* ALT_FORM6: an additional CMPL into CRD(4) before the flag-setting subtract. */
if (flags & ALT_FORM6)
FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2)));
return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
case SLJIT_SUBC:
if (flags & ALT_FORM1) {
/* Same MFXER / MTXER bracketing as in SLJIT_ADDC. */
FAIL_IF(push_inst(compiler, MFXER | D(0)));
FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)));
return push_inst(compiler, MTXER | S(0));
}
return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));
case SLJIT_MUL:
if (flags & ALT_FORM1) {
/* Immediate form. */
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm);
}
return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1));
case SLJIT_AND:
/* ALT_FORM1 / ALT_FORM2: immediate forms (ANDI / ANDIS). */
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm);
}
if (flags & ALT_FORM2) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm);
}
return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_OR:
/* ALT_FORM1 / ALT_FORM2: single immediate instruction (ORI / ORIS). */
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm);
}
if (flags & ALT_FORM2) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm);
}
/* ALT_FORM3: full immediate applied in two steps, ORI with the low half then ORIS with the high half. */
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm)));
return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
}
return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_XOR:
/* ALT_FORM1 / ALT_FORM2: single immediate instruction (XORI / XORIS). */
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm);
}
if (flags & ALT_FORM2) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm);
}
/* ALT_FORM3: full immediate applied in two steps, XORI with the low half then XORIS with the high half. */
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm)));
return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
}
return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_SHL:
if (flags & ALT_FORM1) {
/* Immediate shift: the amount is reduced to 5 bits (this updates
   compiler->imm) and encoded into an RLWINM instruction. */
SLJIT_ASSERT(src2 == TMP_REG2);
compiler->imm &= 0x1f;
return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
}
return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_LSHR:
if (flags & ALT_FORM1) {
/* Immediate shift: the amount is reduced to 5 bits (this updates
   compiler->imm) and encoded into an RLWINM instruction using
   (32 - imm) & 0x1f in the field at bit 11. */
SLJIT_ASSERT(src2 == TMP_REG2);
compiler->imm &= 0x1f;
return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
}
return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_ASHR:
/* ALT_FORM3: bracket the shift with MFXER into / MTXER from register 0
   (see the matching MTXER in the return statement below). */
if (flags & ALT_FORM3)
FAIL_IF(push_inst(compiler, MFXER | D(0)));
if (flags & ALT_FORM1) {
/* Immediate shift amount, reduced to 5 bits (this updates compiler->imm). */
SLJIT_ASSERT(src2 == TMP_REG2);
compiler->imm &= 0x1f;
FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)));
}
else
FAIL_IF(push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)));
return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS;
}
/* Unknown opcode. */
SLJIT_ASSERT_STOP();
return SLJIT_SUCCESS;
}
/* Emits a fixed two-instruction sequence that loads init_value into reg:
   ADDIS with the upper 16 bits, then ORI with the lower 16 bits. Both
   instructions are always emitted, matching the two words that
   sljit_set_const() and sljit_set_jump_addr() below patch. */
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value)
{
	const sljit_sw upper_half = init_value >> 16;

	FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(upper_half)));

	return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
}
/* Rewrites the 32-bit target stored in the two-instruction sequence at addr.
   The low 16 bits of the first word receive bits 31..16 of new_target and
   the low 16 bits of the second word receive bits 15..0; the upper 16 bits of
   both words are preserved. The two words are then flushed from the cache at
   the address obtained by applying executable_offset. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_ins *pair = (sljit_ins *)addr;
	sljit_ins *exec_pair;

	pair[0] = ((new_target >> 16) & 0xffff) | (pair[0] & 0xffff0000);
	pair[1] = (new_target & 0xffff) | (pair[1] & 0xffff0000);

	exec_pair = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(pair, executable_offset);
	SLJIT_CACHE_FLUSH(exec_pair, exec_pair + 2);
}
/* Rewrites the 32-bit constant stored in the two-instruction sequence at
   addr. The low 16 bits of the first word receive bits 31..16 of
   new_constant and the low 16 bits of the second word receive bits 15..0;
   the upper 16 bits of both words are preserved. The two words are then
   flushed from the cache at the address obtained by applying
   executable_offset. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_ins *pair = (sljit_ins *)addr;
	sljit_ins *exec_pair;

	pair[0] = ((new_constant >> 16) & 0xffff) | (pair[0] & 0xffff0000);
	pair[1] = (new_constant & 0xffff) | (pair[1] & 0xffff0000);

	exec_pair = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(pair, executable_offset);
	SLJIT_CACHE_FLUSH(exec_pair, exec_pair + 2);
}

View file

@ -0,0 +1,423 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* ppc 64-bit arch dependent functions. */
/* ASM_SLJIT_CLZ(src, dst): stores the result of the "cntlzd" instruction
   applied to src into dst, using GCC-style inline assembly. Compilers without
   that syntax are rejected at compile time. */
#if defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
#define ASM_SLJIT_CLZ(src, dst) \
__asm__ volatile ( "cntlzd %0, %1" : "=r"(dst) : "r"(src) )
#elif defined(__xlc__)
#error "Please enable GCC syntax for inline assembly statements"
#else
#error "Must implement count leading zeroes"
#endif
/* RLDI(dst, src, sh, mb, type): builds an instruction word with primary
   opcode 30, source src and destination dst. The 6-bit values sh and mb are
   each split into a 5-bit field plus one extra bit, and `type` is placed at
   bit 2. */
#define RLDI(dst, src, sh, mb, type) \
(HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20))
/* PUSH_RLDICR(reg, shift): pushes RLDI(reg, reg, 63 - shift, shift, 1) to the
   variable `compiler` of the enclosing scope and yields the push_inst()
   result.
   NOTE(review): judging by the name this is the "rldicr" instruction (rotate
   left by 63 - shift, keep bits 0..shift) - confirm against the Power ISA. */
#define PUSH_RLDICR(reg, shift) \
push_inst(compiler, RLDI(reg, reg, 63 - shift, shift, 1))
/*
 * Emits instructions that load the 64-bit immediate imm into reg.
 *
 * compiler - compiler instance the instructions are pushed to.
 * reg      - destination register.
 * imm      - value to materialise.
 *
 * Short forms are tried first (ADDI, ORI from TMP_ZERO, or ADDIS [+ ORI] for
 * values that fit in 32 signed bits). Larger values are left-justified with
 * the help of a leading-bit count so that a 16-bit or 32-bit leading part can
 * be loaded and moved into place with PUSH_RLDICR, optionally followed by
 * OR-ing in the remaining low bits. A five-instruction general sequence is
 * the fallback.
 *
 * Returns SLJIT_SUCCESS or the result of the last push_inst(); failures of
 * intermediate push_inst() calls are handled by FAIL_IF (defined elsewhere).
 */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
sljit_uw tmp;
sljit_uw shift;
sljit_uw tmp2;
sljit_uw shift2;
/* Within [SIMM_MIN, SIMM_MAX]: a single ADDI. */
if (imm <= SIMM_MAX && imm >= SIMM_MIN)
return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm));
/* No bit set above the low 16: a single ORI from TMP_ZERO. */
if (!(imm & ~0xffff))
return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm));
/* Fits in 32 signed bits: ADDIS for the upper half, ORI only if the lower half is non-zero. */
if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16)));
return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
}
/* Count leading zeroes of imm, or of ~imm for negative values. */
tmp = (imm >= 0) ? imm : ~imm;
ASM_SLJIT_CLZ(tmp, shift);
SLJIT_ASSERT(shift > 0);
/* Shift by one position less than the count, then left-justify imm. */
shift--;
tmp = (imm << shift);
/* Only the top 16 bits of the justified value are set: ADDI + one PUSH_RLDICR. */
if ((tmp & ~0xffff000000000000ul) == 0) {
FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
shift += 15;
return PUSH_RLDICR(reg, shift);
}
/* Only the top 32 bits of the justified value are set: ADDIS + ORI + one PUSH_RLDICR. */
if ((tmp & ~0xffffffff00000000ul) == 0) {
FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(tmp >> 48)));
FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32)));
shift += 31;
return PUSH_RLDICR(reg, shift);
}
/* Cut the leading 16 bits out of the immediate; tmp2 holds the bits of imm below them. */
shift += 15;
tmp2 = imm & ((1ul << (63 - shift)) - 1);
/* The remainder fits in 16 bits: leading part, PUSH_RLDICR, then one ORI. */
if (tmp2 <= 0xffff) {
FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
FAIL_IF(PUSH_RLDICR(reg, shift));
return push_inst(compiler, ORI | S(reg) | A(reg) | tmp2);
}
/* The remainder fits in 32 bits: leading part, PUSH_RLDICR, ORIS, and ORI if the low 16 bits are non-zero. */
if (tmp2 <= 0xffffffff) {
FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
FAIL_IF(PUSH_RLDICR(reg, shift));
FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (tmp2 >> 16)));
return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS;
}
/* Left-justify the remainder as well. */
ASM_SLJIT_CLZ(tmp2, shift2);
tmp2 <<= shift2;
/* Only the top 16 bits of the justified remainder are set: two 16-bit chunks, each followed by a PUSH_RLDICR. */
if ((tmp2 & ~0xffff000000000000ul) == 0) {
FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
shift2 += 15;
shift += (63 - shift2);
FAIL_IF(PUSH_RLDICR(reg, shift));
FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (tmp2 >> 48)));
return PUSH_RLDICR(reg, shift2);
}
/* The general version: load the upper 32 bits, PUSH_RLDICR(reg, 31), then OR in the lower 32 bits. */
FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 48)));
FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32)));
FAIL_IF(PUSH_RLDICR(reg, 31));
FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16)));
return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm));
}
/* Simplified mnemonics: clrldi. */
#define INS_CLEAR_LEFT(dst, src, from) \
(RLDICL | S(src) | A(dst) | ((from) << 6) | (1 << 5))
/* Sign extension for integer operations. */
#define UN_EXTS() \
if ((flags & (ALT_SIGN_EXT | REG2_SOURCE)) == (ALT_SIGN_EXT | REG2_SOURCE)) { \
FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \
src2 = TMP_REG2; \
}
#define BIN_EXTS() \
if (flags & ALT_SIGN_EXT) { \
if (flags & REG1_SOURCE) { \
FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \
src1 = TMP_REG1; \
} \
if (flags & REG2_SOURCE) { \
FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \
src2 = TMP_REG2; \
} \
}
/* Binary-with-immediate form: only the first operand can need extending.
   When both ALT_SIGN_EXT and REG1_SOURCE are set, emits EXTSW from src1 into
   TMP_REG1 and redirects the caller's local `src1` to TMP_REG1. May return
   early through FAIL_IF. */
#define BIN_IMM_EXTS() \
if ((flags & (ALT_SIGN_EXT | REG1_SOURCE)) == (ALT_SIGN_EXT | REG1_SOURCE)) { \
FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \
src1 = TMP_REG1; \
}
/* Emits the machine instruction(s) for one SLJIT operation.
 *
 * op    - SLJIT opcode selecting one of the cases below.
 * flags - ALT_FORM1..6, ALT_SIGN_EXT, ALT_SET_FLAGS, REG_DEST, REG1_SOURCE
 *         and REG2_SOURCE bits; inside each case they pick the concrete
 *         encoding (immediate vs. register form, 32 vs. 64 bit, ...).
 * dst, src1, src2 - register operands. The move and unary cases assert that
 *         src1 is TMP_REG1; the immediate forms assert that src2 is TMP_REG2
 *         and read the immediate from compiler->imm.
 *
 * Returns the result of the last push_inst() call, or SLJIT_SUCCESS when no
 * instruction had to be emitted. FAIL_IF may return early when an earlier
 * push_inst() reports failure. An op that matches no case reaches
 * SLJIT_ASSERT_STOP(). */
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
{
switch (op) {
/* Full-width move: encoded as OR of src2 with itself into dst; nothing is
   emitted when dst already is src2. */
case SLJIT_MOV:
case SLJIT_MOV_P:
SLJIT_ASSERT(src1 == TMP_REG1);
if (dst != src2)
return push_inst(compiler, OR | S(src2) | A(dst) | B(src2));
return SLJIT_SUCCESS;
/* 32 bit move: only a register-to-register move emits code (EXTSW for the
   signed variant, clear-left for the unsigned one); otherwise dst must
   already be src2. */
case SLJIT_MOV_U32:
case SLJIT_MOV_S32:
SLJIT_ASSERT(src1 == TMP_REG1);
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_S32)
return push_inst(compiler, EXTSW | S(src2) | A(dst));
return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0));
}
else {
SLJIT_ASSERT(dst == src2);
}
return SLJIT_SUCCESS;
/* 8 bit move: like the 32 bit case, but the signed variant is additionally
   extended with EXTSB whenever the destination is a register, even if the
   source is not flagged as a register. */
case SLJIT_MOV_U8:
case SLJIT_MOV_S8:
SLJIT_ASSERT(src1 == TMP_REG1);
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_S8)
return push_inst(compiler, EXTSB | S(src2) | A(dst));
return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
}
else if ((flags & REG_DEST) && op == SLJIT_MOV_S8)
return push_inst(compiler, EXTSB | S(src2) | A(dst));
else {
SLJIT_ASSERT(dst == src2);
}
return SLJIT_SUCCESS;
/* 16 bit move: EXTSH for the signed variant, clear-left for the unsigned. */
case SLJIT_MOV_U16:
case SLJIT_MOV_S16:
SLJIT_ASSERT(src1 == TMP_REG1);
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_S16)
return push_inst(compiler, EXTSH | S(src2) | A(dst));
return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
}
else {
SLJIT_ASSERT(dst == src2);
}
return SLJIT_SUCCESS;
/* Bitwise NOT: NOR of src2 with itself, after optional sign extension. */
case SLJIT_NOT:
SLJIT_ASSERT(src1 == TMP_REG1);
UN_EXTS();
return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
case SLJIT_NEG:
SLJIT_ASSERT(src1 == TMP_REG1);
UN_EXTS();
return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
/* Count leading zeros: ALT_FORM1 selects the word form (CNTLZW), otherwise
   the doubleword form (CNTLZD) is used. */
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1);
if (flags & ALT_FORM1)
return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst));
/* Addition. ALT_FORM1..4 are immediate forms taking compiler->imm:
   FORM1 = ADDI, FORM2 = ADDIS, FORM3 = ADDIC, FORM4 = ADDI + ADDIS pair. */
case SLJIT_ADD:
if (flags & ALT_FORM1) {
/* Flags are not set: BIN_IMM_EXTS unnecessary. */
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm);
}
if (flags & ALT_FORM2) {
/* Flags are not set: BIN_IMM_EXTS unnecessary. */
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
}
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
BIN_IMM_EXTS();
return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm);
}
if (flags & ALT_FORM4) {
/* Flags are not set: BIN_IMM_EXTS unnecessary. */
/* The low half is added first; the high half is then incremented by
   bit 15 of the immediate before being added with ADDIS. */
FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)));
return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)));
}
if (!(flags & ALT_SET_FLAGS))
return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2));
BIN_EXTS();
return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
/* Add with carry. With ALT_FORM1 the XER register is read into register
   index 0 before the ADDE and written back afterwards. */
case SLJIT_ADDC:
if (flags & ALT_FORM1) {
FAIL_IF(push_inst(compiler, MFXER | D(0)));
FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)));
return push_inst(compiler, MTXER | S(0));
}
BIN_EXTS();
return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2));
/* Subtraction and comparison. FORM1 = SUBFIC with immediate; FORM2/FORM3 =
   compare with immediate only (CMPI / CMPLI); FORM4/FORM5 = register compare
   only (CMPL / CMP); FORM6 = an extra CMPL emitted before the SUBFC. */
case SLJIT_SUB:
if (flags & ALT_FORM1) {
/* Flags are not set: BIN_IMM_EXTS unnecessary. */
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
}
if (flags & (ALT_FORM2 | ALT_FORM3)) {
SLJIT_ASSERT(src2 == TMP_REG2);
/* NOTE(review): the low bit passed to CRD() is set when ALT_SIGN_EXT is
   absent; presumably it selects the 64 bit compare - confirm against CRD. */
if (flags & ALT_FORM2)
FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm));
if (flags & ALT_FORM3)
return push_inst(compiler, CMPLI | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm);
return SLJIT_SUCCESS;
}
if (flags & (ALT_FORM4 | ALT_FORM5)) {
if (flags & ALT_FORM4)
FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
if (flags & ALT_FORM5)
return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2));
return SLJIT_SUCCESS;
}
/* Note the swapped operand fields: src2 goes into A and src1 into B for the
   subtract-from instructions. */
if (!(flags & ALT_SET_FLAGS))
return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
BIN_EXTS();
if (flags & ALT_FORM6)
FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
/* Subtract with carry; ALT_FORM1 wraps the SUBFE in the same XER
   read/write-back pair as SLJIT_ADDC. */
case SLJIT_SUBC:
if (flags & ALT_FORM1) {
FAIL_IF(push_inst(compiler, MFXER | D(0)));
FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)));
return push_inst(compiler, MTXER | S(0));
}
BIN_EXTS();
return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));
/* Multiplication: FORM1 = MULLI with immediate; FORM2 = word multiply
   (MULLW); otherwise doubleword multiply (MULLD). */
case SLJIT_MUL:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm);
}
BIN_EXTS();
if (flags & ALT_FORM2)
return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1));
return push_inst(compiler, MULLD | OERC(flags) | D(dst) | A(src2) | B(src1));
/* Bitwise AND: FORM1 = ANDI, FORM2 = ANDIS, otherwise register AND. */
case SLJIT_AND:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm);
}
if (flags & ALT_FORM2) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm);
}
return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2));
/* Bitwise OR: FORM1 = ORI, FORM2 = ORIS, FORM3 = ORI followed by ORIS for
   an immediate split into two 16 bit halves, otherwise register OR. */
case SLJIT_OR:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm);
}
if (flags & ALT_FORM2) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm);
}
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm)));
return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
}
return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
/* Bitwise XOR: same structure as SLJIT_OR with XORI / XORIS / XOR. */
case SLJIT_XOR:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm);
}
if (flags & ALT_FORM2) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm);
}
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm)));
return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
}
return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
/* Shifts: ALT_FORM1 means the shift count is the immediate compiler->imm
   (which is masked in place); ALT_FORM2 selects the 32 bit variant (count
   masked with 0x1f), otherwise the 64 bit variant (count masked with 0x3f). */
case SLJIT_SHL:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
if (flags & ALT_FORM2) {
compiler->imm &= 0x1f;
return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
}
else {
compiler->imm &= 0x3f;
return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags));
}
}
return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2));
/* Logical shift right: the immediate forms are encoded as rotate-and-mask
   instructions (RLWINM / RLDI). */
case SLJIT_LSHR:
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
if (flags & ALT_FORM2) {
compiler->imm &= 0x1f;
return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
}
else {
compiler->imm &= 0x3f;
return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags));
}
}
return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2));
/* Arithmetic shift right. With ALT_FORM3 the XER register is read into
   register index 0 before the shift and written back afterwards.
   NOTE(review): presumably because the SRA* instructions modify the carry
   bit in XER - confirm. */
case SLJIT_ASHR:
if (flags & ALT_FORM3)
FAIL_IF(push_inst(compiler, MFXER | D(0)));
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
if (flags & ALT_FORM2) {
compiler->imm &= 0x1f;
FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)));
}
else {
compiler->imm &= 0x3f;
/* The 6 bit count is split: low five bits at bit 11, bit 5 moved to bit 1. */
FAIL_IF(push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4)));
}
}
else
FAIL_IF(push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2)));
return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS;
}
/* No case matched: unknown opcode. */
SLJIT_ASSERT_STOP();
return SLJIT_SUCCESS;
}
/* Loads a 64 bit constant into `reg` using a fixed sequence of five
   instructions (ADDIS, ORI, RLDICR by 31, ORIS, ORI). The sequence is emitted
   in full regardless of the value, each immediate instruction carrying one
   16 bit slice of `init_value`. Returns the result of the last push_inst()
   call; FAIL_IF may return early on an earlier failure. */
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value)
{
	/* The constant shifted so that each 16 bit slice sits in the low bits;
	   IMM() is applied to every one of them below. */
	sljit_sw slice_48 = init_value >> 48;
	sljit_sw slice_32 = init_value >> 32;
	sljit_sw slice_16 = init_value >> 16;
	sljit_sw slice_0 = init_value;

	/* Upper half: two slices, then shifted into place. */
	FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(slice_48)));
	FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(slice_32)));
	FAIL_IF(PUSH_RLDICR(reg, 31));

	/* Lower half: the remaining two slices are OR-ed in. */
	FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(slice_16)));
	return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(slice_0));
}
/* Rewrites the target address stored in an already generated five-word
   instruction sequence at `addr`. Only the low 16 bits of words 0, 1, 3 and 4
   are replaced; word 2 is not touched. Afterwards the five words are flushed
   from the cache at the address adjusted by `executable_offset`. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_ins *code = (sljit_ins*)addr;
	sljit_ins *exec_code;

	/* Most significant slice first, keeping the upper 16 bits of each word. */
	code[0] = ((new_target >> 48) & 0xffff) | (code[0] & 0xffff0000);
	code[1] = ((new_target >> 32) & 0xffff) | (code[1] & 0xffff0000);
	code[3] = ((new_target >> 16) & 0xffff) | (code[3] & 0xffff0000);
	code[4] = (new_target & 0xffff) | (code[4] & 0xffff0000);

	exec_code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	SLJIT_CACHE_FLUSH(exec_code, exec_code + 5);
}
/* Replaces the constant embedded in a five-word instruction sequence at
   `addr` (the layout emit_const produces: immediates in words 0, 1, 3, 4 and
   a shift in word 2, which is left alone). The patched words are then flushed
   from the cache at the address adjusted by `executable_offset`. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_ins *seq = (sljit_ins*)addr;
	sljit_ins *flush_start;

	/* Each word keeps its upper 16 bits and receives one 16 bit slice. */
	seq[0] = ((new_constant >> 48) & 0xffff) | (seq[0] & 0xffff0000);
	seq[1] = ((new_constant >> 32) & 0xffff) | (seq[1] & 0xffff0000);
	seq[3] = ((new_constant >> 16) & 0xffff) | (seq[3] & 0xffff0000);
	seq[4] = (new_constant & 0xffff) | (seq[4] & 0xffff0000);

	flush_start = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(seq, executable_offset);
	SLJIT_CACHE_FLUSH(flush_start, flush_start + 5);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,166 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Loads the immediate `imm` into register `dst`.
   Values inside [SIMM_MIN, SIMM_MAX] need a single OR with an immediate
   operand. Anything else is built with SETHI (bits 10 and up) followed by an
   OR of the low 10 bits, the OR being omitted when those bits are all zero. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw imm)
{
	if (imm > SIMM_MAX || imm < SIMM_MIN) {
		/* Does not fit the signed immediate field: two-step load. */
		FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((imm >> 10) & 0x3fffff), DR(dst)));

		if (!(imm & 0x3ff))
			return SLJIT_SUCCESS;

		return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (imm & 0x3ff), DR(dst));
	}

	return push_inst(compiler, OR | D(dst) | S1(0) | IMM(imm), DR(dst));
}
/* Encodes the second operand of an instruction: as an immediate field via
   IMM() when SRC2_IMM is set in `flags`, otherwise as a register field via
   S2(). */
#define ARG2(flags, src2) ((flags & SRC2_IMM) ? IMM(src2) : S2(src2))
/* Emits the machine instruction(s) for one SLJIT operation.
 *
 * op    - SLJIT opcode selecting one of the cases below.
 * flags - SRC2_IMM marks src2 as an immediate value instead of a register;
 *         SET_FLAGS (asserted to be the same bit as ICC_IS_SET) requests the
 *         flag-setting form; REG_DEST / REG2_SOURCE mark register operands.
 * dst, src1, src2 - operands; the move and unary cases assert that src1 is
 *         TMP_REG1 and that src2 is not an immediate.
 *
 * NOTE(review): the last push_inst() argument (DR(dst), SET_FLAGS,
 * MOVABLE_INS, UNMOVABLE_INS) presumably describes what the instruction
 * writes for delay-slot scheduling - confirm against push_inst.
 *
 * Returns the result of the last push_inst() call, or SLJIT_SUCCESS when
 * nothing more had to be emitted. An op that matches no case reaches
 * SLJIT_ASSERT_STOP(). */
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same);
switch (op) {
/* Word-sized moves: an OR of register 0 with src2 into dst; nothing is
   emitted when dst already is src2. */
case SLJIT_MOV:
case SLJIT_MOV_U32:
case SLJIT_MOV_S32:
case SLJIT_MOV_P:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if (dst != src2)
return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst));
return SLJIT_SUCCESS;
/* 8 bit move between registers: AND with 0xff for the unsigned variant,
   shift left then arithmetic shift right by 24 for the signed one. For any
   other operand combination dst must already be src2. */
case SLJIT_MOV_U8:
case SLJIT_MOV_S8:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_U8)
return push_inst(compiler, AND | D(dst) | S1(src2) | IMM(0xff), DR(dst));
FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst)));
return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst));
}
else if (dst != src2)
SLJIT_ASSERT_STOP();
return SLJIT_SUCCESS;
/* 16 bit move between registers: shift left by 16, then shift right by 16
   arithmetically (signed) or logically (unsigned). */
case SLJIT_MOV_U16:
case SLJIT_MOV_S16:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst)));
return push_inst(compiler, (op == SLJIT_MOV_S16 ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst));
}
else if (dst != src2)
SLJIT_ASSERT_STOP();
return SLJIT_SUCCESS;
/* Bitwise NOT: XNOR of register 0 with src2. */
case SLJIT_NOT:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS));
/* Count leading zeros, emitted as an instruction loop. */
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
/* sparc 32 does not support SLJIT_KEEP_FLAGS. Not sure I can fix this. */
/* Test src2, copy it to TMP_REG1, then branch (condition 0x1,
   displacement 7) with "dst = 32" as the instruction following the branch. */
FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS));
FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS | (flags & SET_FLAGS)));
/* The counter starts at -1; every loop iteration ends by adding 1. */
FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst)));
/* Loop. */
/* Test TMP_REG1, shift it left by one, branch back (condition 0xe,
   displacement -2) with "dst += 1" as the instruction following the branch. */
FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS));
FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS));
return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS | (flags & SET_FLAGS));
/* The arithmetic and logic operations below map to one instruction each;
   SET_FLAGS from `flags` is OR-ed into the instruction word, and ARG2 picks
   the immediate or register encoding of src2. */
case SLJIT_ADD:
return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
case SLJIT_ADDC:
return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
case SLJIT_SUB:
return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
case SLJIT_SUBC:
return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
/* Multiplication: SMUL; when flags are requested, the sign of the result
   (dst >> 31) is additionally compared against the value read with RDY. */
case SLJIT_MUL:
FAIL_IF(push_inst(compiler, SMUL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
if (!(flags & SET_FLAGS))
return SLJIT_SUCCESS;
FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, RDY | D(TMP_LINK), DR(TMP_LINK)));
return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS);
case SLJIT_AND:
return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
case SLJIT_OR:
return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
case SLJIT_XOR:
return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
/* Shifts: the shift itself is emitted without SET_FLAGS; when flags are
   requested, a separate flag-setting SUB of dst against register 0 follows. */
case SLJIT_SHL:
FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
case SLJIT_LSHR:
FAIL_IF(push_inst(compiler, SRL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
case SLJIT_ASHR:
FAIL_IF(push_inst(compiler, SRA | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
}
/* No case matched: unknown opcode. */
SLJIT_ASSERT_STOP();
return SLJIT_SUCCESS;
}
/* Loads a patchable constant into `dst`. Unlike load_immediate, the
   SETHI + OR pair is always emitted in full, so the sequence has a fixed
   length of two instructions whatever the value is. */
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
{
	/* Split the value into the 22 bit field for SETHI and the low 10 bits. */
	sljit_sw upper_bits = (init_value >> 10) & 0x3fffff;
	sljit_sw lower_bits = init_value & 0x3ff;

	FAIL_IF(push_inst(compiler, SETHI | D(dst) | upper_bits, DR(dst)));
	return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | lower_bits, DR(dst));
}
/* Rewrites the target address stored in a two-word instruction pair at
   `addr`: word 0 receives bits 10 and up in its low 22 bits, word 1 receives
   the low 10 bits. Both words are then flushed from the cache at the address
   adjusted by `executable_offset`. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_ins *code = (sljit_ins *)addr;
	sljit_ins *exec_code;

	code[0] = ((new_target >> 10) & 0x3fffff) | (code[0] & 0xffc00000);
	code[1] = (new_target & 0x3ff) | (code[1] & 0xfffffc00);

	exec_code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	SLJIT_CACHE_FLUSH(exec_code, exec_code + 2);
}
/* Replaces the constant embedded in a two-word instruction pair at `addr`
   (the layout emit_const produces): the low 22 bits of word 0 take bits 10
   and up of the value, the low 10 bits of word 1 take the rest. Both words
   are then flushed from the cache at the address adjusted by
   `executable_offset`. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_ins *pair = (sljit_ins *)addr;
	sljit_ins *flush_start;

	pair[0] = ((new_constant >> 10) & 0x3fffff) | (pair[0] & 0xffc00000);
	pair[1] = (new_constant & 0x3ff) | (pair[1] & 0xfffffc00);

	flush_start = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(pair, executable_offset);
	SLJIT_CACHE_FLUSH(flush_start, flush_start + 2);
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,550 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* x86 32-bit arch dependent functions. */
/* Emits a one-byte opcode immediately followed by a machine-word immediate.
   Returns SLJIT_SUCCESS; FAIL_IF returns early when no buffer space could be
   obtained. */
static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	/* One extra byte is requested on top of the instruction itself
	   (opcode + immediate), matching the other emitters in this file. */
	sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));

	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));

	inst[0] = opcode;
	sljit_unaligned_store_sw(inst + 1, imm);
	return SLJIT_SUCCESS;
}
/* Writes the opcode of a jump with a 32 bit displacement at `code_ptr` and
   reserves four bytes for the displacement.
   - SLJIT_JUMP emits JMP_i32, types from SLJIT_FAST_CALL upwards emit
     CALL_i32 (one opcode byte each); every other type emits the two-byte
     GROUP_0F + get_jump_code(type) form.
   - jump->addr is advanced past the opcode byte(s).
   - Jumps to a label are only marked with PATCH_MW; for any other jump the
     relative displacement is stored right away.
   Returns the position just past the displacement field. */
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset)
{
	if (type != SLJIT_JUMP && type < SLJIT_FAST_CALL) {
		/* Two-byte opcode. */
		code_ptr[0] = GROUP_0F;
		code_ptr[1] = get_jump_code(type);
		code_ptr += 2;
		jump->addr += 2;
	}
	else {
		/* One-byte opcode: plain jump or call. */
		*code_ptr++ = (type == SLJIT_JUMP) ? JMP_i32 : CALL_i32;
		jump->addr++;
	}

	if (!(jump->flags & JUMP_LABEL))
		sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset);
	else
		jump->flags |= PATCH_MW;

	return code_ptr + 4;
}
/* Emits the function prologue: pushes TMP_REG1 and the saved registers in
   use, loads up to three arguments into SLJIT_S0..S2, computes the local
   stack area size (stored in compiler->local_size) and subtracts it from the
   stack pointer. Returns SLJIT_SUCCESS or the result of the final
   emit_non_cum_binary() call; FAIL_IF may return early on failure. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
sljit_s32 size;
sljit_u8 *inst;
CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
compiler->args = args;
compiler->flags_saved = 0;
/* Byte count of the first code fragment: one push for TMP_REG1 plus the
   pushes of the saved registers, followed by the argument loads. */
size = 1 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
/* Two bytes per register move, two more for the stack load of argument 3. */
size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
#else
/* Two bytes for "mov TMP_REG1, esp" plus three bytes per argument load. */
size += (args > 0 ? (2 + args * 3) : 0);
#endif
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
FAIL_IF(!inst);
INC_SIZE(size);
PUSH_REG(reg_map[TMP_REG1]);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
/* Keep the current stack pointer in TMP_REG1 so the arguments can be
   addressed relative to it after further pushes. */
if (args > 0) {
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
}
#endif
/* A saved register is pushed either because it is used as a saved register
   or because enough scratch registers are requested to overlap it. */
if (saveds > 2 || scratches > 7)
PUSH_REG(reg_map[SLJIT_S2]);
if (saveds > 1 || scratches > 8)
PUSH_REG(reg_map[SLJIT_S1]);
if (saveds > 0 || scratches > 9)
PUSH_REG(reg_map[SLJIT_S0]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
/* Fastcall: the first two arguments are copied from SLJIT_R2 and SLJIT_R1,
   the third one is loaded from the stack. */
if (args > 0) {
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
}
if (args > 1) {
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
}
if (args > 2) {
*inst++ = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
*inst++ = 0x24;
*inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
}
#else
/* All arguments are loaded from the stack, addressed through TMP_REG1 with
   8 bit displacements of 2, 3 and 4 machine words. */
if (args > 0) {
*inst++ = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
*inst++ = sizeof(sljit_sw) * 2;
}
if (args > 1) {
*inst++ = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
*inst++ = sizeof(sljit_sw) * 3;
}
if (args > 2) {
*inst++ = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
*inst++ = sizeof(sljit_sw) * 4;
}
#endif
SLJIT_COMPILE_ASSERT(SLJIT_LOCALS_OFFSET >= (2 + 4) * sizeof(sljit_uw), require_at_least_two_words);
#if defined(__APPLE__)
/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
/* `saveds` is reused here as the byte size of the pushed words so that the
   total is rounded up to a multiple of 16. */
saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
if (options & SLJIT_DOUBLE_ALIGNMENT) {
/* Locals rounded up to a multiple of 8, plus a 17 byte sequence that
   realigns the stack pointer at run time. */
local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7);
inst = (sljit_u8*)ensure_buf(compiler, 1 + 17);
FAIL_IF(!inst);
INC_SIZE(17);
/* mov TMP_REG1, esp */
inst[0] = MOV_r_rm;
inst[1] = MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[SLJIT_SP];
/* test esp, 4 */
inst[2] = GROUP_F7;
inst[3] = MOD_REG | (0 << 3) | reg_map[SLJIT_SP];
sljit_unaligned_store_sw(inst + 4, 0x4);
/* jne: skip the following 6 byte instruction */
inst[8] = JNE_i8;
inst[9] = 6;
/* sub esp, 4 */
inst[10] = GROUP_BINARY_81;
inst[11] = MOD_REG | (5 << 3) | reg_map[SLJIT_SP];
sljit_unaligned_store_sw(inst + 12, 0x4);
/* push TMP_REG1 (the stack pointer value saved above) */
inst[16] = PUSH_r + reg_map[TMP_REG1];
}
else
local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3);
#endif
compiler->local_size = local_size;
#ifdef _WIN32
/* Large frames on Windows: call sljit_grow_stack with the size in SLJIT_R0
   before the stack pointer is moved by the full amount. */
if (local_size > 1024) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
#else
/* SLJIT_LOCALS_OFFSET bytes are allocated first; the remaining size is
   passed to sljit_grow_stack and subtracted at the end of the function. */
local_size -= SLJIT_LOCALS_OFFSET;
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, SLJIT_LOCALS_OFFSET));
#endif
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
}
#endif
SLJIT_ASSERT(local_size > 0);
/* Allocate the local area: esp -= local_size. */
return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
}
/* Records the function context (argument count and local area size) in the
   compiler without emitting any code. compiler->local_size is computed with
   the same formulas sljit_emit_enter uses. Always returns SLJIT_SUCCESS once
   the argument checks have passed. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = args;

#if !defined(__APPLE__)
	/* Round the locals up to 8 bytes when double alignment is requested,
	   to 4 bytes otherwise, and add the fixed locals offset. */
	compiler->local_size = SLJIT_LOCALS_OFFSET
		+ ((options & SLJIT_DOUBLE_ALIGNMENT) ? ((local_size + 7) & ~7) : ((local_size + 3) & ~3));
#else
	/* `saveds` becomes the byte size of the pushed words, which is left out
	   of the result after the total has been rounded up to 16 bytes. */
	saveds = (2 + (scratches <= 7 ? 0 : (scratches - 7)) + (saveds > 3 ? 3 : saveds)) * sizeof(sljit_uw);
	compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#endif

	return SLJIT_SUCCESS;
}
/* Emits the function epilogue: moves the return value into place, releases
   the local stack area, restores the stack pointer saved by the
   double-alignment prologue (non-Apple builds only), pops the saved
   registers and TMP_REG1, and emits the return instruction. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
	SLJIT_ASSERT(compiler->args >= 0);

	compiler->flags_saved = 0;
	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	/* esp += local_size */
	SLJIT_ASSERT(compiler->local_size > 0);
	FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_DOUBLE_ALIGNMENT) {
		/* Three-byte load of the stack pointer from the top of the stack. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
		FAIL_IF(!inst);
		INC_SIZE(3);
		*inst++ = MOV_r_rm;
		*inst++ = (reg_map[SLJIT_SP] << 3) | 0x4 /* SIB */;
		*inst++ = (4 << 3) | reg_map[SLJIT_SP];
	}
#endif

	/* Byte count of the pops and the return instruction emitted below. */
	size = 2;
	if (compiler->scratches > 7)
		size += compiler->scratches - 7;
	size += (compiler->saveds > 3) ? 3 : compiler->saveds;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		size += 2;
#else
	if (compiler->args > 0)
		size += 2;
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);

	/* Pops mirror the pushes of the prologue in reverse order. */
	if (compiler->saveds > 0 || compiler->scratches > 9) {
		POP_REG(reg_map[SLJIT_S0]);
	}
	if (compiler->saveds > 1 || compiler->scratches > 8) {
		POP_REG(reg_map[SLJIT_S1]);
	}
	if (compiler->saveds > 2 || compiler->scratches > 7) {
		POP_REG(reg_map[SLJIT_S2]);
	}
	POP_REG(reg_map[TMP_REG1]);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* With three arguments one machine word is removed from the stack by
	   the return instruction itself. */
	if (compiler->args <= 2) {
		RET();
	}
	else {
		RET_I16(sizeof(sljit_sw));
	}
#else
	RET();
#endif

	return SLJIT_SUCCESS;
}
/* --------------------------------------------------------------------- */
/* Operators */
/* --------------------------------------------------------------------- */
/* Size contains the flags as well. */
/* Reserves buffer space for one x86 instruction and encodes everything except
   the opcode byte(s): optional prefixes, the mod r/m byte, an optional SIB
   byte, the displacement of the memory operand and the immediate operand.

   size - low four bits: number of opcode bytes the caller will fill in;
          remaining bits: EX86_* flags.
   a, imma - the register or immediate operand (SLJIT_IMM set in `a` selects
          the immediate `imma`).
   b, immb - the general operand: a register, or a memory operand when
          SLJIT_MEM is set, with `immb` as displacement (or as scale when an
          index register other than the stack pointer is present).

   Returns a pointer to where the caller must write the opcode: the start of
   the opcode bytes, or one byte further for shift instructions (whose group
   opcode is written here). PTR_FAIL_IF returns early when no buffer space is
   available. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
/* The register or immediate operand. */
sljit_s32 a, sljit_sw imma,
/* The general operand (not immediate). */
sljit_s32 b, sljit_sw immb)
{
sljit_u8 *inst;
sljit_u8 *buf_ptr;
sljit_s32 flags = size & ~0xf;
sljit_s32 inst_size;
/* Both cannot be switched on. */
SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
/* Size flags not allowed for typed instructions. */
SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
/* Both size flags cannot be switched on. */
SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
/* SSE2 and immediate is not possible. */
SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
/* At most one of the F2 / F3 / 66 prefixes may be requested. */
SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
/* First pass: compute the total instruction length. */
size &= 0xf;
inst_size = size;
if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
inst_size++;
if (flags & EX86_PREF_66)
inst_size++;
/* Calculate size of b. */
inst_size += 1; /* mod r/m byte. */
if (b & SLJIT_MEM) {
/* No base register: absolute address stored as a full word. */
if ((b & REG_MASK) == SLJIT_UNUSED)
inst_size += sizeof(sljit_sw);
else if (immb != 0 && !(b & OFFS_REG_MASK)) {
/* Immediate operand. */
if (immb <= 127 && immb >= -128)
inst_size += sizeof(sljit_s8);
else
inst_size += sizeof(sljit_sw);
}
/* A stack pointer base is marked with the stack pointer as offset register
   as well, which forces the SIB byte form below. */
if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
b |= TO_OFFS_REG(SLJIT_SP);
if ((b & OFFS_REG_MASK) != SLJIT_UNUSED)
inst_size += 1; /* SIB byte. */
}
/* Calculate size of a. */
if (a & SLJIT_IMM) {
if (flags & EX86_BIN_INS) {
/* Binary instructions use the short form for 8 bit immediates. */
if (imma <= 127 && imma >= -128) {
inst_size += 1;
flags |= EX86_BYTE_ARG;
} else
inst_size += 4;
}
else if (flags & EX86_SHIFT_INS) {
/* A shift by exactly one has its own opcode and needs no immediate byte. */
imma &= 0x1f;
if (imma != 1) {
inst_size ++;
flags |= EX86_BYTE_ARG;
}
} else if (flags & EX86_BYTE_ARG)
inst_size++;
else if (flags & EX86_HALF_ARG)
inst_size += sizeof(short);
else
inst_size += sizeof(sljit_sw);
}
else
SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
PTR_FAIL_IF(!inst);
/* Encoding the byte. */
INC_SIZE(inst_size);
if (flags & EX86_PREF_F2)
*inst++ = 0xf2;
if (flags & EX86_PREF_F3)
*inst++ = 0xf3;
if (flags & EX86_PREF_66)
*inst++ = 0x66;
/* `inst` now points at the opcode bytes; `buf_ptr` at the mod r/m byte
   that follows them. */
buf_ptr = inst + size;
/* Encode mod/rm byte. */
if (!(flags & EX86_SHIFT_INS)) {
if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
/* The reg field (bits 3-5) holds operand `a`: zero for an immediate or
   unused operand, the mapped register number for general registers, or
   the raw number when EX86_SSE2_OP1 is set. */
if ((a & SLJIT_IMM) || (a == 0))
*buf_ptr = 0;
else if (!(flags & EX86_SSE2_OP1))
*buf_ptr = reg_map[a] << 3;
else
*buf_ptr = a << 3;
}
else {
/* Shift instructions: the group opcode depends on the count operand
   (constant one, other immediate, or the shift register). */
if (a & SLJIT_IMM) {
if (imma == 1)
*inst = GROUP_SHIFT_1;
else
*inst = GROUP_SHIFT_N;
} else
*inst = GROUP_SHIFT_CL;
*buf_ptr = 0;
}
/* Encode operand `b` into the mod and r/m fields. */
if (!(b & SLJIT_MEM))
*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
else if ((b & REG_MASK) != SLJIT_UNUSED) {
if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
/* Base register with optional displacement: mod 0x40 selects an 8 bit
   displacement, mod 0x80 a 32 bit one. */
if (immb != 0) {
if (immb <= 127 && immb >= -128)
*buf_ptr |= 0x40;
else
*buf_ptr |= 0x80;
}
if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
*buf_ptr++ |= reg_map[b & REG_MASK];
else {
/* r/m 0x04 announces a SIB byte. */
*buf_ptr++ |= 0x04;
*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3);
}
if (immb != 0) {
if (immb <= 127 && immb >= -128)
*buf_ptr++ = immb; /* 8 bit displacement. */
else {
sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
buf_ptr += sizeof(sljit_sw);
}
}
}
else {
/* Base + index register: `immb` is placed in the top two bits of the SIB
   byte (the scale field). */
*buf_ptr++ |= 0x04;
*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6);
}
}
else {
/* No base register: r/m 0x05 with a 32 bit absolute address. */
*buf_ptr++ |= 0x05;
sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
buf_ptr += sizeof(sljit_sw);
}
/* Finally append the immediate operand in the width chosen above. */
if (a & SLJIT_IMM) {
if (flags & EX86_BYTE_ARG)
*buf_ptr = imma;
else if (flags & EX86_HALF_ARG)
sljit_unaligned_store_s16(buf_ptr, imma);
else if (!(flags & EX86_SHIFT_INS))
sljit_unaligned_store_sw(buf_ptr, imma);
}
/* For shifts the group opcode was written here, so the caller continues
   with the following byte. */
return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
/* --------------------------------------------------------------------- */
/* Call / return instructions */
/* --------------------------------------------------------------------- */
/* Moves the call arguments held in SLJIT_R0..R2 to the places the selected
   calling convention expects them in, right before a call of the given type.
   Returns SLJIT_SUCCESS; FAIL_IF returns early when no buffer space could be
   obtained. */
static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* Fastcall: a third argument is pushed from SLJIT_R2, after which
	   SLJIT_R0 is copied into SLJIT_R2. */
	inst = (sljit_u8*)ensure_buf(compiler, (type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2));
	FAIL_IF(!inst);
	INC_SIZE((type >= SLJIT_CALL3 ? 2 + 1 : 2));

	if (type >= SLJIT_CALL3) {
		PUSH_REG(reg_map[SLJIT_R2]);
	}
	inst[0] = MOV_r_rm;
	inst[1] = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
#else
	/* Arguments are stored on the stack, one four-byte store instruction
	   per argument at displacements of 0, 1 and 2 machine words. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0));
	FAIL_IF(!inst);
	INC_SIZE(4 * (type - SLJIT_CALL0));

	inst[0] = MOV_rm_r;
	inst[1] = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */;
	inst[2] = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP];
	inst[3] = 0;
	inst += 4;

	if (type >= SLJIT_CALL2) {
		inst[0] = MOV_rm_r;
		inst[1] = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */;
		inst[2] = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP];
		inst[3] = sizeof(sljit_sw);
		inst += 4;
	}

	if (type >= SLJIT_CALL3) {
		inst[0] = MOV_rm_r;
		inst[1] = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */;
		inst[2] = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP];
		inst[3] = 2 * sizeof(sljit_sw);
		inst += 4;
	}
#endif

	return SLJIT_SUCCESS;
}
/*
 * Emits the entry sequence of a fast-called code block (x86-32): a POP into
 * dst, which may be a register or a memory operand. An unused dst is
 * replaced by TMP_REG1.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;
	if (!FAST_IS_REG(dst)) {
		/* Memory destination: POP r/m through the generic encoder. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = POP_rm;
		return SLJIT_SUCCESS;
	}
	/* Register destination: single byte POP. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	POP_REG(reg_map[dst]);
	return SLJIT_SUCCESS;
}
/*
 * Emits the return sequence of a fast-called code block (x86-32): PUSH src
 * followed by RET. src may be a register, a memory operand or an immediate.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);
	if (FAST_IS_REG(src)) {
		/* One byte PUSH plus one byte RET. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
		RET();
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_MEM) {
		/* PUSH r/m through the generic encoder, then a separate RET byte. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		RET();
		return SLJIT_SUCCESS;
	}
	/* SLJIT_IMM: five byte PUSH imm32 plus one byte RET. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
	FAIL_IF(!inst);
	INC_SIZE(5 + 1);
	*inst++ = PUSH_i32;
	sljit_unaligned_store_sw(inst, srcw);
	inst += sizeof(sljit_sw);
	RET();
	return SLJIT_SUCCESS;
}

View file

@ -0,0 +1,725 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* x86 64-bit arch dependent functions. */
/*
 * Emits a load of the full-width immediate imm into register reg:
 * a REX prefix (REX_W, plus REX_B when reg_map[reg] is above 7), the
 * MOV_r_i32 opcode combined with the low three register bits, and the
 * immediate stored as an sljit_sw.
 */
static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u8 *inst;
	sljit_u8 rex_prefix = REX_W;

	if (reg_map[reg] > 7)
		rex_prefix |= REX_B;
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(2 + sizeof(sljit_sw));
	*inst++ = rex_prefix;
	*inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
/*
 * Writes a far jump or call at code_ptr and returns the address just past it.
 * For types below SLJIT_JUMP an inverted short conditional jump is written
 * first, with a displacement of 10 + 3 (the length of the sequence that
 * follows). The sequence itself loads the target into TMP_REG3 and then
 * jumps or calls through that register. jump->addr is set to the position of
 * the immediate; for label jumps the immediate is not written here and
 * PATCH_MD is set instead.
 */
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
{
	if (type < SLJIT_JUMP) {
		/* Invert type. */
		code_ptr[0] = get_jump_code(type ^ 0x1) - 0x10;
		code_ptr[1] = 10 + 3;
		code_ptr += 2;
	}
	SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first);
	code_ptr[0] = REX_W | REX_B;
	code_ptr[1] = MOV_r_i32 + 1;
	code_ptr += 2;
	jump->addr = (sljit_uw)code_ptr;
	if (!(jump->flags & JUMP_LABEL))
		sljit_unaligned_store_sw(code_ptr, jump->u.target);
	else
		jump->flags |= PATCH_MD;
	code_ptr += sizeof(sljit_sw);
	code_ptr[0] = REX_B;
	code_ptr[1] = GROUP_FF;
	if (type >= SLJIT_FAST_CALL)
		code_ptr[2] = MOD_REG | CALL_rm | 1;
	else
		code_ptr[2] = MOD_REG | JMP_rm | 1;
	return code_ptr + 3;
}
/*
 * Emits the function prologue for the x86-64 backend.
 *
 * In order, the generated code:
 *   1. pushes the saved registers and then the scratch registers numbered
 *      SLJIT_FIRST_SAVED_REG and above;
 *   2. copies up to three incoming arguments from the ABI argument registers
 *      (rdi/rsi/rdx, or rcx/rdx/r8 under _WIN64) into S0..S2;
 *   3. subtracts the rounded local area size from the stack pointer
 *      (under _WIN64, large frames call sljit_grow_stack first);
 *   4. under _WIN64, saves xmm6 when the float register counts require it.
 *
 * The rounded size is also recorded in compiler->local_size.
 * Returns SLJIT_SUCCESS or the error produced by the CHECK / FAIL_IF macros.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
sljit_s32 i, tmp, size, saved_register_size;
sljit_u8 *inst;
CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
compiler->flags_saved = 0;
/* Including the return address saved by the call instruction. */
saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
/* tmp is the lowest saved register index that has to be pushed. */
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
/* Push the saved registers, from SLJIT_S0 downwards. */
for (i = SLJIT_S0; i >= tmp; i--) {
/* Registers mapped to 8 or above need a REX_B prefix byte. */
size = reg_map[i] >= 8 ? 2 : 1;
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
FAIL_IF(!inst);
INC_SIZE(size);
if (reg_map[i] >= 8)
*inst++ = REX_B;
PUSH_REG(reg_lmap[i]);
}
/* Push the scratch registers numbered SLJIT_FIRST_SAVED_REG and above. */
for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
size = reg_map[i] >= 8 ? 2 : 1;
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
FAIL_IF(!inst);
INC_SIZE(size);
if (reg_map[i] >= 8)
*inst++ = REX_B;
PUSH_REG(reg_lmap[i]);
}
if (args > 0) {
/* Three bytes (REX, opcode, mod r/m) are reserved per argument. */
/* NOTE(review): only three moves are emitted below, so this presumably relies on
   check_sljit_emit_enter limiting args to 3 - confirm. */
size = args * 3;
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
FAIL_IF(!inst);
INC_SIZE(size);
#ifndef _WIN64
/* Always true here: already tested by the enclosing condition. */
if (args > 0) {
*inst++ = REX_W;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
}
if (args > 1) {
*inst++ = REX_W | REX_R;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
}
if (args > 2) {
*inst++ = REX_W | REX_R;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
}
#else
/* Always true here: already tested by the enclosing condition. */
if (args > 0) {
*inst++ = REX_W;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
}
if (args > 1) {
*inst++ = REX_W;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
}
if (args > 2) {
*inst++ = REX_W | REX_B;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
}
#endif
}
/* Round up so that the local area plus the saved register area is a multiple of 16 bytes. */
local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
compiler->local_size = local_size;
#ifdef _WIN64
if (local_size > 1024) {
/* Allocate stack for the callback, which grows the stack. */
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
FAIL_IF(!inst);
INC_SIZE(4 + (3 + sizeof(sljit_s32)));
/* First instruction: subtract an 8 bit immediate from register 4 (the stack pointer). */
*inst++ = REX_W;
*inst++ = GROUP_BINARY_83;
*inst++ = MOD_REG | SUB | 4;
/* Allocated size for registers must be divisible by 8. */
SLJIT_ASSERT(!(saved_register_size & 0x7));
/* Aligned to 16 byte. */
if (saved_register_size & 0x8) {
*inst++ = 5 * sizeof(sljit_sw);
local_size -= 5 * sizeof(sljit_sw);
} else {
*inst++ = 4 * sizeof(sljit_sw);
local_size -= 4 * sizeof(sljit_sw);
}
/* Second instruction */
/* Loads the remaining local_size into SLJIT_R0, the argument of the call below. */
SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] < 8, temporary_reg1_is_loreg);
*inst++ = REX_W;
*inst++ = MOV_rm_i32;
*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
sljit_unaligned_store_s32(inst, local_size);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
}
#endif
SLJIT_ASSERT(local_size > 0);
/* Subtract local_size from the stack pointer: 8 bit immediate form when it fits, 32 bit form otherwise. */
if (local_size <= 127) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
FAIL_IF(!inst);
INC_SIZE(4);
*inst++ = REX_W;
*inst++ = GROUP_BINARY_83;
*inst++ = MOD_REG | SUB | 4;
*inst++ = local_size;
}
else {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
FAIL_IF(!inst);
INC_SIZE(7);
*inst++ = REX_W;
*inst++ = GROUP_BINARY_81;
*inst++ = MOD_REG | SUB | 4;
sljit_unaligned_store_s32(inst, local_size);
inst += sizeof(sljit_s32);
}
#ifdef _WIN64
/* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
if (fscratches >= 6 || fsaveds >= 1) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
FAIL_IF(!inst);
INC_SIZE(5);
*inst++ = GROUP_0F;
/* The remaining four instruction bytes, written as one 32 bit store. */
sljit_unaligned_store_s32(inst, 0x20247429);
}
#endif
return SLJIT_SUCCESS;
}
/*
 * Records the function context in the compiler without emitting any code.
 * compiler->local_size receives the same rounded local area size that
 * sljit_emit_enter computes from these arguments.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_register_size;
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	/* Local area plus saved register area rounded up to a multiple of 16 bytes. */
	local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	compiler->local_size = local_size;
	return SLJIT_SUCCESS;
}
/*
 * Emits the function epilogue for the x86-64 backend.
 *
 * After emit_mov_before_return has handled the return value, the generated
 * code undoes sljit_emit_enter in reverse order: restores xmm6 (under
 * _WIN64, when needed), adds compiler->local_size back to the stack pointer,
 * pops the scratch registers and then the saved registers, and finally
 * emits RET.
 * Returns SLJIT_SUCCESS or the error produced by the CHECK / FAIL_IF macros.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
sljit_s32 i, tmp, size;
sljit_u8 *inst;
CHECK_ERROR();
CHECK(check_sljit_emit_return(compiler, op, src, srcw));
compiler->flags_saved = 0;
FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
#ifdef _WIN64
/* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
FAIL_IF(!inst);
INC_SIZE(5);
*inst++ = GROUP_0F;
/* The remaining four instruction bytes, written as one 32 bit store. */
sljit_unaligned_store_s32(inst, 0x20247428);
}
#endif
SLJIT_ASSERT(compiler->local_size > 0);
/* Add local_size back to the stack pointer: 8 bit immediate form when it fits, 32 bit form otherwise. */
if (compiler->local_size <= 127) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
FAIL_IF(!inst);
INC_SIZE(4);
*inst++ = REX_W;
*inst++ = GROUP_BINARY_83;
*inst++ = MOD_REG | ADD | 4;
*inst = compiler->local_size;
}
else {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
FAIL_IF(!inst);
INC_SIZE(7);
*inst++ = REX_W;
*inst++ = GROUP_BINARY_81;
*inst++ = MOD_REG | ADD | 4;
sljit_unaligned_store_s32(inst, compiler->local_size);
}
/* Pop the scratch registers in ascending order (sljit_emit_enter pushed them in descending order). */
tmp = compiler->scratches;
for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
/* Registers mapped to 8 or above need a REX_B prefix byte. */
size = reg_map[i] >= 8 ? 2 : 1;
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
FAIL_IF(!inst);
INC_SIZE(size);
if (reg_map[i] >= 8)
*inst++ = REX_B;
POP_REG(reg_lmap[i]);
}
/* Pop the saved registers, from the lowest pushed index up to SLJIT_S0. */
tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
for (i = tmp; i <= SLJIT_S0; i++) {
size = reg_map[i] >= 8 ? 2 : 1;
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
FAIL_IF(!inst);
INC_SIZE(size);
if (reg_map[i] >= 8)
*inst++ = REX_B;
POP_REG(reg_lmap[i]);
}
/* Final one byte return instruction. */
inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
FAIL_IF(!inst);
INC_SIZE(1);
RET();
return SLJIT_SUCCESS;
}
/* --------------------------------------------------------------------- */
/* Operators */
/* --------------------------------------------------------------------- */
/*
 * Emits a single opcode byte followed by a 32 bit immediate, preceded by
 * the rex prefix byte when rex is non-zero.
 */
static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;
	/* Opcode byte plus the immediate; one more byte if a prefix is present. */
	sljit_s32 length = 1 + sizeof(sljit_s32);

	if (rex)
		length++;
	inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
	FAIL_IF(!inst);
	INC_SIZE(length);
	if (rex)
		*inst++ = rex;
	inst[0] = opcode;
	sljit_unaligned_store_s32(inst + 1, imm);
	return SLJIT_SUCCESS;
}
/*
 * Generic x86-64 instruction encoder.
 *
 * size packs two things: the low four bits give the number of opcode bytes
 * the caller will fill in, the remaining bits are EX86_* flags.
 * (a, imma) is the register or immediate operand, (b, immb) the general
 * (register or memory) operand.
 *
 * The function works in two passes: it first computes the total instruction
 * length (prefixes, REX, opcode, mod r/m, SIB, displacement, immediate),
 * reserves that much buffer space, and then writes everything except the
 * opcode bytes. For EX86_BIN_INS with an immediate and for EX86_SHIFT_INS
 * the first opcode byte is also written here.
 *
 * Returns a pointer to the opcode byte(s) the caller must fill in (one byte
 * further for shift instructions), or NULL if loading a 64 bit displacement
 * into TMP_REG3 fails.
 * NOTE(review): PTR_FAIL_IF presumably also returns NULL when the buffer
 * cannot be extended - confirm against its definition.
 */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
/* The register or immediate operand. */
sljit_s32 a, sljit_sw imma,
/* The general operand (not immediate). */
sljit_s32 b, sljit_sw immb)
{
sljit_u8 *inst;
sljit_u8 *buf_ptr;
sljit_u8 rex = 0;
sljit_s32 flags = size & ~0xf;
sljit_s32 inst_size;
/* The immediate operand must be 32 bit. */
SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
/* Both cannot be switched on. */
SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
/* Size flags not allowed for typed instructions. */
SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
/* Both size flags cannot be switched on. */
SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
/* SSE2 and immediate is not possible. */
SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
/* At most one of the F2 / F3 / 66 prefixes may be requested. */
SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
/* From here on size is only the opcode byte count. */
size &= 0xf;
inst_size = size;
/* ---- Pass 1: compute the instruction length and the REX prefix. ---- */
if (!compiler->mode32 && !(flags & EX86_NO_REXW))
rex |= REX_W;
else if (flags & EX86_REX)
rex |= REX;
if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
inst_size++;
if (flags & EX86_PREF_66)
inst_size++;
/* Calculate size of b. */
inst_size += 1; /* mod r/m byte. */
if (b & SLJIT_MEM) {
if (!(b & OFFS_REG_MASK)) {
if (NOT_HALFWORD(immb)) {
/* The displacement does not fit in 32 bits: load it into TMP_REG3 and
   use that register as the index (or as the base when there is none). */
if (emit_load_imm64(compiler, TMP_REG3, immb))
return NULL;
immb = 0;
if (b & REG_MASK)
b |= TO_OFFS_REG(TMP_REG3);
else
b |= TMP_REG3;
}
/* A base register whose low bits are 4 can only be encoded through a SIB byte. */
else if (reg_lmap[b & REG_MASK] == 4)
b |= TO_OFFS_REG(SLJIT_SP);
}
if ((b & REG_MASK) == SLJIT_UNUSED)
inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
else {
if (reg_map[b & REG_MASK] >= 8)
rex |= REX_B;
if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
/* Immediate operand. */
if (immb <= 127 && immb >= -128)
inst_size += sizeof(sljit_s8);
else
inst_size += sizeof(sljit_s32);
}
/* A base register whose low bits are 5 always gets an explicit (possibly zero) 8 bit displacement. */
else if (reg_lmap[b & REG_MASK] == 5)
inst_size += sizeof(sljit_s8);
if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
inst_size += 1; /* SIB byte. */
if (reg_map[OFFS_REG(b)] >= 8)
rex |= REX_X;
}
}
}
else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8)
rex |= REX_B;
if (a & SLJIT_IMM) {
if (flags & EX86_BIN_INS) {
/* Binary instructions pick the 8 bit immediate form when the value fits. */
if (imma <= 127 && imma >= -128) {
inst_size += 1;
flags |= EX86_BYTE_ARG;
} else
inst_size += 4;
}
else if (flags & EX86_SHIFT_INS) {
/* Mask the shift count to the operand width; a count of 1 needs no immediate byte. */
imma &= compiler->mode32 ? 0x1f : 0x3f;
if (imma != 1) {
inst_size ++;
flags |= EX86_BYTE_ARG;
}
} else if (flags & EX86_BYTE_ARG)
inst_size++;
else if (flags & EX86_HALF_ARG)
inst_size += sizeof(short);
else
inst_size += sizeof(sljit_s32);
}
else {
SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8)
rex |= REX_R;
}
if (rex)
inst_size++;
/* ---- Pass 2: reserve the space and write the bytes. ---- */
inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
PTR_FAIL_IF(!inst);
/* Encoding the byte. */
INC_SIZE(inst_size);
if (flags & EX86_PREF_F2)
*inst++ = 0xf2;
if (flags & EX86_PREF_F3)
*inst++ = 0xf3;
if (flags & EX86_PREF_66)
*inst++ = 0x66;
if (rex)
*inst++ = rex;
/* inst now points at the opcode bytes; the mod r/m byte follows them. */
buf_ptr = inst + size;
/* Encode mod/rm byte. */
if (!(flags & EX86_SHIFT_INS)) {
if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
/* The reg field holds operand a, or zero for an immediate / absent operand. */
if ((a & SLJIT_IMM) || (a == 0))
*buf_ptr = 0;
else if (!(flags & EX86_SSE2_OP1))
*buf_ptr = reg_lmap[a] << 3;
else
*buf_ptr = a << 3;
}
else {
/* Shift instructions: the opcode depends on where the count comes from. */
if (a & SLJIT_IMM) {
if (imma == 1)
*inst = GROUP_SHIFT_1;
else
*inst = GROUP_SHIFT_N;
} else
*inst = GROUP_SHIFT_CL;
*buf_ptr = 0;
}
if (!(b & SLJIT_MEM))
*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
else if ((b & REG_MASK) != SLJIT_UNUSED) {
if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
/* Base register with an optional displacement: 0x40 selects the 8 bit, 0x80 the 32 bit form. */
if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
if (immb <= 127 && immb >= -128)
*buf_ptr |= 0x40;
else
*buf_ptr |= 0x80;
}
if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
*buf_ptr++ |= reg_lmap[b & REG_MASK];
else {
/* An r/m value of 4 announces a SIB byte. */
*buf_ptr++ |= 0x04;
*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
}
if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
if (immb <= 127 && immb >= -128)
*buf_ptr++ = immb; /* 8 bit displacement. */
else {
sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
buf_ptr += sizeof(sljit_s32);
}
}
}
else {
/* Base plus index register: immb is placed in the top two bits of the SIB byte (the scale field). */
if (reg_lmap[b & REG_MASK] == 5)
*buf_ptr |= 0x40;
*buf_ptr++ |= 0x04;
*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
/* Zero 8 bit displacement, matching the byte counted for this base register in pass 1. */
if (reg_lmap[b & REG_MASK] == 5)
*buf_ptr++ = 0;
}
}
else {
/* No base register: SIB byte 0x25 followed by a 32 bit displacement (see the size calculation above). */
*buf_ptr++ |= 0x04;
*buf_ptr++ = 0x25;
sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
buf_ptr += sizeof(sljit_s32);
}
/* Finally the immediate operand, in the width chosen in pass 1. */
if (a & SLJIT_IMM) {
if (flags & EX86_BYTE_ARG)
*buf_ptr = imma;
else if (flags & EX86_HALF_ARG)
sljit_unaligned_store_s16(buf_ptr, imma);
else if (!(flags & EX86_SHIFT_INS))
sljit_unaligned_store_s32(buf_ptr, imma);
}
/* For shift instructions the first opcode byte has already been written above. */
return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
/* --------------------------------------------------------------------- */
/* Call / return instructions */
/* --------------------------------------------------------------------- */
/*
 * Emits the register moves that place call arguments into the ABI argument
 * registers (x86-64). R0 is always copied (to rdi, or rcx under _WIN64);
 * for SLJIT_CALL3 and above R2 is copied first (to rdx, or r8 under _WIN64).
 * R1 needs no move: the compile time assertions check that it already maps
 * to the register number expected for the second argument.
 */
static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;
	sljit_s32 size;
#ifndef _WIN64
	SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);
#else
	SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);
#endif
	/* Each move is three bytes: REX prefix, opcode, mod r/m. */
	size = (type >= SLJIT_CALL3) ? 6 : 3;
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
#ifndef _WIN64
	if (type >= SLJIT_CALL3) {
		*inst++ = REX_W;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
	}
	*inst++ = REX_W;
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
#else
	if (type >= SLJIT_CALL3) {
		*inst++ = REX_W | REX_R;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
	}
	*inst++ = REX_W;
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0];
#endif
	return SLJIT_SUCCESS;
}
/*
 * Emits the entry sequence of a fast-called code block (x86-64): a POP into
 * dst, which may be a register or a memory operand. An unused dst is
 * replaced by TMP_REG1.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;
	sljit_s32 size;
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;
	if (!FAST_IS_REG(dst)) {
		/* Memory destination. REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = POP_rm;
		return SLJIT_SUCCESS;
	}
	/* Register destination: registers mapped to 8 or above need a REX_B prefix. */
	size = (reg_map[dst] < 8) ? 1 : 2;
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	if (size == 2)
		*inst++ = REX_B;
	POP_REG(reg_lmap[dst]);
	return SLJIT_SUCCESS;
}
/*
 * Emits the return sequence of a fast-called code block (x86-64): PUSH src
 * followed by RET. An immediate that does not fit in 32 bits is first
 * loaded into TMP_REG1 and pushed from there.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_s32 size;
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);
	if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}
	if (FAST_IS_REG(src)) {
		/* PUSH size: registers mapped to 8 or above need a REX_B prefix. */
		size = (reg_map[src] < 8) ? 1 : 2;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size + 1);
		FAIL_IF(!inst);
		INC_SIZE(size + 1);
		if (size == 2)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[src]);
		RET();
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_MEM) {
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;
		/* The RET byte is reserved separately. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		RET();
		return SLJIT_SUCCESS;
	}
	/* SLJIT_IMM. */
	SLJIT_ASSERT(IS_HALFWORD(srcw));
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
	FAIL_IF(!inst);
	INC_SIZE(5 + 1);
	*inst++ = PUSH_i32;
	sljit_unaligned_store_s32(inst, srcw);
	inst += sizeof(sljit_s32);
	RET();
	return SLJIT_SUCCESS;
}
/* --------------------------------------------------------------------- */
/* Extend input */
/* --------------------------------------------------------------------- */
/*
 * Emits a move of a 32 bit value from src to dst (x86-64).
 * sign selects between the sign extending and the plain 32 bit form.
 * compiler->mode32 is switched on around the individual 32 bit instructions
 * and switched off again afterwards.
 * Returns SLJIT_SUCCESS or the error produced by FAIL_IF.
 */
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
sljit_u8* inst;
sljit_s32 dst_r;
compiler->mode32 = 0;
if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
return SLJIT_SUCCESS; /* Empty instruction. */
if (src & SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
/* Use the 32 bit immediate form when sign extension is requested or the value is at most 0x7fffffff. */
if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
FAIL_IF(!inst);
*inst = MOV_rm_i32;
return SLJIT_SUCCESS;
}
/* Otherwise load the value as a full 64 bit immediate. */
return emit_load_imm64(compiler, dst, srcw);
}
/* Immediate to a non-register destination: 32 bit store. */
compiler->mode32 = 1;
inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
FAIL_IF(!inst);
*inst = MOV_rm_i32;
compiler->mode32 = 0;
return SLJIT_SUCCESS;
}
/* Register that receives the value: dst itself when it is a register, TMP_REG1 otherwise. */
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
/* Register to memory: no intermediate move is needed, src is stored directly below. */
if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
dst_r = src;
else {
if (sign) {
inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
FAIL_IF(!inst);
*inst++ = MOVSXD_r_rm;
} else {
compiler->mode32 = 1;
FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
compiler->mode32 = 0;
}
}
if (dst & SLJIT_MEM) {
/* Store the low 32 bits of dst_r to the memory destination. */
compiler->mode32 = 1;
inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
FAIL_IF(!inst);
*inst = MOV_rm_r;
compiler->mode32 = 0;
}
return SLJIT_SUCCESS;
}

File diff suppressed because it is too large Load diff

337
thirdparty/pcre2/src/sljit/sljitUtils.c vendored Normal file
View file

@ -0,0 +1,337 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* ------------------------------------------------------------------------ */
/* Locks */
/* ------------------------------------------------------------------------ */
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) || (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED)
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
/* Single-threaded build: acquiring the executable allocator lock is a no-op. */
static SLJIT_INLINE void allocator_grab_lock(void)
{
/* Always successful. */
}
/* Single-threaded build: releasing the executable allocator lock is a no-op. */
static SLJIT_INLINE void allocator_release_lock(void)
{
/* Always successful. */
}
#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
/* Single-threaded build: acquiring the global lock is a no-op. */
SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void)
{
/* Always successful. */
}
/* Single-threaded build: releasing the global lock is a no-op. */
SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void)
{
/* Always successful. */
}
#endif /* SLJIT_UTIL_GLOBAL_LOCK */
#elif defined(_WIN32) /* SLJIT_SINGLE_THREADED */
#include "windows.h"
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
/* Mutex guarding the executable allocator; created lazily on first use. */
static HANDLE allocator_mutex = 0;
/*
 * Acquires the executable allocator lock (Win32).
 *
 * The mutex is created on first use. Creation is published with an atomic
 * compare-and-exchange so that, when several threads arrive here for the
 * first time simultaneously, exactly one mutex is installed; the losers
 * close their redundant handle. Every caller then waits on the installed
 * mutex, so mutual exclusion holds even during initialization.
 */
static SLJIT_INLINE void allocator_grab_lock(void)
{
	HANDLE new_mutex;
	/* No idea what to do if an error occurs. Static mutexes should never fail... */
	if (!allocator_mutex) {
		/* Created unowned: ownership is taken by the wait below. */
		new_mutex = CreateMutex(NULL, FALSE, NULL);
		if (new_mutex && InterlockedCompareExchangePointer((PVOID volatile *)&allocator_mutex, new_mutex, NULL) != NULL)
			CloseHandle(new_mutex); /* Another thread installed its mutex first. */
	}
	WaitForSingleObject(allocator_mutex, INFINITE);
}
/* Releases the executable allocator lock (Win32) by releasing allocator_mutex. */
static SLJIT_INLINE void allocator_release_lock(void)
{
ReleaseMutex(allocator_mutex);
}
#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
/* Mutex behind sljit_grab_lock / sljit_release_lock; created lazily on first use. */
static HANDLE global_mutex = 0;
/*
 * Acquires the global lock (Win32).
 *
 * The mutex is created on first use. Creation is published with an atomic
 * compare-and-exchange so that, when several threads arrive here for the
 * first time simultaneously, exactly one mutex is installed; the losers
 * close their redundant handle. Every caller then waits on the installed
 * mutex, so mutual exclusion holds even during initialization.
 */
SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void)
{
	HANDLE new_mutex;
	/* No idea what to do if an error occurs. Static mutexes should never fail... */
	if (!global_mutex) {
		/* Created unowned: ownership is taken by the wait below. */
		new_mutex = CreateMutex(NULL, FALSE, NULL);
		if (new_mutex && InterlockedCompareExchangePointer((PVOID volatile *)&global_mutex, new_mutex, NULL) != NULL)
			CloseHandle(new_mutex); /* Another thread installed its mutex first. */
	}
	WaitForSingleObject(global_mutex, INFINITE);
}
/* Releases the global lock (Win32) by releasing global_mutex. */
SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void)
{
ReleaseMutex(global_mutex);
}
#endif /* SLJIT_UTIL_GLOBAL_LOCK */
#else /* _WIN32 */
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
#include <pthread.h>
/* Statically initialized mutex guarding the executable allocator. */
static pthread_mutex_t allocator_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Acquires the executable allocator lock (pthread build). The return value of the lock call is ignored. */
static SLJIT_INLINE void allocator_grab_lock(void)
{
pthread_mutex_lock(&allocator_mutex);
}
/* Releases the executable allocator lock (pthread build). */
static SLJIT_INLINE void allocator_release_lock(void)
{
pthread_mutex_unlock(&allocator_mutex);
}
#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
#include <pthread.h>
/* Statically initialized mutex behind sljit_grab_lock / sljit_release_lock. */
static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Acquires the global lock (pthread build). The return value of the lock call is ignored. */
SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void)
{
pthread_mutex_lock(&global_mutex);
}
/* Releases the global lock (pthread build). */
SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void)
{
pthread_mutex_unlock(&global_mutex);
}
#endif /* SLJIT_UTIL_GLOBAL_LOCK */
#endif /* _WIN32 */
/* ------------------------------------------------------------------------ */
/* Stack */
/* ------------------------------------------------------------------------ */
#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) || (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
#ifdef _WIN32
#include "windows.h"
#else
/* Provides mmap function. */
#include <sys/mman.h>
/* For detecting the page size. */
#include <unistd.h>
#ifndef MAP_ANON
#include <fcntl.h>
/* Some old systems do not have MAP_ANON; there the memory is mapped from /dev/zero instead. */
/* File descriptor of /dev/zero, or -1 while it has not been opened. */
static sljit_s32 dev_zero = -1;
#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED)
/*
 * Opens /dev/zero and stores the descriptor in dev_zero (single-threaded build).
 * Returns non-zero when the open failed, zero on success.
 */
static SLJIT_INLINE sljit_s32 open_dev_zero(void)
{
dev_zero = open("/dev/zero", O_RDWR);
return dev_zero < 0;
}
#else /* SLJIT_SINGLE_THREADED */
#include <pthread.h>
static pthread_mutex_t dev_zero_mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Opens /dev/zero under dev_zero_mutex unless dev_zero already holds a
 * descriptor. Returns 1 when dev_zero is still negative afterwards (the
 * open failed), 0 otherwise.
 */
static SLJIT_INLINE sljit_s32 open_dev_zero(void)
{
	pthread_mutex_lock(&dev_zero_mutex);
	/* Another thread may have opened the device while this one was waiting. */
	if (!(dev_zero >= 0))
		dev_zero = open("/dev/zero", O_RDWR);
	pthread_mutex_unlock(&dev_zero_mutex);
	return (dev_zero < 0) ? 1 : 0;
}
#endif /* SLJIT_SINGLE_THREADED */
#endif
#endif
#endif /* SLJIT_UTIL_STACK || SLJIT_EXECUTABLE_ALLOCATOR */
#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK)
/* Planning to make it even more clever in the future. */
/* Page size minus one, used as an alignment mask; zero until first computed. */
static sljit_sw sljit_page_align = 0;
/*
 * Allocates an sljit stack whose usable size starts at limit bytes and may
 * grow up to max_limit bytes (rounded up to a whole number of pages).
 *
 * Returns NULL when limit is zero or greater than max_limit, when the stack
 * descriptor cannot be allocated, or when reserving / mapping the memory
 * fails. On success stack->top is set to stack->base.
 *
 * The page mask is computed in a local variable and stored to
 * sljit_page_align in a single assignment, so a concurrent caller can never
 * observe the intermediate raw page size and use it as a mask.
 */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data)
{
	struct sljit_stack *stack;
	union {
		void *ptr;
		sljit_uw uw;
	} base;
#ifdef _WIN32
	SYSTEM_INFO si;
#else
	sljit_sw page_size;
#endif
	SLJIT_UNUSED_ARG(allocator_data);
	if (limit > max_limit || limit < 1)
		return NULL;
#ifdef _WIN32
	if (!sljit_page_align) {
		GetSystemInfo(&si);
		sljit_page_align = si.dwPageSize - 1;
	}
#else
	if (!sljit_page_align) {
		page_size = sysconf(_SC_PAGESIZE);
		/* Should never happen. */
		if (page_size < 0)
			page_size = 4096;
		/* Publish only the final mask value. */
		sljit_page_align = page_size - 1;
	}
#endif
	/* Align limit and max_limit. */
	max_limit = (max_limit + sljit_page_align) & ~sljit_page_align;
	stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data);
	if (!stack)
		return NULL;
#ifdef _WIN32
	/* Reserve the whole range, then commit the initial part through sljit_stack_resize. */
	base.ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE);
	if (!base.ptr) {
		SLJIT_FREE(stack, allocator_data);
		return NULL;
	}
	stack->base = base.uw;
	stack->limit = stack->base;
	stack->max_limit = stack->base + max_limit;
	if (sljit_stack_resize(stack, stack->base + limit)) {
		sljit_free_stack(stack, allocator_data);
		return NULL;
	}
#else
#ifdef MAP_ANON
	base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
#else
	/* No anonymous mappings: map /dev/zero instead, opening it on first use. */
	if (dev_zero < 0) {
		if (open_dev_zero()) {
			SLJIT_FREE(stack, allocator_data);
			return NULL;
		}
	}
	base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0);
#endif
	if (base.ptr == MAP_FAILED) {
		SLJIT_FREE(stack, allocator_data);
		return NULL;
	}
	stack->base = base.uw;
	stack->limit = stack->base + limit;
	stack->max_limit = stack->base + max_limit;
#endif
	stack->top = stack->base;
	return stack;
}
#undef PAGE_ALIGN
/*
 * Releases a stack created by sljit_allocate_stack: first the memory range
 * starting at stack->base (VirtualFree on Win32, munmap of the whole
 * base..max_limit range elsewhere), then the descriptor itself.
 * stack is dereferenced unconditionally, so it must not be NULL.
 */
SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack, void *allocator_data)
{
SLJIT_UNUSED_ARG(allocator_data);
#ifdef _WIN32
VirtualFree((void*)stack->base, 0, MEM_RELEASE);
#else
munmap((void*)stack->base, stack->max_limit - stack->base);
#endif
SLJIT_FREE(stack, allocator_data);
}
/*
 * Moves stack->limit to new_limit.
 *
 * Returns -1 when new_limit lies outside [stack->base, stack->max_limit] or,
 * on Win32, when committing / decommitting pages fails; returns 0 otherwise.
 * On Win32 the pages between the old and the new page aligned limit are
 * committed or decommitted. Elsewhere growing only updates the limit, and
 * shrinking additionally hands the no longer needed pages back through
 * madvise / posix_madvise when one of them is available.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_uw new_limit)
{
	sljit_uw aligned_old_limit;
	sljit_uw aligned_new_limit;
	if (new_limit < stack->base || new_limit > stack->max_limit)
		return -1;
#ifdef _WIN32
	aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align;
	aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align;
	if (aligned_new_limit > aligned_old_limit) {
		/* Growing: commit the additional pages. */
		if (!VirtualAlloc((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MEM_COMMIT, PAGE_READWRITE))
			return -1;
	}
	else if (aligned_new_limit < aligned_old_limit) {
		/* Shrinking: decommit the pages above the new limit. */
		if (!VirtualFree((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MEM_DECOMMIT))
			return -1;
	}
#else
	if (new_limit < stack->limit) {
		aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align;
		aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align;
		/* If madvise is available, we release the unnecessary space. */
#if defined(MADV_DONTNEED)
		if (aligned_new_limit < aligned_old_limit)
			madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MADV_DONTNEED);
#elif defined(POSIX_MADV_DONTNEED)
		if (aligned_new_limit < aligned_old_limit)
			posix_madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, POSIX_MADV_DONTNEED);
#endif
	}
#endif
	stack->limit = new_limit;
	return 0;
}
#endif /* SLJIT_UTIL_STACK */
#endif