pcre2: Sync with upstream 10.32

This commit is contained in:
Rémi Verschelde 2019-03-04 14:25:49 +01:00
parent 2bc981948d
commit 0455bc64b6
37 changed files with 4929 additions and 4094 deletions

View file

@ -435,16 +435,16 @@ Files extracted from upstream source:
## pcre2
- Upstream: http://www.pcre.org/
- Version: 10.31
- Version: 10.32
- License: BSD-3-Clause
Files extracted from upstream source:
- Files listed in the file NON-AUTOTOOLS-BUILD steps 1-4
- All .h files in src/
- All .h files in src/ apart from pcre2posix.h
- src/pcre2_jit_compile.c
- src/pcre2_jit_match.c
- src/pcre2_jit_misc.c
- src/pcre2_jit_maketables.c
- src/sljit/*
- AUTHORS and LICENCE

View file

@ -4,11 +4,11 @@ PCRE2 LICENCE
PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as
specified below, with one exemption for certain binary redistributions. The
documentation for PCRE2, supplied in the "doc" directory, is distributed under
the same terms as the software itself. The data in the testdata directory is
not copyrighted and is in the public domain.
Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD"
licence, as specified below, with one exemption for certain binary
redistributions. The documentation for PCRE2, supplied in the "doc" directory,
is distributed under the same terms as the software itself. The data in the
testdata directory is not copyrighted and is in the public domain.
The basic library functions are written in C and are freestanding. Also
included in the distribution is a just-in-time compiler that can be used to
@ -35,7 +35,7 @@ PCRE2 JUST-IN-TIME COMPILATION SUPPORT
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Email domain: freemail.hu
Copyright(c) 2010-2018 Zoltan Herczeg
All rights reserved.
@ -46,7 +46,7 @@ STACK-LESS JUST-IN-TIME COMPILER
Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
Email domain: freemail.hu
Copyright(c) 2009-2018 Zoltan Herczeg
All rights reserved.

View file

@ -18,10 +18,10 @@ to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
but if you do, default values will be taken from config.h for non-boolean
macros that are not defined on the command line.
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
macros are listed as a commented #undef in config.h.generic. Macros such as
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
defined (conventionally to 1) for TRUE, or not defined at all for FALSE. All
such macros are listed as a commented #undef in config.h.generic. Macros such
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
@ -132,17 +132,18 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the <zlib.h> header file. */
/* #undef HAVE_ZLIB_H */
/* This limits the amount of memory that pcre2_match() may use while matching
a pattern. The value is in kilobytes. */
/* This limits the amount of memory that may be used while matching a pattern.
It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
to JIT matching. The value is in kibibytes (units of 1024 bytes). */
#ifndef HEAP_LIMIT
#define HEAP_LIMIT 20000000
#endif
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
allows for longer patterns in extreme cases. */
compiled patterns up to 65535 code units long. This covers the vast
majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
instead. This allows for longer patterns in extreme cases. */
#ifndef LINK_SIZE
#define LINK_SIZE 2
#endif
@ -155,7 +156,8 @@ sure both macros are undefined; an emulation function will then be used. */
/* The value of MATCH_LIMIT determines the default number of times the
pcre2_match() function can record a backtrack position during a single
matching attempt. There is a runtime interface for setting a different
matching attempt. The value is also used to limit a loop counter in
pcre2_dfa_match(). There is a runtime interface for setting a different
limit. The limit exists in order to catch runaway regular expressions that
take for ever to determine that they do not match. The default is set very
large so that it does not accidentally catch legitimate cases. */
@ -170,7 +172,9 @@ sure both macros are undefined; an emulation function will then be used. */
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
must be less than the value of MATCH_LIMIT. The default is to use the same
value as MATCH_LIMIT. There is a runtime method for setting a different
limit. */
limit. In the case of pcre2_dfa_match(), this limit controls the depth of
the internal nested function calls that are used for pattern recursions,
lookarounds, and atomic groups. */
#ifndef MATCH_LIMIT_DEPTH
#define MATCH_LIMIT_DEPTH MATCH_LIMIT
#endif
@ -210,7 +214,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_NAME "PCRE2"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE2 10.31"
#define PACKAGE_STRING "PCRE2 10.32"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre2"
@ -219,7 +223,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "10.31"
#define PACKAGE_VERSION "10.32"
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
@ -339,7 +343,7 @@ sure both macros are undefined; an emulation function will then be used. */
#endif
/* Version number of package */
#define VERSION "10.31"
#define VERSION "10.32"
/* Define to 1 if on MINIX. */
/* #undef _MINIX */

View file

@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions.
Copyright (c) 2016-2017 University of Cambridge
Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -41,10 +41,16 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE2_MAJOR 10
#define PCRE2_MINOR 31
#define PCRE2_PRERELEASE
#define PCRE2_DATE 2018-02-12
#define PCRE2_MAJOR 10
#define PCRE2_MINOR 32
#define PCRE2_PRERELEASE
#define PCRE2_DATE 2018-09-10
/* For the benefit of systems without stdint.h, an alternative is to use
inttypes.h. The existence of these headers is checked by configure or CMake. */
#define PCRE2_HAVE_STDINT_H 1
#define PCRE2_HAVE_INTTYPES_H 1
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
@ -81,12 +87,18 @@ set, we ensure here that it has no effect. */
#define PCRE2_CALL_CONVENTION
#endif
/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and
uint8_t, UCHAR_MAX, etc are defined. */
/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure
that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither
header, the relevant values must be provided by some other means. */
#include <limits.h>
#include <stdlib.h>
#if PCRE2_HAVE_STDINT_H
#include <stdint.h>
#elif PCRE2_HAVE_INTTYPES_H
#include <inttypes.h>
#endif
/* Allow for C++ users compiling this directly. */
@ -269,6 +281,7 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
/* Error 159 is obsolete and should now never occur */
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
#define PCRE2_ERROR_VERB_UNKNOWN 160
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
@ -303,6 +316,8 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
/* "Expected" matching error codes: no match and partial match. */
@ -387,6 +402,7 @@ released, the numbers must not be changed. */
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
/* Request types for pcre2_pattern_info() */

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -505,7 +505,7 @@ Arguments:
utf TRUE in UTF mode
cb compile data block
base_list the data list of the base opcode
base_end the end of the data list
base_end the end of the base opcode
rec_limit points to recursion depth counter
Returns: TRUE if the auto-possessification is possible
@ -730,7 +730,7 @@ for(;;)
if ((*xclass_flags & XCL_MAP) == 0)
{
/* No bits are set for characters < 256. */
if (list[1] == 0) return TRUE;
if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
/* Might be an empty repeat. */
continue;
}
@ -1235,6 +1235,7 @@ for (;;)
#endif
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:

View file

@ -2,23 +2,24 @@
* Perl-Compatible Regular Expressions *
*************************************************/
/* This file contains character tables that are used when no external tables
are passed to PCRE2 by the application that calls it. The tables are used only
for characters whose code values are less than 256.
/* This file was automatically written by the dftables auxiliary
program. It contains character tables that are used when no external
tables are passed to PCRE2 by the application that calls it. The tables
are used only for characters whose code values are less than 256. */
This is a default version of the tables that assumes ASCII encoding. A program
called dftables (which is distributed with PCRE2) can be used to build
alternative versions of this file. This is necessary if you are running in an
EBCDIC environment, or if you want to default to a different encoding, for
example ISO-8859-1. When dftables is run, it creates these tables in the
current locale. If PCRE2 is configured with --enable-rebuild-chartables, this
happens automatically.
/* The dftables program (which is distributed with PCRE2) can be used to
build alternative versions of this file. This is necessary if you are
running in an EBCDIC environment, or if you want to default to a different
encoding, for example ISO-8859-1. When dftables is run, it creates these
tables in the current locale. This happens automatically if PCRE2 is
configured with --enable-rebuild-chartables. */
The following #includes are present because without them gcc 4.x may remove the
array definition from the final binary if PCRE2 is built into a static library
and dead code stripping is activated. This leads to link errors. Pulling in the
header ensures that the array gets flagged as "someone outside this compilation
unit might reference this" and so it will always be supplied to the linker. */
/* The following #include is present because without it gcc 4.x may remove
the array definition from the final binary if PCRE2 is built into a static
library and dead code stripping is activated. This leads to link errors.
Pulling in the header ensures that the array gets flagged as "someone
outside this compilation unit might reference this" and so it will always
be supplied to the linker. */
#ifdef HAVE_CONFIG_H
#include "config.h"
@ -101,7 +102,7 @@ const uint8_t PRIV(default_tables)[] = {
/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */
graph, print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
@ -159,25 +160,24 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
*/
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -63,8 +63,8 @@ POSSIBILITY OF SUCH DAMAGE.
/* Other debugging code can be enabled by these defines. */
// #define DEBUG_SHOW_CAPTURES
// #define DEBUG_SHOW_PARSED
/* #define DEBUG_SHOW_CAPTURES */
/* #define DEBUG_SHOW_PARSED */
/* There are a few things that vary with different code unit sizes. Handle them
by defining macros in order to minimize #if usage. */
@ -250,34 +250,35 @@ is present where expected in a conditional group. */
#define META_LOOKBEHINDNOT 0x80250000u /* (?<! */
/* These must be kept in this order, with consecutive values, and the _ARG
versions of PRUNE, SKIP, and THEN immediately after their non-argument
versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
versions. */
#define META_MARK 0x80260000u /* (*MARK) */
#define META_ACCEPT 0x80270000u /* (*ACCEPT) */
#define META_COMMIT 0x80280000u /* (*COMMIT) */
#define META_FAIL 0x80290000u /* (*FAIL) */
#define META_PRUNE 0x802a0000u /* These pairs must */
#define META_PRUNE_ARG 0x802b0000u /* be */
#define META_SKIP 0x802c0000u /* kept */
#define META_SKIP_ARG 0x802d0000u /* in */
#define META_THEN 0x802e0000u /* this */
#define META_THEN_ARG 0x802f0000u /* order */
#define META_FAIL 0x80280000u /* (*FAIL) */
#define META_COMMIT 0x80290000u /* These */
#define META_COMMIT_ARG 0x802a0000u /* pairs */
#define META_PRUNE 0x802b0000u /* must */
#define META_PRUNE_ARG 0x802c0000u /* be */
#define META_SKIP 0x802d0000u /* kept */
#define META_SKIP_ARG 0x802e0000u /* in */
#define META_THEN 0x802f0000u /* this */
#define META_THEN_ARG 0x80300000u /* order */
/* These must be kept in groups of adjacent 3 values, and all together. */
#define META_ASTERISK 0x80300000u /* * */
#define META_ASTERISK_PLUS 0x80310000u /* *+ */
#define META_ASTERISK_QUERY 0x80320000u /* *? */
#define META_PLUS 0x80330000u /* + */
#define META_PLUS_PLUS 0x80340000u /* ++ */
#define META_PLUS_QUERY 0x80350000u /* +? */
#define META_QUERY 0x80360000u /* ? */
#define META_QUERY_PLUS 0x80370000u /* ?+ */
#define META_QUERY_QUERY 0x80380000u /* ?? */
#define META_MINMAX 0x80390000u /* {n,m} repeat */
#define META_MINMAX_PLUS 0x803a0000u /* {n,m}+ repeat */
#define META_MINMAX_QUERY 0x803b0000u /* {n,m}? repeat */
#define META_ASTERISK 0x80310000u /* * */
#define META_ASTERISK_PLUS 0x80320000u /* *+ */
#define META_ASTERISK_QUERY 0x80330000u /* *? */
#define META_PLUS 0x80340000u /* + */
#define META_PLUS_PLUS 0x80350000u /* ++ */
#define META_PLUS_QUERY 0x80360000u /* +? */
#define META_QUERY 0x80370000u /* ? */
#define META_QUERY_PLUS 0x80380000u /* ?+ */
#define META_QUERY_QUERY 0x80390000u /* ?? */
#define META_MINMAX 0x803a0000u /* {n,m} repeat */
#define META_MINMAX_PLUS 0x803b0000u /* {n,m}+ repeat */
#define META_MINMAX_QUERY 0x803c0000u /* {n,m}? repeat */
#define META_FIRST_QUANTIFIER META_ASTERISK
#define META_LAST_QUANTIFIER META_MINMAX_QUERY
@ -327,8 +328,9 @@ static unsigned char meta_extra_lengths[] = {
SIZEOFFSET, /* META_LOOKBEHINDNOT */
1, /* META_MARK - plus the string length */
0, /* META_ACCEPT */
0, /* META_COMMIT */
0, /* META_FAIL */
0, /* META_COMMIT */
1, /* META_COMMIT_ARG - plus the string length */
0, /* META_PRUNE */
1, /* META_PRUNE_ARG - plus the string length */
0, /* META_SKIP */
@ -510,17 +512,17 @@ static const short int escapes[] = {
-ESC_Z, CHAR_LEFT_SQUARE_BRACKET,
CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET,
CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE,
CHAR_GRAVE_ACCENT, ESC_a,
CHAR_GRAVE_ACCENT, CHAR_BEL,
-ESC_b, 0,
-ESC_d, ESC_e,
ESC_f, 0,
-ESC_d, CHAR_ESC,
CHAR_FF, 0,
-ESC_h, 0,
0, -ESC_k,
0, 0,
ESC_n, 0,
CHAR_LF, 0,
-ESC_p, 0,
ESC_r, -ESC_s,
ESC_tee, 0,
CHAR_CR, -ESC_s,
CHAR_HT, 0,
-ESC_v, -ESC_w,
0, 0,
-ESC_z
@ -544,22 +546,22 @@ because it is defined as 'a', which of course picks up the ASCII value. */
#endif
static const short int escapes[] = {
/* 80 */ ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
/* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p,
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,
/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0,
/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0,
/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0,
/* D0 */ '}', 0, -ESC_K, 0, 0,-ESC_N, 0, -ESC_P,
/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
/* F8 */ 0, 0
/* 80 */ CHAR_BEL, -ESC_b, 0, -ESC_d, CHAR_ESC, CHAR_FF, 0,
/* 88 */ -ESC_h, 0, 0, '{', 0, 0, 0, 0,
/* 90 */ 0, 0, -ESC_k, 0, 0, CHAR_LF, 0, -ESC_p,
/* 98 */ 0, CHAR_CR, 0, '}', 0, 0, 0, 0,
/* A0 */ 0, '~', -ESC_s, CHAR_HT, 0, -ESC_v, -ESC_w, 0,
/* A8 */ 0, -ESC_z, 0, 0, 0, '[', 0, 0,
/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
/* C0 */ '{', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G,
/* C8 */ -ESC_H, 0, 0, 0, 0, 0, 0, 0,
/* D0 */ '}', 0, -ESC_K, 0, 0, -ESC_N, 0, -ESC_P,
/* D8 */ -ESC_Q, -ESC_R, 0, 0, 0, 0, 0, 0,
/* E0 */ '\\', 0, -ESC_S, 0, 0, -ESC_V, -ESC_W, -ESC_X,
/* E8 */ 0, -ESC_Z, 0, 0, 0, 0, 0, 0,
/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
/* F8 */ 0, 0
};
/* We also need a table of characters that may follow \c in an EBCDIC
@ -586,9 +588,9 @@ static const char verbnames[] =
"\0" /* Empty name is a shorthand for MARK */
STRING_MARK0
STRING_ACCEPT0
STRING_COMMIT0
STRING_F0
STRING_FAIL0
STRING_COMMIT0
STRING_PRUNE0
STRING_SKIP0
STRING_THEN;
@ -596,11 +598,11 @@ static const char verbnames[] =
static const verbitem verbs[] = {
{ 0, META_MARK, +1 }, /* > 0 => must have an argument */
{ 4, META_MARK, +1 },
{ 6, META_ACCEPT, -1 }, /* < 0 => must not have an argument */
{ 6, META_COMMIT, -1 },
{ 6, META_ACCEPT, -1 }, /* < 0 => Optional argument, convert to pre-MARK */
{ 1, META_FAIL, -1 },
{ 4, META_FAIL, -1 },
{ 5, META_PRUNE, 0 }, /* Argument is optional; bump META code if found */
{ 6, META_COMMIT, 0 },
{ 5, META_PRUNE, 0 }, /* Optional argument; bump META code if found */
{ 4, META_SKIP, 0 },
{ 4, META_THEN, 0 }
};
@ -610,8 +612,8 @@ static const int verbcount = sizeof(verbs)/sizeof(verbitem);
/* Verb opcodes, indexed by their META code offset from META_MARK. */
static const uint32_t verbops[] = {
OP_MARK, OP_ACCEPT, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP,
OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
OP_MARK, OP_ACCEPT, OP_FAIL, OP_COMMIT, OP_COMMIT_ARG, OP_PRUNE,
OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
/* Offsets from OP_STAR for case-independent and negative repeat opcodes. */
@ -729,7 +731,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
ERR91, ERR92};
ERR91, ERR92, ERR93, ERR94 };
/* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@ -976,8 +978,8 @@ for (;;)
case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break;
case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break;
case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
case META_FAIL: fprintf(stderr, "META (*FAIL)"); break;
case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
case META_PRUNE: fprintf(stderr, "META (*PRUNE)"); break;
case META_SKIP: fprintf(stderr, "META (*SKIP)"); break;
case META_THEN: fprintf(stderr, "META (*THEN)"); break;
@ -1067,6 +1069,10 @@ for (;;)
fprintf(stderr, "META (*MARK:");
goto SHOWARG;
case META_COMMIT_ARG:
fprintf(stderr, "META (*COMMIT:");
goto SHOWARG;
case META_PRUNE_ARG:
fprintf(stderr, "META (*PRUNE:");
goto SHOWARG;
@ -1435,6 +1441,48 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
escape = -i; /* Else return a special escape */
if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X))
cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */
/* Perl supports \N{name} for character names and \N{U+dddd} for numerical
Unicode code points, as well as plain \N for "not newline". PCRE does not
support \N{name}. However, it does support quantification such as \N{2,3},
so if \N{ is not followed by U+dddd we check for a quantifier. */
if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
{
PCRE2_SPTR p = ptr + 1;
/* \N{U+ can be handled by the \x{ code. However, this construction is
not valid in EBCDIC environments because it specifies a Unicode
character, not a codepoint in the local code. For example \N{U+0041}
must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode
casing semantics for the entire pattern, so allow it only in UTF (i.e.
Unicode) mode. */
if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS)
{
#ifdef EBCDIC
*errorcodeptr = ERR93;
#else
if (utf)
{
ptr = p + 1;
escape = 0; /* Not a fancy escape after all */
goto COME_FROM_NU;
}
else *errorcodeptr = ERR93;
#endif
}
/* Give an error if what follows is not a quantifier, but don't override
an error set by the quantifier reader (e.g. number overflow). */
else
{
if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
*errorcodeptr == 0)
*errorcodeptr = ERR37;
}
}
}
}
@ -1462,6 +1510,7 @@ else
/* A number of Perl escapes are not handled by PCRE. We give an explicit
error. */
case CHAR_F:
case CHAR_l:
case CHAR_L:
*errorcodeptr = ERR37;
@ -1719,6 +1768,9 @@ else
{
if (ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
{
#ifndef EBCDIC
COME_FROM_NU:
#endif
if (++ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET)
{
*errorcodeptr = ERR78;
@ -1852,19 +1904,6 @@ else
}
}
/* Perl supports \N{name} for character names, as well as plain \N for "not
newline". PCRE does not support \N{name}. However, it does support
quantification such as \N{2,3}. */
if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET &&
ptrend - ptr > 2)
{
PCRE2_SPTR p = ptr + 1;
if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
*errorcodeptr == 0)
*errorcodeptr = ERR37;
}
/* Set the pointer to the next character before returning. */
*ptrptr = ptr;
@ -2251,11 +2290,14 @@ typedef struct nest_save {
#define NSF_RESET 0x0001u
#define NSF_CONDASSERT 0x0002u
/* Of the options that are changeable within the pattern, these are tracked
during parsing. The rest are used from META_OPTIONS items when compiling. */
/* Options that are changeable within the pattern must be tracked during
parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing,
but all must be tracked so that META_OPTIONS items set the correct values for
the main compiling phase. */
#define PARSE_TRACKED_OPTIONS \
(PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE)
#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
PCRE2_UNGREEDY)
/* States used for analyzing ranges in character classes. The two OK values
must be last. */
@ -2290,6 +2332,7 @@ uint32_t *previous_callout = NULL;
uint32_t *parsed_pattern = cb->parsed_pattern;
uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
uint32_t meta_quantifier = 0;
uint32_t add_after_mark = 0;
uint16_t nest_depth = 0;
int after_manual_callout = 0;
int expect_cond_assert = 0;
@ -2434,11 +2477,17 @@ while (ptr < ptrend)
/* EITHER: not both options set */
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
/* OR: character > 255 */
c > 255 ||
/* OR: not a # comment or white space */
(c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0)
))
#ifdef SUPPORT_UNICODE
/* OR: character > 255 AND not Unicode Pattern White Space */
(c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) ||
#endif
/* OR: not a # comment or isspace() white space */
(c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0
#ifdef SUPPORT_UNICODE
/* and not CHAR_NEL when Unicode is supported */
&& c != CHAR_NEL
#endif
)))
{
PCRE2_SIZE verbnamelength;
@ -2461,6 +2510,16 @@ while (ptr < ptrend)
goto FAILED;
}
*verblengthptr = (uint32_t)verbnamelength;
/* If this name was on a verb such as (*ACCEPT) which does not continue,
a (*MARK) was generated for the name. We now add the original verb as the
next item. */
if (add_after_mark != 0)
{
*parsed_pattern++ = add_after_mark;
add_after_mark = 0;
}
break;
case CHAR_BACKSLASH:
@ -2510,11 +2569,18 @@ while (ptr < ptrend)
/* Skip over whitespace and # comments in extended mode. Note that c is a
character, not a code unit, so we must not use MAX_255 to test its size
because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */
because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The
whitespace characters are those designated as "Pattern White Space" by
Unicode, which are the isspace() characters plus CHAR_NEL (next line), which is
U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
subset of space characters that match \h and \v. */
if ((options & PCRE2_EXTENDED) != 0)
{
if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
#ifdef SUPPORT_UNICODE
if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue;
#endif
if (c == CHAR_NUMBER_SIGN)
{
while (ptr < ptrend)
@ -3206,7 +3272,6 @@ while (ptr < ptrend)
tempptr = ptr;
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode,
options, TRUE, cb);
if (errorcode != 0)
{
CLASS_ESCAPE_FAILED:
@ -3454,13 +3519,25 @@ while (ptr < ptrend)
if (*ptr++ == CHAR_COLON) /* Skip past : or ) */
{
if (verbs[i].has_arg < 0) /* Argument is forbidden */
/* Some optional arguments can be treated as a preceding (*MARK) */
if (verbs[i].has_arg < 0)
{
errorcode = ERR59;
goto FAILED;
add_after_mark = verbs[i].meta;
*parsed_pattern++ = META_MARK;
}
*parsed_pattern++ = verbs[i].meta +
((verbs[i].meta != META_MARK)? 0x00010000u:0);
/* The remaining verbs with arguments (except *MARK) need a different
opcode. */
else
{
*parsed_pattern++ = verbs[i].meta +
((verbs[i].meta != META_MARK)? 0x00010000u:0);
}
/* Set up for reading the name in the main loop. */
verblengthptr = parsed_pattern++;
verbnamestart = ptr;
inverbname = TRUE;
@ -3521,17 +3598,39 @@ while (ptr < ptrend)
else
{
BOOL hyphenok = TRUE;
uint32_t oldoptions = options;
top_nest->reset_group = 0;
top_nest->max_group = 0;
set = unset = 0;
optset = &set;
/* ^ at the start unsets imnsx and disables the subsequent use of - */
if (ptr < ptrend && *ptr == CHAR_CIRCUMFLEX_ACCENT)
{
options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE|
PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE);
hyphenok = FALSE;
ptr++;
}
while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS &&
*ptr != CHAR_COLON)
{
switch (*ptr++)
{
case CHAR_MINUS: optset = &unset; break;
case CHAR_MINUS:
if (!hyphenok)
{
errorcode = ERR94;
ptr--; /* Correct the offset */
goto FAILED;
}
optset = &unset;
hyphenok = FALSE;
break;
case CHAR_J: /* Record that it changed in the external options */
*optset |= PCRE2_DUPNAMES;
@ -3591,7 +3690,7 @@ while (ptr < ptrend)
/* If nothing changed, no need to record. */
if (set != 0 || unset != 0)
if (options != oldoptions)
{
*parsed_pattern++ = META_OPTIONS;
*parsed_pattern++ = options;
@ -3896,9 +3995,8 @@ while (ptr < ptrend)
if (*ptr == CHAR_DOT)
{
if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
if (!read_number(&ptr, ptrend, -1, 99 , ERR79, &minor, &errorcode))
goto FAILED;
if (minor < 10) minor *= 10;
minor = (*ptr++ - CHAR_0) * 10;
if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;
if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
goto BAD_VERSION_CONDITION;
}
@ -4261,11 +4359,11 @@ goto FAILED;
/*************************************************
* Find first significant op code *
* Find first significant opcode *
*************************************************/
/* This is called by several functions that scan a compiled expression looking
for a fixed first character, or an anchoring op code etc. It skips over things
for a fixed first character, or an anchoring opcode etc. It skips over things
that do not influence this. For some calls, it makes sense to skip negative
forward and all backward assertions, and also the \b assertion; for others it
does not.
@ -5472,7 +5570,7 @@ for (;; pptr++)
set xclass = TRUE. Then, in the pre-compile phase, accumulate the length
of the extra data and reset the pointer. This is so that very large
classes that contain a zillion wide characters or Unicode property tests
do not overwrite the work space (which is on the stack). */
do not overwrite the workspace (which is on the stack). */
if (class_uchardata > class_uchardata_base)
{
@ -5563,7 +5661,7 @@ for (;; pptr++)
if (class_has_8bitchar > 0)
{
*code++ |= XCL_MAP;
memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
(void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
CU2BYTES(class_uchardata - code));
if (negate_class && !xclass_has_prop)
for (i = 0; i < 32; i++) classbits[i] = ~classbits[i];
@ -5655,6 +5753,7 @@ for (;; pptr++)
cb->had_pruneorskip = TRUE;
/* Fall through */
case META_MARK:
case META_COMMIT_ARG:
VERB_ARG:
*code++ = verbops[(meta - META_MARK) >> 16];
/* The length is in characters. */
@ -6509,7 +6608,7 @@ for (;; pptr++)
/* Wrap the recursion call in OP_BRA brackets. */
memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
(void)memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
op_previous = *previous = OP_BRA;
PUT(previous, 1, 2 + 2*LINK_SIZE);
previous[2 + 2*LINK_SIZE] = OP_KET;
@ -6589,7 +6688,7 @@ for (;; pptr++)
if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED)
{
memmove(previous + 1, previous, CU2BYTES(len));
(void)memmove(previous + 1, previous, CU2BYTES(len));
code++;
if (repeat_max == 0)
{
@ -6610,7 +6709,7 @@ for (;; pptr++)
else
{
int linkoffset;
memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
(void)memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
code += 2 + LINK_SIZE;
*previous++ = OP_BRAZERO + repeat_type;
*previous++ = OP_BRA;
@ -6811,7 +6910,7 @@ for (;; pptr++)
if (*bracode == OP_COND || *bracode == OP_SCOND)
{
int nlen = (int)(code - bracode);
memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
(void)memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
code += 1 + LINK_SIZE;
nlen += 1 + LINK_SIZE;
*bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
@ -7082,7 +7181,7 @@ for (;; pptr++)
else
{
memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
(void)memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
code += 1 + LINK_SIZE;
len += 1 + LINK_SIZE;
tempcode[0] = OP_ONCE;
@ -7460,7 +7559,7 @@ length of the BRA and KET and any extra code units that are required at the
beginning. We accumulate in a local variable to save frequent testing of
lengthptr for NULL. We cannot do this by looking at the value of 'code' at the
start and end of each alternative, because compiled items are discarded during
the pre-compile phase so that the work space is not exceeded. */
the pre-compile phase so that the workspace is not exceeded. */
length = 2 + 2*LINK_SIZE + skipunits;
@ -7622,7 +7721,7 @@ for (;;)
{
if (cb->open_caps->flag)
{
memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
(void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
CU2BYTES(code - start_bracket));
*start_bracket = OP_ONCE;
code += 1 + LINK_SIZE;
@ -7765,10 +7864,11 @@ do {
if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
}
/* Condition */
/* Condition. If there is no second branch, it can't be anchored. */
else if (op == OP_COND)
else if (op == OP_COND || op == OP_SCOND)
{
if (scode[GET(scode,1)] != OP_ALT) return FALSE;
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
return FALSE;
}
@ -8003,6 +8103,7 @@ for (;;)
break;
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
@ -8221,7 +8322,7 @@ for (i = 0; i < tablecount; i++)
if (crc < 0)
{
memmove(slot + cb->name_entry_size, slot,
(void)memmove(slot + cb->name_entry_size, slot,
CU2BYTES((tablecount - i) * cb->name_entry_size));
break;
}
@ -8311,6 +8412,7 @@ for (;; pptr++)
break;
case META_MARK: /* Add the length of the name. */
case META_COMMIT_ARG:
case META_PRUNE_ARG:
case META_SKIP_ARG:
case META_THEN_ARG:
@ -8501,6 +8603,7 @@ for (;; pptr++)
goto EXIT;
case META_MARK:
case META_COMMIT_ARG:
case META_PRUNE_ARG:
case META_SKIP_ARG:
case META_THEN_ARG:
@ -8572,6 +8675,32 @@ for (;; pptr++)
case META_LOOKAHEADNOT:
pptr = parsed_skip(pptr + 1, PSKIP_KET);
if (pptr == NULL) goto PARSED_SKIP_FAILED;
/* Also ignore any qualifiers that follow a lookahead assertion. */
switch (pptr[1])
{
case META_ASTERISK:
case META_ASTERISK_PLUS:
case META_ASTERISK_QUERY:
case META_PLUS:
case META_PLUS_PLUS:
case META_PLUS_QUERY:
case META_QUERY:
case META_QUERY_PLUS:
case META_QUERY_QUERY:
pptr++;
break;
case META_MINMAX:
case META_MINMAX_PLUS:
case META_MINMAX_QUERY:
pptr += 3;
break;
default:
break;
}
break;
/* Lookbehinds can be ignored, but must themselves be checked. */
@ -8942,6 +9071,7 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
break;
case META_MARK:
case META_COMMIT_ARG:
case META_PRUNE_ARG:
case META_SKIP_ARG:
case META_THEN_ARG:

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -1066,11 +1066,12 @@ BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
uint32_t pattype = options & TYPE_OPTIONS;
if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */
(pattype & (~pattype+1)) != pattype || /* More than one type set */
pattype == 0) /* No type set */
{
*bufflenptr = 0; /* Error offset */
*bufflenptr = 0; /* Error offset */
return PCRE2_ERROR_BADOPTION;
}
@ -1081,7 +1082,11 @@ if (ccontext == NULL) ccontext =
/* Check UTF if required. */
#ifndef SUPPORT_UNICODE
if (utf) return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
if (utf)
{
*bufflenptr = 0; /* Error offset */
return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
}
#else
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
{
@ -1126,6 +1131,7 @@ for (i = 0; i < 2; i++)
break;
default:
*bufflenptr = 0; /* Error offset */
return PCRE2_ERROR_INTERNAL;
}

View file

@ -181,7 +181,8 @@ static const uint8_t coptable[] = {
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, /* COMMIT, COMMIT_ARG */
0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
};
@ -254,7 +255,8 @@ static const uint8_t poptable[] = {
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, /* COMMIT, COMMIT_ARG */
0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
};
@ -292,6 +294,35 @@ typedef struct stateblock {
#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
/* Before version 10.32 the recursive calls of internal_dfa_match() were passed
local working space and output vectors that were created on the stack. This has
caused issues for some patterns, especially in small-stack environments such as
Windows. A new scheme is now in use which sets up a vector on the stack, but if
this is too small, heap memory is used, up to the heap_limit. The main
parameters are all numbers of ints because the workspace is a vector of ints.
The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is
defined in pcre2_internal.h so as to be available to pcre2test when it is
finding the minimum heap requirement for a match. */
#define OVEC_UNIT (sizeof(PCRE2_SIZE)/sizeof(int))
#define RWS_BASE_SIZE (DFA_START_RWS_SIZE/sizeof(int)) /* Stack vector */
#define RWS_RSIZE 1000 /* Work size for recursion */
#define RWS_OVEC_RSIZE (1000*OVEC_UNIT) /* Ovector for recursion */
#define RWS_OVEC_OSIZE (2*OVEC_UNIT) /* Ovector in other cases */
/* This structure is at the start of each workspace block. */
/* The blocks form a singly linked chain. Space within a block is handed out
from the front, so the unused ints are always the last "free" ints of the
block. */
typedef struct RWS_anchor {
struct RWS_anchor *next; /* Next block in the chain, or NULL if none */
unsigned int size; /* Number of ints in the whole block, anchor included */
unsigned int free; /* Number of ints not currently in use */
} RWS_anchor;
#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int))
/*************************************************
* Process a callout *
@ -353,6 +384,61 @@ return (mb->callout)(cb, mb->callout_data);
/*************************************************
* Expand local workspace memory *
*************************************************/
/* This function is called when internal_dfa_match() is about to be called
recursively and there is insufficient working space left in the current
workspace block. If there's an existing next block, use it, provided that it is
big enough for this request; otherwise get a new block unless the heap limit is
reached.

Arguments:
  rwsptr     pointer to block pointer (updated on success only)
  ovecsize   space needed for an ovector, in ints
  mb         the match block

Returns:     0 rwsptr has been updated
            !0 an error code (PCRE2_ERROR_HEAPLIMIT or PCRE2_ERROR_NOMEMORY)
*/

static int
more_workspace(RWS_anchor **rwsptr, unsigned int ovecsize, dfa_match_block *mb)
{
RWS_anchor *rws = *rwsptr;
RWS_anchor *new;

/* The smallest block, in ints, that can satisfy this request: the anchor, one
recursion workspace, and the ovector. */

uint64_t needed = (uint64_t)RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE;

/* A block that already exists may have been cut down to whatever was left of
the heap allowance when it was obtained for a smaller request. Handing such a
block to a bigger request would make the caller run off its end, so check the
size. A short block means the allowance was exhausted when it was created, and
nothing is released until the match ends, so no more memory can be obtained. */

if (rws->next != NULL)
  {
  new = rws->next;
  if (new->size < needed) return PCRE2_ERROR_HEAPLIMIT;
  }

/* All sizes are in units of sizeof(int), except for mb->heap_limit, which is
in kibibytes. The arithmetic is done in 64 bits so that a large limit cannot
wrap round, and a limit that is below the current usage leaves nothing rather
than a huge unsigned value. */

else
  {
  uint64_t heapmax = (uint64_t)(1024/sizeof(int)) * mb->heap_limit;
  uint64_t heapused = (uint64_t)mb->heap_used;
  uint64_t heapleft = (heapmax > heapused)? heapmax - heapused : 0;
  uint64_t maxsize = (unsigned int)(~0u);      /* Must fit in new->size */
  uint64_t newsize = (uint64_t)rws->size * 2;

  /* The byte count passed to malloc() must not overflow either. */

  if (maxsize > SIZE_MAX/sizeof(int)) maxsize = SIZE_MAX/sizeof(int);
  if (newsize > maxsize) newsize = maxsize;
  if (newsize > heapleft) newsize = heapleft;
  if (newsize < needed) return PCRE2_ERROR_HEAPLIMIT;

  new = mb->memctl.malloc((size_t)newsize*sizeof(int), mb->memctl.memory_data);
  if (new == NULL) return PCRE2_ERROR_NOMEMORY;
  mb->heap_used += (unsigned int)newsize;
  new->next = NULL;
  new->size = (unsigned int)newsize;
  rws->next = new;
  }

/* Whether reused or new, the block starts out empty apart from its anchor. */

new->free = new->size - RWS_ANCHOR_SIZE;
*rwsptr = new;
return 0;
}
/*************************************************
* Match a Regular Expression - DFA engine *
*************************************************/
@ -431,7 +517,8 @@ internal_dfa_match(
uint32_t offsetcount,
int *workspace,
int wscount,
uint32_t rlevel)
uint32_t rlevel,
int *RWS)
{
stateblock *active_states, *new_states, *temp_states;
stateblock *next_active_state, *next_new_state;
@ -788,7 +875,7 @@ for (;;)
else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
match_count = 0;
count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
if (count > 0) memmove(offsets + 2, offsets,
if (count > 0) (void)memmove(offsets + 2, offsets,
(size_t)count * sizeof(PCRE2_SIZE));
if (offsetcount >= 2)
{
@ -2587,10 +2674,22 @@ for (;;)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
{
PCRE2_SPTR endasscode = code + GET(code, 1);
PCRE2_SIZE local_offsets[2];
int rc;
int local_workspace[1000];
int *local_workspace;
PCRE2_SIZE *local_offsets;
PCRE2_SPTR endasscode = code + GET(code, 1);
RWS_anchor *rws = (RWS_anchor *)RWS;
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
@ -2600,10 +2699,13 @@ for (;;)
ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
RWS_RSIZE, /* size of same */
rlevel, /* function recursion level */
RWS); /* recursion workspace */
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
@ -2615,8 +2717,6 @@ for (;;)
case OP_COND:
case OP_SCOND:
{
PCRE2_SIZE local_offsets[1000];
int local_workspace[1000];
int codelink = (int)GET(code, 1);
PCRE2_UCHAR condcode;
@ -2673,8 +2773,22 @@ for (;;)
else
{
int rc;
int *local_workspace;
PCRE2_SIZE *local_offsets;
PCRE2_SPTR asscode = code + LINK_SIZE + 1;
PCRE2_SPTR endasscode = asscode + GET(asscode, 1);
RWS_anchor *rws = (RWS_anchor *)RWS;
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
@ -2684,10 +2798,13 @@ for (;;)
ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
RWS_RSIZE, /* size of same */
rlevel, /* function recursion level */
RWS); /* recursion workspace */
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
if ((rc >= 0) ==
@ -2702,13 +2819,25 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_RECURSE:
{
int rc;
int *local_workspace;
PCRE2_SIZE *local_offsets;
RWS_anchor *rws = (RWS_anchor *)RWS;
dfa_recursion_info *ri;
PCRE2_SIZE local_offsets[1000];
int local_workspace[1000];
PCRE2_SPTR callpat = start_code + GET(code, 1);
uint32_t recno = (callpat == mb->start_code)? 0 :
GET2(callpat, 1 + LINK_SIZE);
int rc;
if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE;
/* Check for repeating a recursion without advancing the subject
pointer. This should catch convoluted mutual recursions. (Some simple
@ -2732,11 +2861,13 @@ for (;;)
ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
RWS_OVEC_RSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
RWS_RSIZE, /* size of same */
rlevel, /* function recursion level */
RWS); /* recursion workspace */
rws->free += RWS_RSIZE + RWS_OVEC_RSIZE;
mb->recursive = new_recursive.prevrec; /* Done this recursion */
/* Ran out of internal offsets */
@ -2782,10 +2913,25 @@ for (;;)
case OP_SCBRAPOS:
case OP_BRAPOSZERO:
{
int rc;
int *local_workspace;
PCRE2_SIZE *local_offsets;
PCRE2_SIZE charcount, matched_count;
PCRE2_SPTR local_ptr = ptr;
RWS_anchor *rws = (RWS_anchor *)RWS;
BOOL allow_zero;
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
if (codevalue == OP_BRAPOSZERO)
{
allow_zero = TRUE;
@ -2798,19 +2944,17 @@ for (;;)
for (matched_count = 0;; matched_count++)
{
PCRE2_SIZE local_offsets[2];
int local_workspace[1000];
int rc = internal_dfa_match(
rc = internal_dfa_match(
mb, /* fixed match data */
code, /* this subexpression's code */
local_ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
RWS_RSIZE, /* size of same */
rlevel, /* function recursion level */
RWS); /* recursion workspace */
/* Failed to match */
@ -2827,6 +2971,8 @@ for (;;)
local_ptr += charcount; /* Advance temporary position ptr */
}
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
/* At this point we have matched the subpattern matched_count
times, and local_ptr is pointing to the character after the end of the
last match. */
@ -2869,19 +3015,35 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_ONCE:
{
PCRE2_SIZE local_offsets[2];
int local_workspace[1000];
int rc;
int *local_workspace;
PCRE2_SIZE *local_offsets;
RWS_anchor *rws = (RWS_anchor *)RWS;
int rc = internal_dfa_match(
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
rc = internal_dfa_match(
mb, /* fixed match data */
code, /* this subexpression's code */
ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
RWS_RSIZE, /* size of same */
rlevel, /* function recursion level */
RWS); /* recursion workspace */
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
if (rc >= 0)
{
@ -3063,6 +3225,7 @@ pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
{
int rc;
const pcre2_real_code *re = (const pcre2_real_code *)code;
PCRE2_SPTR start_match;
@ -3071,9 +3234,9 @@ PCRE2_SPTR bumpalong_limit;
PCRE2_SPTR req_cu_ptr;
BOOL utf, anchored, startline, firstline;
BOOL has_first_cu = FALSE;
BOOL has_req_cu = FALSE;
PCRE2_UCHAR first_cu = 0;
PCRE2_UCHAR first_cu2 = 0;
PCRE2_UCHAR req_cu = 0;
@ -3088,6 +3251,17 @@ pcre2_callout_block cb;
dfa_match_block actual_match_block;
dfa_match_block *mb = &actual_match_block;
/* Set up a starting block of memory for use during recursive calls to
internal_dfa_match(). By putting this on the stack, it minimizes resource use
in the case when it is not needed. If this is too small, more memory is
obtained from the heap. At the start of each block is an anchor structure. */
int base_recursion_workspace[RWS_BASE_SIZE];
RWS_anchor *rws = (RWS_anchor *)base_recursion_workspace;
rws->next = NULL;
rws->size = RWS_BASE_SIZE;
rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
subject string. */
@ -3184,6 +3358,7 @@ if (mcontext == NULL)
mb->memctl = re->memctl;
mb->match_limit = PRIV(default_match_context).match_limit;
mb->match_limit_depth = PRIV(default_match_context).depth_limit;
mb->heap_limit = PRIV(default_match_context).heap_limit;
}
else
{
@ -3198,6 +3373,7 @@ else
mb->memctl = mcontext->memctl;
mb->match_limit = mcontext->match_limit;
mb->match_limit_depth = mcontext->depth_limit;
mb->heap_limit = mcontext->heap_limit;
}
if (mb->match_limit > re->limit_match)
@ -3206,6 +3382,9 @@ if (mb->match_limit > re->limit_match)
if (mb->match_limit_depth > re->limit_depth)
mb->match_limit_depth = re->limit_depth;
if (mb->heap_limit > re->limit_heap)
mb->heap_limit = re->limit_heap;
mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_count * re->name_entry_size;
mb->tables = re->tables;
@ -3215,6 +3394,7 @@ mb->start_offset = start_offset;
mb->moptions = options;
mb->poptions = re->overall_options;
mb->match_call_count = 0;
mb->heap_used = 0;
/* Process the \R and newline settings. */
@ -3351,8 +3531,6 @@ a match. */
for (;;)
{
int rc;
/* ----------------- Start of match optimizations ---------------- */
/* There are some optimizations that avoid running the match if a known
@ -3544,7 +3722,7 @@ for (;;)
in characters, we treat it as code units to avoid spending too much time
in this optimization. */
if (end_subject - start_match < re->minlength) return PCRE2_ERROR_NOMATCH;
if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT;
/* If req_cu is set, we know that that code unit must appear in the
subject for the match to succeed. If the first code unit is set, req_cu
@ -3621,7 +3799,8 @@ for (;;)
(uint32_t)match_data->oveccount * 2, /* actual size of same */
workspace, /* workspace vector */
(int)wscount, /* size of same */
0); /* function recurse level */
0, /* function recurse level */
base_recursion_workspace); /* initial workspace for recursion */
/* Anything other than "no match" means we are done, always; otherwise, carry
on only if not anchored. */
@ -3637,7 +3816,7 @@ for (;;)
match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject);
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
match_data->rc = rc;
return rc;
goto EXIT;
}
/* Advance to the next subject character unless we are at the end of a line
@ -3668,8 +3847,18 @@ for (;;)
} /* "Bumpalong" loop */
NOMATCH_EXIT:
rc = PCRE2_ERROR_NOMATCH;
return PCRE2_ERROR_NOMATCH;
EXIT:
while (rws->next != NULL)
{
RWS_anchor *next = rws->next;
rws->next = next->next;
mb->memctl.free(next, mb->memctl.memory_data);
}
return rc;
}
/* End of pcre2_dfa_match.c */

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -107,7 +107,7 @@ static const unsigned char compile_error_texts[] =
/* 35 */
"lookbehind is too complicated\0"
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
"PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
"PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0"
"number after (?C is greater than 255\0"
"closing parenthesis for (?C expected\0"
/* 40 */
@ -133,7 +133,8 @@ static const unsigned char compile_error_texts[] =
"internal error: unknown newline setting\0"
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
"(?R (recursive pattern call) must be followed by a closing parenthesis\0"
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
/* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */
"obsolete error (should not occur)\0" /* Was the above */
/* 60 */
"(*VERB) not recognized or malformed\0"
"group number is too big\0"
@ -160,7 +161,7 @@ static const unsigned char compile_error_texts[] =
"using UCP is disabled by the application\0"
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character code point value in \\u.... sequence is too large\0"
"digits missing in \\x{} or \\o{}\0"
"digits missing in \\x{} or \\o{} or \\N{U+}\0"
"syntax error or number too big in (?(VERSION condition\0"
/* 80 */
"internal error: unknown opcode in auto_possessify()\0"
@ -178,6 +179,8 @@ static const unsigned char compile_error_texts[] =
"internal error: bad code value in parsed_skip()\0"
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
"invalid option bits with PCRE2_LITERAL\0"
"\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
"invalid hyphen in option setting\0"
;
/* Match-time and UTF error texts are in the same format. */
@ -255,11 +258,13 @@ static const unsigned char match_error_texts[] =
"expected closing curly bracket in replacement string\0"
"bad substitution in replacement string\0"
/* 60 */
"match with end before start is not supported\0"
"match with end before start or start moved backwards is not supported\0"
"too many replacements (more than INT_MAX)\0"
"bad serialized data\0"
"heap limit exceeded\0"
"invalid syntax\0"
/* 65 */
"internal error - duplicate substitution match\0"
;

View file

@ -129,11 +129,11 @@ while (eptr < end_subject)
if ((ricount & 1) != 0) break; /* Grapheme break required */
}
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
any number of Extend before a following E_Modifier. */
/* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
allows any number of them before a following Extended_Pictographic. */
if (rgb != ucp_gbExtend ||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
lgb != ucp_gbExtended_Pictographic)
lgb = rgb;
eptr += len;

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -131,6 +131,7 @@ for (;;)
break;
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -165,6 +165,16 @@ by "configure". */
#define INT64_OR_DOUBLE double
#endif
/* External (in the C sense) functions and tables that are private to the
libraries are always referenced using the PRIV macro. This makes it possible
for pcre2test.c to include some of the source files from the libraries using a
different PRIV definition to avoid name clashes. It also makes it clear in the
code that a non-static object is being referenced. */
#ifndef PRIV
#define PRIV(name) _pcre2_##name
#endif
/* When compiling for use with the Virtual Pascal compiler, these functions
need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT
option on the command line. */
@ -178,50 +188,15 @@ option on the command line. */
#define memset(s,c,n) _memset(s,c,n)
#else /* VPCOMPAT */
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
is set. Otherwise, include an emulating function for those systems that have
neither (there are some non-Unix environments where this is the case). */
/* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define
a macro that calls an emulating function. */
#ifndef HAVE_MEMMOVE
#undef memmove /* some systems may have a macro */
#ifdef HAVE_BCOPY
#define memmove(a, b, c) bcopy(b, a, c)
#else /* HAVE_BCOPY */
/* Emulation of memmove() for systems that have neither memmove() nor bcopy().
The regions may overlap: when the destination lies above the source the bytes
are copied from the top downwards, otherwise from the bottom upwards, so that
no source byte is overwritten before it has been read.

Arguments:
  d          destination
  s          source
  n          number of bytes to copy

Returns:     the destination pointer
*/

static void *
pcre2_memmove(void *d, const void *s, size_t n)
{
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t k;

if (to > from)
  {
  for (k = n; k > 0; k--) to[k - 1] = from[k - 1];
  }
else
  {
  for (k = 0; k < n; k++) to[k] = from[k];
  }

return d;
}
#define memmove(a, b, c) pcre2_memmove(a, b, c)
#endif /* not HAVE_BCOPY */
#undef memmove /* Some systems may have a macro */
#define memmove(a, b, c) PRIV(memmove)(a, b, c)
#endif /* not HAVE_MEMMOVE */
#endif /* not VPCOMPAT */
/* External (in the C sense) functions and tables that are private to the
libraries are always referenced using the PRIV macro. This makes it possible
for pcre2test.c to include some of the source files from the libraries using a
different PRIV definition to avoid name clashes. It also makes it clear in the
code that a non-static object is being referenced. */
#ifndef PRIV
#define PRIV(name) _pcre2_##name
#endif
/* This is an unsigned int value that no UTF character can ever have, as
Unicode doesn't go beyond 0x0010ffff. */
@ -247,12 +222,17 @@ not rely on this. */
pcre2_match() is allocated on the system stack, of this size (bytes). The size
must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
on the number of capturing parentheses) so 20K handles quite a few frames. A
on the number of capturing parentheses) so 20KiB handles quite a few frames. A
larger vector on the heap is obtained for patterns that need more frames. The
maximum size of this can be limited. */
#define START_FRAMES_SIZE 20480
/* Similarly, for DFA matching, an initial internal workspace vector is
allocated on the stack. */
#define DFA_START_RWS_SIZE 30720
/* Define the default BSR convention. */
#ifdef BSR_ANYCRLF
@ -585,14 +565,15 @@ these tables. */
#define cbit_cntrl 288 /* [:cntrl:] */
#define cbit_length 320 /* Length of the cbits table */
/* Bit definitions for entries in the ctypes table. */
/* Bit definitions for entries in the ctypes table. Do not change these values
without checking pcre2_jit_compile.c, which has an assertion to ensure that
ctype_word has the value 16. */
#define ctype_space 0x01
#define ctype_letter 0x02
#define ctype_digit 0x04
#define ctype_xdigit 0x08
#define ctype_xdigit 0x08 /* not actually used any more */
#define ctype_word 0x10 /* alphanumeric or '_' */
#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
/* Offsets of the various tables from the base tables pointer, and
total length of the tables. */
@ -1267,36 +1248,6 @@ contain characters with values greater than 255. */
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
/* Escape items that are just an encoding of a particular data value. These
appear in the escapes[] table in pcre2_compile.c as positive numbers. */
#ifndef ESC_a
#define ESC_a CHAR_BEL
#endif
#ifndef ESC_e
#define ESC_e CHAR_ESC
#endif
#ifndef ESC_f
#define ESC_f CHAR_FF
#endif
#ifndef ESC_n
#define ESC_n CHAR_LF
#endif
#ifndef ESC_r
#define ESC_r CHAR_CR
#endif
/* We can't officially use ESC_t because it is a POSIX reserved identifier
(presumably because of all the others like size_t). */
#ifndef ESC_tee
#define ESC_tee CHAR_HT
#endif
/* These are escaped items that aren't just an encoding of a particular data
value such as \n. They must have non-zero values, as check_escape() returns 0
for a data character. In the escapes[] table in pcre2_compile.c their values
@ -1578,23 +1529,26 @@ enum {
OP_THEN, /* 155 */
OP_THEN_ARG, /* 156 same, but with argument */
OP_COMMIT, /* 157 */
OP_COMMIT_ARG, /* 158 same, but with argument */
/* These are forced failure and success verbs */
/* These are forced failure and success verbs. FAIL and ACCEPT do accept an
argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
without the need for a special opcode. */
OP_FAIL, /* 158 */
OP_ACCEPT, /* 159 */
OP_ASSERT_ACCEPT, /* 160 Used inside assertions */
OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */
OP_FAIL, /* 159 */
OP_ACCEPT, /* 160 */
OP_ASSERT_ACCEPT, /* 161 Used inside assertions */
OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
OP_SKIPZERO, /* 162 */
OP_SKIPZERO, /* 163 */
/* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */
OP_DEFINE, /* 163 */
OP_DEFINE, /* 164 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@ -1650,7 +1604,7 @@ some cases doesn't actually use these names at all). */
"Cond false", "Cond true", \
"Brazero", "Braminzero", "Braposzero", \
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
"*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL", \
"*ACCEPT", "*ASSERT_ACCEPT", \
"Close", "Skip zero", "Define"
@ -1742,7 +1696,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
1, 3, /* SKIP, SKIP_ARG */ \
1, 3, /* THEN, THEN_ARG */ \
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
1, 3, /* COMMIT, COMMIT_ARG */ \
1, 1, 1, /* FAIL, ACCEPT, ASSERT_ACCEPT */ \
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
1 /* DEFINE */
@ -1896,7 +1851,7 @@ extern const ucd_record PRIV(ucd_records)[];
#if PCRE2_CODE_UNIT_WIDTH == 32
extern const ucd_record PRIV(dummy_ucd_record)[];
#endif
extern const uint8_t PRIV(ucd_stage1)[];
extern const uint16_t PRIV(ucd_stage1)[];
extern const uint16_t PRIV(ucd_stage2)[];
extern const uint32_t PRIV(ucp_gbtable)[];
extern const uint32_t PRIV(ucp_gentype)[];
@ -1976,6 +1931,14 @@ extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
uint32_t *, BOOL);
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
/* This function is needed only when memmove() is not available. */
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
#define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove)
extern void * _pcre2_memmove(void *, const void *, size_t);
#endif
#endif /* PCRE2_CODE_UNIT_WIDTH */
#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */

View file

@ -793,11 +793,23 @@ typedef struct heapframe {
uint8_t return_id; /* Where to go on in internal "return" */
uint8_t op; /* Processing opcode */
/* At this point, the structure is 16-bit aligned. On most architectures
the alignment requirement for a pointer will ensure that the eptr field below
is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer
that is 16-bit aligned. We must therefore ensure that what comes between here
and eptr is an odd multiple of 16 bits so as to get back into 32-bit
alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs
fudges in the other cases. In the 32-bit case the padding comes first so that
the occu field itself is 32-bit aligned. Without the padding, the size of this
structure is no longer a multiple of sizeof(PCRE2_SIZE) on m68k, and the check
below fails. */
#if PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR occu[6]; /* Used for other case code units */
#elif PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR occu[2]; /* Used for other case code units */
uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
#else
uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
PCRE2_UCHAR occu[1]; /* Used for other case code units */
#endif
@ -818,6 +830,9 @@ typedef struct heapframe {
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
} heapframe;
/* This typedef is a compile-time check that the size of the heapframe
structure is a multiple of sizeof(PCRE2_SIZE). See various comments above. The
conditional expression gives the array a length of +1 when the size is an exact
multiple and -1 when it is not; an array type with a negative length is
invalid, so a mismatch stops the compilation. The typedef only names a type;
it does not declare any object. */
typedef char check_heapframe_size[
((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
@ -881,6 +896,8 @@ typedef struct dfa_match_block {
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
const uint8_t *tables; /* Character tables */
PCRE2_SIZE start_offset; /* The start offset value */
PCRE2_SIZE heap_limit; /* As it says */
PCRE2_SIZE heap_used; /* As it says */
uint32_t match_limit; /* As it says */
uint32_t match_limit_depth; /* As it says */
uint32_t match_call_count; /* Number of calls of internal function */

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -839,6 +839,7 @@ switch(*cc)
#endif
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
@ -939,6 +940,7 @@ while (cc < ccend)
common->control_head_ptr = 1;
/* Fall through. */
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_MARK:
if (common->mark_ptr == 0)
@ -1553,6 +1555,7 @@ while (cc < ccend)
break;
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
@ -1733,6 +1736,7 @@ while (cc < ccend)
break;
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
@ -2041,6 +2045,7 @@ while (cc < ccend)
break;
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
@ -2428,6 +2433,7 @@ while (cc < ccend)
break;
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
@ -3666,7 +3672,8 @@ if (!common->utf)
#endif
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
@ -5894,6 +5901,8 @@ for (i = 0; i < 32; i++)
}
}
if (len == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */
i = 0;
j = 0;
@ -6627,7 +6636,8 @@ if (needstype || needsscript)
#endif
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
@ -7254,10 +7264,11 @@ while (cc < end_subject)
if ((ricount & 1) != 0) break; /* Grapheme break required */
}
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
any number of Extend before a following E_Modifier. */
/* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
allows any number of them before a following Extended_Pictographic. */
if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
lgb != ucp_gbExtended_Pictographic)
lgb = rgb;
prevcc = cc;
@ -7309,10 +7320,11 @@ while (cc < end_subject)
if ((ricount & 1) != 0) break; /* Grapheme break required */
}
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
any number of Extend before a following E_Modifier. */
/* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
allows any number of them before a following Extended_Pictographic. */
if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
lgb != ucp_gbExtended_Pictographic)
lgb = rgb;
cc++;
@ -10346,7 +10358,8 @@ backtrack_common *backtrack;
PCRE2_UCHAR opcode = *cc;
PCRE2_SPTR ccend = cc + 1;
if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
ccend += 2 + cc[1];
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
@ -10358,7 +10371,7 @@ if (opcode == OP_SKIP)
return ccend;
}
if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
{
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
@ -10677,6 +10690,7 @@ while (cc < ccend)
case OP_THEN:
case OP_THEN_ARG:
case OP_COMMIT:
case OP_COMMIT_ARG:
cc = compile_control_verb_matchingpath(common, cc, parent);
break;
@ -11751,6 +11765,7 @@ while (current)
break;
case OP_COMMIT:
case OP_COMMIT_ARG:
if (!common->local_quit_available)
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
if (common->quit_label == NULL)

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -141,13 +141,6 @@ for (i = 0; i < 256; i++)
if (isdigit(i)) x += ctype_digit;
if (isxdigit(i)) x += ctype_xdigit;
if (isalnum(i) || i == '_') x += ctype_word;
/* Note: strchr includes the terminating zero in the characters it considers.
In this instance, that is ok because we want binary zero to be flagged as a
meta-character, which in this sense is any character that terminates a run
of data characters. */
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
*p++ = x;
}

View file

@ -43,11 +43,11 @@ POSSIBILITY OF SUCH DAMAGE.
#include "config.h"
#endif
/* These defines enables debugging code */
/* These defines enable debugging code */
//#define DEBUG_FRAMES_DISPLAY
//#define DEBUG_SHOW_OPS
//#define DEBUG_SHOW_RMATCH
/* #define DEBUG_FRAMES_DISPLAY */
/* #define DEBUG_SHOW_OPS */
/* #define DEBUG_SHOW_RMATCH */
#ifdef DEBUG_FRAME_DISPLAY
#include <stdarg.h>
@ -149,7 +149,7 @@ changed, the code at RETURN_SWITCH below must be updated in sync. */
enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
RM31, RM32, RM33, RM34, RM35 };
RM31, RM32, RM33, RM34, RM35, RM36 };
#ifdef SUPPORT_WIDE_CHARS
enum { RM100=100, RM101 };
@ -770,7 +770,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */
/* Real or forced end of the pattern, assertion, or recursion. In an
assertion ACCEPT, update the last used pointer and remember the current
frame so that the captures can be fished out of it. */
frame so that the captures and mark can be fished out of it. */
case OP_ASSERT_ACCEPT:
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
@ -1776,7 +1776,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */
/* Match a bit-mapped character class, possibly repeatedly. These op codes
/* Match a bit-mapped character class, possibly repeatedly. These opcodes
are used when all the characters in the class have values in the range
0-255, and either the matching is caseful, or the characters are in the
range 0-127 when UTF processing is enabled. The only difference between
@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
go too far. */
for (;;)
{
RMATCH(Fecode, RM201);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
BACKCHAR(Feptr);
}
}
@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
go too far. */
for(;;)
{
RMATCH(Fecode, RM101);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
#ifdef SUPPORT_UNICODE
if (utf) BACKCHAR(Feptr);
#endif
@ -2456,7 +2464,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */
/* Match a single character type repeatedly. Note that the property type
does not need to be in a stack frame as it not used within an RMATCH()
does not need to be in a stack frame as it is not used within an RMATCH()
loop. */
#define Lstart_eptr F->temp_sptr[0]
@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
Unicode character. Use <= pp to ensure backtracking doesn't go too far.
*/
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
go too far. */
for(;;)
{
@ -4135,7 +4143,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
break;
/* The "byte" (i.e. "code unit") case is the same as non-UTF */
/* The "byte" (i.e. "code unit") case is the same as non-UTF */
case OP_ANYBYTE:
fc = Lmax - Lmin;
@ -5111,7 +5119,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* Positive assertions are like other groups except that PCRE doesn't allow
the effect of (*THEN) to escape beyond an assertion; it is therefore
treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its
captures retained. Any other return is an error. */
captures and mark retained. Any other return is an error. */
#define Lframe_type F->temp_32[0]
@ -5128,6 +5136,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
(char *)assert_accept_frame + offsetof(heapframe, ovector),
assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
Foffset_top = assert_accept_frame->offset_top;
Fmark = assert_accept_frame->mark;
break;
}
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
@ -5416,7 +5425,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
Feptr -= number;
}
/* Save the earliest consulted character, then skip to next op code */
/* Save the earliest consulted character, then skip to next opcode */
if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
Fecode += 1 + LINK_SIZE;
@ -5501,7 +5510,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
frame so that it points to the final branch. */
case OP_ONCE:
Fback_frame = ((char *)F - (char *)P) + frame_size;
Fback_frame = ((char *)F - (char *)P);
for (;;)
{
uint32_t y = GET(P->ecode,1);
@ -5829,6 +5838,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
mb->verb_current_recurse = Fcurrent_recurse;
RRETURN(MATCH_COMMIT);
case OP_COMMIT_ARG:
Fmark = mb->nomatch_mark = Fecode + 2;
RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
mb->verb_current_recurse = Fcurrent_recurse;
RRETURN(MATCH_COMMIT);
case OP_PRUNE:
RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@ -5921,7 +5937,7 @@ in rrc. */
RETURN_SWITCH:
if (Frdepth == 0) return rrc; /* Exit from the top level */
F = (heapframe *)((char *)F - Fback_frame); /* Back track */
F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
#ifdef DEBUG_SHOW_RMATCH
@ -5934,7 +5950,7 @@ switch (Freturn_id)
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
LBL(33) LBL(34) LBL(35)
LBL(33) LBL(34) LBL(35) LBL(36)
#ifdef SUPPORT_WIDE_CHARS
LBL(100) LBL(101)
@ -6275,7 +6291,7 @@ mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
/* If a pattern has very many capturing parentheses, the frame size may be very
large. Ensure that there are at least 10 available frames by getting an initial
vector on the heap if necessary, except when the heap limit prevents this. Get
fewer if possible. (The heap limit is in kilobytes.) */
fewer if possible. (The heap limit is in kibibytes.) */
if (frame_size <= START_FRAMES_SIZE/10)
{

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -390,6 +390,7 @@ while (TRUE)
#endif
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -127,7 +127,25 @@ dst_bytes += tables_length;
for (i = 0; i < number_of_codes; i++)
{
re = (const pcre2_real_code *)(codes[i]);
memcpy(dst_bytes, (char *)re, re->blocksize);
(void)memcpy(dst_bytes, (char *)re, re->blocksize);
/* Certain fields in the compiled code block are re-set during
deserialization. In order to ensure that the serialized data stream is always
the same for the same pattern, set them to zero here. We can't assume the
copy of the pattern is correctly aligned for accessing the fields as part of
a structure. Note the use of sizeof(void *) in the second of these, to
specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a
pointer to uint8_t), gcc gives a warning because the first argument is also a
pointer to uint8_t. Casting the first argument to (void *) can stop this, but
it didn't stop Coverity giving the same complaint. */
(void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0,
sizeof(pcre2_memctl));
(void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0,
sizeof(void *));
(void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
sizeof(void *));
dst_bytes += re->blocksize;
}

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
New API code Copyright (c) 2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -50,6 +50,42 @@ functions work only on 8-bit data. */
#include "pcre2_internal.h"
/*************************************************
* Emulated memmove() for systems without it *
*************************************************/
/* This function can make use of bcopy() if it is available. Otherwise the copy
is done by hand, one byte at a time, as there are some non-Unix environments
that lack both memmove() and bcopy(). */
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
void *
PRIV(memmove)(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
size_t i;
unsigned char *dest = (unsigned char *)d;
const unsigned char *src = (const unsigned char *)s;
if (dest > src)
{
dest += n;
src += n;
for (i = 0; i < n; ++i) *(--dest) = *(--src);
return (void *)dest;
}
else
{
for (i = 0; i < n; ++i) *dest++ = *src++;
return (void *)(dest - n);
}
#endif /* not HAVE_BCOPY */
}
#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
/*************************************************
* Compare two zero-terminated PCRE2 strings *
*************************************************/

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -707,6 +707,7 @@ for (;;)
/* Skip these, but we need to add in the name length. */
case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
@ -956,6 +957,7 @@ do
case OP_CIRCM:
case OP_CLOSE:
case OP_COMMIT:
case OP_COMMIT_ARG:
case OP_COND:
case OP_CREF:
case OP_FALSE:
@ -1274,7 +1276,7 @@ do
break;
/* Single character types set the bits and stop. Note that if PCRE2_UCP
is set, we do not see these op codes because \d etc are converted to
is set, we do not see these opcodes because \d etc are converted to
properties. Therefore, these apply in the case when only characters less
than 256 are recognized to match the types. */

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -238,10 +238,12 @@ PCRE2_SPTR repend;
PCRE2_SIZE extra_needed = 0;
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
PCRE2_SIZE *ovector;
PCRE2_SIZE ovecsave[3];
buff_offset = 0;
lengthleft = buff_length = *blength;
*blength = PCRE2_UNSET;
ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
/* Partial matching is not valid. */
@ -361,13 +363,33 @@ do
}
/* Handle a successful match. Matches that use \K to end before they start
are not supported. */
if (ovector[1] < ovector[0])
or start before the current point in the subject are not supported. */
if (ovector[1] < ovector[0] || ovector[0] < start_offset)
{
rc = PCRE2_ERROR_BADSUBSPATTERN;
goto EXIT;
}
/* Check for the same match as previous. This is legitimate after matching an
empty string that starts after the initial match offset. We have tried again
at the match point in case the pattern is one like /(?<=\G.)/ which can never
match at its starting point, so running the match achieves the bumpalong. If
we do get the same (null) match at the original match point, it isn't such a
pattern, so we now do the empty string magic. In all other cases, a repeat
match should never occur. */
if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
{
if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
{
goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
ovecsave[2] = start_offset;
continue; /* Back to the top of the loop */
}
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
goto EXIT;
}
/* Count substitutions with a paranoid check for integer overflow; surely no
real call to this function would ever hit this! */
@ -799,13 +821,18 @@ do
} /* End handling a literal code unit */
} /* End of loop for scanning the replacement. */
/* The replacement has been copied to the output. Update the start offset to
point to the rest of the subject string. If we matched an empty string,
do the magic for global matches. */
start_offset = ovector[1];
goptions = (ovector[0] != ovector[1])? 0 :
/* The replacement has been copied to the output. Save the details of this
match. See above for how this data is used. If we matched an empty string, do
the magic for global matches. Finally, update the start offset to point to
the rest of the subject string. */
ovecsave[0] = ovector[0];
ovecsave[1] = ovector[1];
ovecsave[2] = start_offset;
goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
start_offset = ovector[1];
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
/* Copy the rest of the subject. */

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -137,9 +137,10 @@ const uint32_t PRIV(ucp_gentype)[] = {
/* This table encodes the rules for finding the end of an extended grapheme
cluster. Every code point has a grapheme break property which is one of the
ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by
the properties of two adjacent code points. The left property selects a word
from the table, and the right property selects a bit from that word like this:
ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
10 and 11. The 2-dimensional table is indexed by the properties of two adjacent
code points. The left property selects a word from the table, and the right
property selects a bit from that word like this:
PRIV(ucp_gbtable)[left-property] & (1 << right-property)
@ -166,49 +167,41 @@ are implementing).
6. Do not break after Prepend characters.
7. Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed
by E_Modifier). Extend characters are allowed before the modifier; this
cannot be represented in this table, the code has to deal with it.
7. Do not break within emoji modifier sequences or emoji zwj sequences. That
is, do not break between characters with the Extended_Pictographic property.
Extend and ZWJ characters are allowed between the characters; this cannot be
represented in this table, the code has to deal with it.
8. Do not break within emoji zwj sequences (ZWJ followed by Glue_After_Zwj or
E_Base_GAZ).
9. Do not break within emoji flag sequences. That is, do not break between
8. Do not break within emoji flag sequences. That is, do not break between
regional indicator (RI) symbols if there are an odd number of RI characters
before the break point. This table encodes "join RI characters"; the code
has to deal with checking for previous adjoining RIs.
10. Otherwise, break everywhere.
9. Otherwise, break everywhere.
*/
#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
const uint32_t PRIV(ucp_gbtable)[] = {
(1<<ucp_gbLF), /* 0 CR */
0, /* 1 LF */
0, /* 2 Control */
ESZ, /* 3 Extend */
ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
(1<<ucp_gbLF), /* 0 CR */
0, /* 1 LF */
0, /* 2 Control */
ESZ, /* 3 Extend */
ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)|
(1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)|
(1<<ucp_gbRegionalIndicator)|
(1<<ucp_gbE_Base)|(1<<ucp_gbE_Modifier)|
(1<<ucp_gbE_Base_GAZ)|
(1<<ucp_gbZWJ)|(1<<ucp_gbGlue_After_Zwj),
ESZ, /* 5 SpacingMark */
ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
(1<<ucp_gbRegionalIndicator),
ESZ, /* 5 SpacingMark */
ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
(1<<ucp_gbLVT),
ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */
ESZ|(1<<ucp_gbT), /* 8 T */
ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */
ESZ|(1<<ucp_gbT), /* 10 LVT */
(1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
ESZ, /* 12 Other */
ESZ|(1<<ucp_gbE_Modifier), /* 13 E_Base */
ESZ, /* 14 E_Modifier */
ESZ|(1<<ucp_gbE_Modifier), /* 15 E_Base_GAZ */
ESZ|(1<<ucp_gbGlue_After_Zwj)|(1<<ucp_gbE_Base_GAZ), /* 16 ZWJ */
ESZ /* 12 Glue_After_Zwj */
ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */
ESZ|(1<<ucp_gbT), /* 8 T */
ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */
ESZ|(1<<ucp_gbT), /* 10 LVT */
(1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
ESZ, /* 12 Other */
ESZ, /* 13 ZWJ */
ESZ|(1<<ucp_gbExtended_Pictographic) /* 14 Extended Pictographic */
};
#undef ESZ
@ -282,6 +275,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0"
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
@ -292,9 +286,11 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
#define STRING_Gunjala_Gondi0 STR_G STR_u STR_n STR_j STR_a STR_l STR_a STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
#define STRING_Han0 STR_H STR_a STR_n "\0"
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
#define STRING_Hanifi_Rohingya0 STR_H STR_a STR_n STR_i STR_f STR_i STR_UNDERSCORE STR_R STR_o STR_h STR_i STR_n STR_g STR_y STR_a "\0"
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0"
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
@ -330,6 +326,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
#define STRING_M0 STR_M "\0"
#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
#define STRING_Makasar0 STR_M STR_a STR_k STR_a STR_s STR_a STR_r "\0"
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
@ -337,6 +334,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
#define STRING_Mc0 STR_M STR_c "\0"
#define STRING_Me0 STR_M STR_e "\0"
#define STRING_Medefaidrin0 STR_M STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0"
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
@ -364,6 +362,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
#define STRING_Old_Sogdian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
@ -397,6 +396,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Sk0 STR_S STR_k "\0"
#define STRING_Sm0 STR_S STR_m "\0"
#define STRING_So0 STR_S STR_o "\0"
#define STRING_Sogdian0 STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
#define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0"
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
@ -469,6 +469,7 @@ const char PRIV(utt_names)[] =
STRING_Cyrillic0
STRING_Deseret0
STRING_Devanagari0
STRING_Dogra0
STRING_Duployan0
STRING_Egyptian_Hieroglyphs0
STRING_Elbasan0
@ -479,9 +480,11 @@ const char PRIV(utt_names)[] =
STRING_Grantha0
STRING_Greek0
STRING_Gujarati0
STRING_Gunjala_Gondi0
STRING_Gurmukhi0
STRING_Han0
STRING_Hangul0
STRING_Hanifi_Rohingya0
STRING_Hanunoo0
STRING_Hatran0
STRING_Hebrew0
@ -517,6 +520,7 @@ const char PRIV(utt_names)[] =
STRING_Lydian0
STRING_M0
STRING_Mahajani0
STRING_Makasar0
STRING_Malayalam0
STRING_Mandaic0
STRING_Manichaean0
@ -524,6 +528,7 @@ const char PRIV(utt_names)[] =
STRING_Masaram_Gondi0
STRING_Mc0
STRING_Me0
STRING_Medefaidrin0
STRING_Meetei_Mayek0
STRING_Mende_Kikakui0
STRING_Meroitic_Cursive0
@ -551,6 +556,7 @@ const char PRIV(utt_names)[] =
STRING_Old_North_Arabian0
STRING_Old_Permic0
STRING_Old_Persian0
STRING_Old_Sogdian0
STRING_Old_South_Arabian0
STRING_Old_Turkic0
STRING_Oriya0
@ -584,6 +590,7 @@ const char PRIV(utt_names)[] =
STRING_Sk0
STRING_Sm0
STRING_So0
STRING_Sogdian0
STRING_Sora_Sompeng0
STRING_Soyombo0
STRING_Sundanese0
@ -656,154 +663,161 @@ const ucp_type_table PRIV(utt)[] = {
{ 265, PT_SC, ucp_Cyrillic },
{ 274, PT_SC, ucp_Deseret },
{ 282, PT_SC, ucp_Devanagari },
{ 293, PT_SC, ucp_Duployan },
{ 302, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 323, PT_SC, ucp_Elbasan },
{ 331, PT_SC, ucp_Ethiopic },
{ 340, PT_SC, ucp_Georgian },
{ 349, PT_SC, ucp_Glagolitic },
{ 360, PT_SC, ucp_Gothic },
{ 367, PT_SC, ucp_Grantha },
{ 375, PT_SC, ucp_Greek },
{ 381, PT_SC, ucp_Gujarati },
{ 390, PT_SC, ucp_Gurmukhi },
{ 399, PT_SC, ucp_Han },
{ 403, PT_SC, ucp_Hangul },
{ 410, PT_SC, ucp_Hanunoo },
{ 418, PT_SC, ucp_Hatran },
{ 425, PT_SC, ucp_Hebrew },
{ 432, PT_SC, ucp_Hiragana },
{ 441, PT_SC, ucp_Imperial_Aramaic },
{ 458, PT_SC, ucp_Inherited },
{ 468, PT_SC, ucp_Inscriptional_Pahlavi },
{ 490, PT_SC, ucp_Inscriptional_Parthian },
{ 513, PT_SC, ucp_Javanese },
{ 522, PT_SC, ucp_Kaithi },
{ 529, PT_SC, ucp_Kannada },
{ 537, PT_SC, ucp_Katakana },
{ 546, PT_SC, ucp_Kayah_Li },
{ 555, PT_SC, ucp_Kharoshthi },
{ 566, PT_SC, ucp_Khmer },
{ 572, PT_SC, ucp_Khojki },
{ 579, PT_SC, ucp_Khudawadi },
{ 589, PT_GC, ucp_L },
{ 591, PT_LAMP, 0 },
{ 594, PT_SC, ucp_Lao },
{ 598, PT_SC, ucp_Latin },
{ 604, PT_SC, ucp_Lepcha },
{ 611, PT_SC, ucp_Limbu },
{ 617, PT_SC, ucp_Linear_A },
{ 626, PT_SC, ucp_Linear_B },
{ 635, PT_SC, ucp_Lisu },
{ 640, PT_PC, ucp_Ll },
{ 643, PT_PC, ucp_Lm },
{ 646, PT_PC, ucp_Lo },
{ 649, PT_PC, ucp_Lt },
{ 652, PT_PC, ucp_Lu },
{ 655, PT_SC, ucp_Lycian },
{ 662, PT_SC, ucp_Lydian },
{ 669, PT_GC, ucp_M },
{ 671, PT_SC, ucp_Mahajani },
{ 680, PT_SC, ucp_Malayalam },
{ 690, PT_SC, ucp_Mandaic },
{ 698, PT_SC, ucp_Manichaean },
{ 709, PT_SC, ucp_Marchen },
{ 717, PT_SC, ucp_Masaram_Gondi },
{ 731, PT_PC, ucp_Mc },
{ 734, PT_PC, ucp_Me },
{ 737, PT_SC, ucp_Meetei_Mayek },
{ 750, PT_SC, ucp_Mende_Kikakui },
{ 764, PT_SC, ucp_Meroitic_Cursive },
{ 781, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 802, PT_SC, ucp_Miao },
{ 807, PT_PC, ucp_Mn },
{ 810, PT_SC, ucp_Modi },
{ 815, PT_SC, ucp_Mongolian },
{ 825, PT_SC, ucp_Mro },
{ 829, PT_SC, ucp_Multani },
{ 837, PT_SC, ucp_Myanmar },
{ 845, PT_GC, ucp_N },
{ 847, PT_SC, ucp_Nabataean },
{ 857, PT_PC, ucp_Nd },
{ 860, PT_SC, ucp_New_Tai_Lue },
{ 872, PT_SC, ucp_Newa },
{ 877, PT_SC, ucp_Nko },
{ 881, PT_PC, ucp_Nl },
{ 884, PT_PC, ucp_No },
{ 887, PT_SC, ucp_Nushu },
{ 893, PT_SC, ucp_Ogham },
{ 899, PT_SC, ucp_Ol_Chiki },
{ 908, PT_SC, ucp_Old_Hungarian },
{ 922, PT_SC, ucp_Old_Italic },
{ 933, PT_SC, ucp_Old_North_Arabian },
{ 951, PT_SC, ucp_Old_Permic },
{ 962, PT_SC, ucp_Old_Persian },
{ 974, PT_SC, ucp_Old_South_Arabian },
{ 992, PT_SC, ucp_Old_Turkic },
{ 1003, PT_SC, ucp_Oriya },
{ 1009, PT_SC, ucp_Osage },
{ 1015, PT_SC, ucp_Osmanya },
{ 1023, PT_GC, ucp_P },
{ 1025, PT_SC, ucp_Pahawh_Hmong },
{ 1038, PT_SC, ucp_Palmyrene },
{ 1048, PT_SC, ucp_Pau_Cin_Hau },
{ 1060, PT_PC, ucp_Pc },
{ 1063, PT_PC, ucp_Pd },
{ 1066, PT_PC, ucp_Pe },
{ 1069, PT_PC, ucp_Pf },
{ 1072, PT_SC, ucp_Phags_Pa },
{ 1081, PT_SC, ucp_Phoenician },
{ 1092, PT_PC, ucp_Pi },
{ 1095, PT_PC, ucp_Po },
{ 1098, PT_PC, ucp_Ps },
{ 1101, PT_SC, ucp_Psalter_Pahlavi },
{ 1117, PT_SC, ucp_Rejang },
{ 1124, PT_SC, ucp_Runic },
{ 1130, PT_GC, ucp_S },
{ 1132, PT_SC, ucp_Samaritan },
{ 1142, PT_SC, ucp_Saurashtra },
{ 1153, PT_PC, ucp_Sc },
{ 1156, PT_SC, ucp_Sharada },
{ 1164, PT_SC, ucp_Shavian },
{ 1172, PT_SC, ucp_Siddham },
{ 1180, PT_SC, ucp_SignWriting },
{ 1192, PT_SC, ucp_Sinhala },
{ 1200, PT_PC, ucp_Sk },
{ 1203, PT_PC, ucp_Sm },
{ 1206, PT_PC, ucp_So },
{ 1209, PT_SC, ucp_Sora_Sompeng },
{ 1222, PT_SC, ucp_Soyombo },
{ 1230, PT_SC, ucp_Sundanese },
{ 1240, PT_SC, ucp_Syloti_Nagri },
{ 1253, PT_SC, ucp_Syriac },
{ 1260, PT_SC, ucp_Tagalog },
{ 1268, PT_SC, ucp_Tagbanwa },
{ 1277, PT_SC, ucp_Tai_Le },
{ 1284, PT_SC, ucp_Tai_Tham },
{ 1293, PT_SC, ucp_Tai_Viet },
{ 1302, PT_SC, ucp_Takri },
{ 1308, PT_SC, ucp_Tamil },
{ 1314, PT_SC, ucp_Tangut },
{ 1321, PT_SC, ucp_Telugu },
{ 1328, PT_SC, ucp_Thaana },
{ 1335, PT_SC, ucp_Thai },
{ 1340, PT_SC, ucp_Tibetan },
{ 1348, PT_SC, ucp_Tifinagh },
{ 1357, PT_SC, ucp_Tirhuta },
{ 1365, PT_SC, ucp_Ugaritic },
{ 1374, PT_SC, ucp_Vai },
{ 1378, PT_SC, ucp_Warang_Citi },
{ 1390, PT_ALNUM, 0 },
{ 1394, PT_PXSPACE, 0 },
{ 1398, PT_SPACE, 0 },
{ 1402, PT_UCNC, 0 },
{ 1406, PT_WORD, 0 },
{ 1410, PT_SC, ucp_Yi },
{ 1413, PT_GC, ucp_Z },
{ 1415, PT_SC, ucp_Zanabazar_Square },
{ 1432, PT_PC, ucp_Zl },
{ 1435, PT_PC, ucp_Zp },
{ 1438, PT_PC, ucp_Zs }
{ 293, PT_SC, ucp_Dogra },
{ 299, PT_SC, ucp_Duployan },
{ 308, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 329, PT_SC, ucp_Elbasan },
{ 337, PT_SC, ucp_Ethiopic },
{ 346, PT_SC, ucp_Georgian },
{ 355, PT_SC, ucp_Glagolitic },
{ 366, PT_SC, ucp_Gothic },
{ 373, PT_SC, ucp_Grantha },
{ 381, PT_SC, ucp_Greek },
{ 387, PT_SC, ucp_Gujarati },
{ 396, PT_SC, ucp_Gunjala_Gondi },
{ 410, PT_SC, ucp_Gurmukhi },
{ 419, PT_SC, ucp_Han },
{ 423, PT_SC, ucp_Hangul },
{ 430, PT_SC, ucp_Hanifi_Rohingya },
{ 446, PT_SC, ucp_Hanunoo },
{ 454, PT_SC, ucp_Hatran },
{ 461, PT_SC, ucp_Hebrew },
{ 468, PT_SC, ucp_Hiragana },
{ 477, PT_SC, ucp_Imperial_Aramaic },
{ 494, PT_SC, ucp_Inherited },
{ 504, PT_SC, ucp_Inscriptional_Pahlavi },
{ 526, PT_SC, ucp_Inscriptional_Parthian },
{ 549, PT_SC, ucp_Javanese },
{ 558, PT_SC, ucp_Kaithi },
{ 565, PT_SC, ucp_Kannada },
{ 573, PT_SC, ucp_Katakana },
{ 582, PT_SC, ucp_Kayah_Li },
{ 591, PT_SC, ucp_Kharoshthi },
{ 602, PT_SC, ucp_Khmer },
{ 608, PT_SC, ucp_Khojki },
{ 615, PT_SC, ucp_Khudawadi },
{ 625, PT_GC, ucp_L },
{ 627, PT_LAMP, 0 },
{ 630, PT_SC, ucp_Lao },
{ 634, PT_SC, ucp_Latin },
{ 640, PT_SC, ucp_Lepcha },
{ 647, PT_SC, ucp_Limbu },
{ 653, PT_SC, ucp_Linear_A },
{ 662, PT_SC, ucp_Linear_B },
{ 671, PT_SC, ucp_Lisu },
{ 676, PT_PC, ucp_Ll },
{ 679, PT_PC, ucp_Lm },
{ 682, PT_PC, ucp_Lo },
{ 685, PT_PC, ucp_Lt },
{ 688, PT_PC, ucp_Lu },
{ 691, PT_SC, ucp_Lycian },
{ 698, PT_SC, ucp_Lydian },
{ 705, PT_GC, ucp_M },
{ 707, PT_SC, ucp_Mahajani },
{ 716, PT_SC, ucp_Makasar },
{ 724, PT_SC, ucp_Malayalam },
{ 734, PT_SC, ucp_Mandaic },
{ 742, PT_SC, ucp_Manichaean },
{ 753, PT_SC, ucp_Marchen },
{ 761, PT_SC, ucp_Masaram_Gondi },
{ 775, PT_PC, ucp_Mc },
{ 778, PT_PC, ucp_Me },
{ 781, PT_SC, ucp_Medefaidrin },
{ 793, PT_SC, ucp_Meetei_Mayek },
{ 806, PT_SC, ucp_Mende_Kikakui },
{ 820, PT_SC, ucp_Meroitic_Cursive },
{ 837, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 858, PT_SC, ucp_Miao },
{ 863, PT_PC, ucp_Mn },
{ 866, PT_SC, ucp_Modi },
{ 871, PT_SC, ucp_Mongolian },
{ 881, PT_SC, ucp_Mro },
{ 885, PT_SC, ucp_Multani },
{ 893, PT_SC, ucp_Myanmar },
{ 901, PT_GC, ucp_N },
{ 903, PT_SC, ucp_Nabataean },
{ 913, PT_PC, ucp_Nd },
{ 916, PT_SC, ucp_New_Tai_Lue },
{ 928, PT_SC, ucp_Newa },
{ 933, PT_SC, ucp_Nko },
{ 937, PT_PC, ucp_Nl },
{ 940, PT_PC, ucp_No },
{ 943, PT_SC, ucp_Nushu },
{ 949, PT_SC, ucp_Ogham },
{ 955, PT_SC, ucp_Ol_Chiki },
{ 964, PT_SC, ucp_Old_Hungarian },
{ 978, PT_SC, ucp_Old_Italic },
{ 989, PT_SC, ucp_Old_North_Arabian },
{ 1007, PT_SC, ucp_Old_Permic },
{ 1018, PT_SC, ucp_Old_Persian },
{ 1030, PT_SC, ucp_Old_Sogdian },
{ 1042, PT_SC, ucp_Old_South_Arabian },
{ 1060, PT_SC, ucp_Old_Turkic },
{ 1071, PT_SC, ucp_Oriya },
{ 1077, PT_SC, ucp_Osage },
{ 1083, PT_SC, ucp_Osmanya },
{ 1091, PT_GC, ucp_P },
{ 1093, PT_SC, ucp_Pahawh_Hmong },
{ 1106, PT_SC, ucp_Palmyrene },
{ 1116, PT_SC, ucp_Pau_Cin_Hau },
{ 1128, PT_PC, ucp_Pc },
{ 1131, PT_PC, ucp_Pd },
{ 1134, PT_PC, ucp_Pe },
{ 1137, PT_PC, ucp_Pf },
{ 1140, PT_SC, ucp_Phags_Pa },
{ 1149, PT_SC, ucp_Phoenician },
{ 1160, PT_PC, ucp_Pi },
{ 1163, PT_PC, ucp_Po },
{ 1166, PT_PC, ucp_Ps },
{ 1169, PT_SC, ucp_Psalter_Pahlavi },
{ 1185, PT_SC, ucp_Rejang },
{ 1192, PT_SC, ucp_Runic },
{ 1198, PT_GC, ucp_S },
{ 1200, PT_SC, ucp_Samaritan },
{ 1210, PT_SC, ucp_Saurashtra },
{ 1221, PT_PC, ucp_Sc },
{ 1224, PT_SC, ucp_Sharada },
{ 1232, PT_SC, ucp_Shavian },
{ 1240, PT_SC, ucp_Siddham },
{ 1248, PT_SC, ucp_SignWriting },
{ 1260, PT_SC, ucp_Sinhala },
{ 1268, PT_PC, ucp_Sk },
{ 1271, PT_PC, ucp_Sm },
{ 1274, PT_PC, ucp_So },
{ 1277, PT_SC, ucp_Sogdian },
{ 1285, PT_SC, ucp_Sora_Sompeng },
{ 1298, PT_SC, ucp_Soyombo },
{ 1306, PT_SC, ucp_Sundanese },
{ 1316, PT_SC, ucp_Syloti_Nagri },
{ 1329, PT_SC, ucp_Syriac },
{ 1336, PT_SC, ucp_Tagalog },
{ 1344, PT_SC, ucp_Tagbanwa },
{ 1353, PT_SC, ucp_Tai_Le },
{ 1360, PT_SC, ucp_Tai_Tham },
{ 1369, PT_SC, ucp_Tai_Viet },
{ 1378, PT_SC, ucp_Takri },
{ 1384, PT_SC, ucp_Tamil },
{ 1390, PT_SC, ucp_Tangut },
{ 1397, PT_SC, ucp_Telugu },
{ 1404, PT_SC, ucp_Thaana },
{ 1411, PT_SC, ucp_Thai },
{ 1416, PT_SC, ucp_Tibetan },
{ 1424, PT_SC, ucp_Tifinagh },
{ 1433, PT_SC, ucp_Tirhuta },
{ 1441, PT_SC, ucp_Ugaritic },
{ 1450, PT_SC, ucp_Vai },
{ 1454, PT_SC, ucp_Warang_Citi },
{ 1466, PT_ALNUM, 0 },
{ 1470, PT_PXSPACE, 0 },
{ 1474, PT_SPACE, 0 },
{ 1478, PT_UCNC, 0 },
{ 1482, PT_WORD, 0 },
{ 1486, PT_SC, ucp_Yi },
{ 1489, PT_GC, ucp_Z },
{ 1491, PT_SC, ucp_Zanabazar_Square },
{ 1508, PT_PC, ucp_Zl },
{ 1511, PT_PC, ucp_Zp },
{ 1514, PT_PC, ucp_Zs }
};
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

File diff suppressed because it is too large Load diff

View file

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -100,27 +100,25 @@ enum {
ucp_Zs /* Space separator */
};
/* These are grapheme break properties. */
/* These are grapheme break properties. The Extended Pictographic property
comes from the emoji-data.txt file. */
enum {
ucp_gbCR, /* 0 */
ucp_gbLF, /* 1 */
ucp_gbControl, /* 2 */
ucp_gbExtend, /* 3 */
ucp_gbPrepend, /* 4 */
ucp_gbSpacingMark, /* 5 */
ucp_gbL, /* 6 Hangul syllable type L */
ucp_gbV, /* 7 Hangul syllable type V */
ucp_gbT, /* 8 Hangul syllable type T */
ucp_gbLV, /* 9 Hangul syllable type LV */
ucp_gbLVT, /* 10 Hangul syllable type LVT */
ucp_gbRegionalIndicator, /* 11 */
ucp_gbOther, /* 12 */
ucp_gbE_Base, /* 13 */
ucp_gbE_Modifier, /* 14 */
ucp_gbE_Base_GAZ, /* 15 */
ucp_gbZWJ, /* 16 */
ucp_gbGlue_After_Zwj /* 17 */
ucp_gbCR, /* 0 */
ucp_gbLF, /* 1 */
ucp_gbControl, /* 2 */
ucp_gbExtend, /* 3 */
ucp_gbPrepend, /* 4 */
ucp_gbSpacingMark, /* 5 */
ucp_gbL, /* 6 Hangul syllable type L */
ucp_gbV, /* 7 Hangul syllable type V */
ucp_gbT, /* 8 Hangul syllable type T */
ucp_gbLV, /* 9 Hangul syllable type LV */
ucp_gbLVT, /* 10 Hangul syllable type LVT */
ucp_gbRegionalIndicator, /* 11 */
ucp_gbOther, /* 12 */
ucp_gbZWJ, /* 13 */
ucp_gbExtended_Pictographic /* 14 */
};
/* These are the script identifications. */
@ -274,7 +272,15 @@ enum {
ucp_Masaram_Gondi,
ucp_Nushu,
ucp_Soyombo,
ucp_Zanabazar_Square
ucp_Zanabazar_Square,
/* New for Unicode 11.0.0 */
ucp_Dogra,
ucp_Gunjala_Gondi,
ucp_Hanifi_Rohingya,
ucp_Makasar,
ucp_Medefaidrin,
ucp_Old_Sogdian,
ucp_Sogdian
};
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */

View file

@ -66,7 +66,7 @@
SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
Other macros:
SLJIT_FUNC : calling convention attribute for both calling JIT form C and C calling back from JIT
SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
*/
@ -147,17 +147,23 @@
#define SLJIT_CONFIG_UNSUPPORTED 1
#endif
#else /* !_WIN32 */
#else /* _WIN32 */
#if defined(_M_X64) || defined(__x86_64__)
#define SLJIT_CONFIG_X86_64 1
#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__)
#define SLJIT_CONFIG_ARM_THUMB2 1
#elif (defined(_M_ARM) && _M_ARM >= 7)
#define SLJIT_CONFIG_ARM_V7 1
#elif defined(_ARM_)
#define SLJIT_CONFIG_ARM_V5 1
#elif defined(_M_ARM64) || defined(__aarch64__)
#define SLJIT_CONFIG_ARM_64 1
#else
#define SLJIT_CONFIG_X86_32 1
#endif
#endif /* !WIN32 */
#endif /* !_WIN32 */
#endif /* SLJIT_CONFIG_AUTO */
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
@ -324,6 +330,11 @@
sparc_cache_flush((from), (to))
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
#elif defined _WIN32
#define SLJIT_CACHE_FLUSH(from, to) \
FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from))
#else
/* Calls __ARM_NR_cacheflush on ARM-Linux. */
@ -371,12 +382,18 @@ typedef int sljit_sw;
#define SLJIT_64BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 3
#ifdef _WIN32
#ifdef __GNUC__
/* These types do not require windows.h */
typedef unsigned long long sljit_uw;
typedef long long sljit_sw;
#else
typedef unsigned __int64 sljit_uw;
typedef __int64 sljit_sw;
#else
#endif
#else /* !_WIN32 */
typedef unsigned long int sljit_uw;
typedef long int sljit_sw;
#endif
#endif /* _WIN32 */
#endif
typedef sljit_uw sljit_p;
@ -590,7 +607,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_REGISTERS 26
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw))
#define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)

View file

@ -99,7 +99,14 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
void *retval;
#ifdef MAP_ANON
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
int flags = MAP_PRIVATE | MAP_ANON;
#ifdef MAP_JIT
flags |= MAP_JIT;
#endif
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
#else
if (dev_zero < 0) {
if (open_dev_zero())

View file

@ -26,6 +26,13 @@
#include "sljitLir.h"
#ifdef _WIN32
/* For SLJIT_CACHE_FLUSH, which can expand to FlushInstructionCache. */
#include <windows.h>
#endif /* _WIN32 */
#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
/* These libraries are needed for the macros below. */
@ -2178,7 +2185,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
#endif
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
&& !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{

View file

@ -138,7 +138,7 @@ of sljitConfigInternal.h */
be specified as scratch registers and the fifth one as saved register
on the CPU above and any user code which requires four scratch
registers can run unmodified. The SLJIT compiler automatically saves
the content of the two extra scrath register on the stack. Scratch
the content of the two extra scratch registers on the stack. Scratch
registers can also be preserved by saving their value on the stack
but this needs to be done manually.
@ -746,7 +746,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source
register can hold any 32 or 64 bit value, and it is converted to a 32 bit
compatible format first. This conversion is free (no instructions are
emitted) on most CPUs. A 32 bit value can also be coverted to a 64 bit
emitted) on most CPUs. A 32 bit value can also be converted to a 64 bit
value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension).
Note: memory addressing always uses 64 bit values on 64 bit systems so
@ -773,8 +773,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
*/
#define SLJIT_F32_OP SLJIT_I32_OP
/* Many CPUs (x86, ARM, PPC) has status flags which can be set according
to the result of an operation. Other CPUs (MIPS) does not have status
/* Many CPUs (x86, ARM, PPC) have status flags which can be set according
to the result of an operation. Other CPUs (MIPS) do not have status
flags, and results must be stored in registers. To cover both architecture
types efficiently only two flags are defined by SLJIT:
@ -810,14 +810,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
Using these flags can reduce the number of emitted instructions. E.g. a
fast loop can be implemented by decreasing a counter register and setting the
zero flag to jump back if the counter register is not reached zero.
zero flag to jump back if the counter register has not reached zero.
Motivation: although CPUs can set a large number of flags, usually their
values are ignored or only one of them is used. Emulating a large number
of flags on systems without flag register is complicated so SLJIT
instructions must specify the flag they want to use and only that flag
will be emulated. The last arithmetic instruction can be repeated if
multiple flags needs to be checked.
multiple flags need to be checked.
*/
/* Set Zero status flag. */
@ -884,7 +884,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
/* Starting index of opcodes for sljit_emit_op1. */
#define SLJIT_OP1_BASE 32
/* The MOV instruction transfer data from source to destination.
/* The MOV instruction transfers data from source to destination.
MOV instruction suffixes:
@ -1156,7 +1156,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
#define SLJIT_FAST_CALL 25
/* Called function must be declared with the SLJIT_FUNC attribute. */
#define SLJIT_CALL 26
/* Called function must be decalred with cdecl attribute.
/* Called function must be declared with cdecl attribute.
This is the default attribute for C functions. */
#define SLJIT_CALL_CDECL 27
@ -1210,7 +1210,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl
/* Set the destination address of the jump to this label. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target);
/* Emit an indirect jump or fast call. Both direct and indirect form
/* Emit an indirect jump or fast call.
Direct form: set src to SLJIT_IMM() and srcw to the address
Indirect form: any other valid addressing mode
type must be between SLJIT_JUMP and SLJIT_FAST_CALL
@ -1274,7 +1274,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
#define SLJIT_MEM_POST 0x1000
/* Emit a single memory load or store with update instruction. When the
requested instruction from is not supported by the CPU, it returns
requested instruction form is not supported by the CPU, it returns
with SLJIT_ERR_UNSUPPORTED instead of emulating the instruction. This
allows specializing tight loops based on the supported instruction
forms (see SLJIT_MEM_SUPP flag).

View file

@ -37,14 +37,14 @@ typedef sljit_u32 sljit_ins;
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 5)
#define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5)
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
/* r18 - platform register, currently not used */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 30, 31
31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29
};
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
@ -68,6 +68,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ADC 0x9a000000
#define ADD 0x8b000000
#define ADDE 0x8b200000
#define ADDI 0x91000000
#define AND 0x8a000000
#define ANDI 0x92000000
@ -96,7 +97,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define FSUB 0x1e603800
#define LDRI 0xf9400000
#define LDP 0xa9400000
#define LDP_PST 0xa8c00000
#define LDP_PRE 0xa9c00000
#define LDR_PRE 0xf8400c00
#define LSLV 0x9ac02000
#define LSRV 0x9ac02400
#define MADD 0x9b000000
@ -873,73 +875,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0);
local_size += saved_regs_size + SLJIT_LOCALS_OFFSET;
local_size = (local_size + 15) & ~0xf;
compiler->local_size = local_size;
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
if (saved_regs_size & 0x8)
saved_regs_size += sizeof(sljit_sw);
if (local_size <= (63 * sizeof(sljit_sw))) {
FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
| RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
offs = (local_size - saved_regs_size) << (15 - 3);
} else {
offs = 0 << 15;
if (saved_regs_size & 0x8) {
offs = 1 << 15;
saved_regs_size += sizeof(sljit_sw);
}
local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
if (saved_regs_size > 0)
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
}
local_size = (local_size + 15) & ~0xf;
compiler->local_size = local_size + saved_regs_size;
FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
| RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15)));
#ifdef _WIN32
if (local_size >= 4096)
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
else if (local_size > 256)
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10)));
#endif
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
prev = -1;
offs = 2 << 15;
for (i = SLJIT_S0; i >= tmp; i--) {
if (prev == -1) {
if (!(offs & (1 << 15))) {
prev = i;
continue;
}
FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
offs += 1 << 15;
prev = i;
continue;
}
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 2 << 15;
prev = -1;
}
for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
if (prev == -1) {
if (!(offs & (1 << 15))) {
prev = i;
continue;
}
FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
offs += 1 << 15;
prev = i;
continue;
}
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 2 << 15;
prev = -1;
}
SLJIT_ASSERT(prev == -1);
if (prev != -1)
FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
if (compiler->local_size > (63 * sizeof(sljit_sw))) {
/* The local_size is already adjusted by the saved registers. */
if (local_size > 0xfff) {
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
local_size &= 0xfff;
}
if (local_size)
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
| RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
}
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)));
args = get_arg_count(arg_types);
@ -950,6 +930,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (args >= 3)
FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));
#ifdef _WIN32
if (local_size >= 4096) {
if (local_size < 4 * 4096) {
/* No need for a loop. */
if (local_size >= 2 * 4096) {
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
local_size -= 4096;
}
if (local_size >= 2 * 4096) {
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
local_size -= 4096;
}
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
local_size -= 4096;
}
else {
FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5)));
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10)));
FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */));
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
local_size &= 0xfff;
}
if (local_size > 256) {
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10)));
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
}
else if (local_size > 0)
FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
}
else if (local_size > 256) {
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
}
else if (local_size > 0)
FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12)));
#else /* !_WIN32 */
/* The local_size does not include saved registers size. */
if (local_size > 0xfff) {
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
local_size &= 0xfff;
}
if (local_size != 0)
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
#endif /* _WIN32 */
return SLJIT_SUCCESS;
}
@ -957,13 +995,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
sljit_s32 saved_regs_size;
CHECK_ERROR();
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET;
local_size = (local_size + 15) & ~0xf;
compiler->local_size = local_size;
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
if (saved_regs_size & 0x8)
saved_regs_size += sizeof(sljit_sw);
compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf);
return SLJIT_SUCCESS;
}
@ -977,71 +1019,59 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
local_size = compiler->local_size;
saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2);
if (saved_regs_size & 0x8)
saved_regs_size += sizeof(sljit_sw);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0);
if (local_size <= (63 * sizeof(sljit_sw)))
offs = (local_size - saved_regs_size) << (15 - 3);
local_size = compiler->local_size - saved_regs_size;
/* Load LR as early as possible. */
if (local_size == 0)
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
else if (local_size < 63 * sizeof(sljit_sw)) {
FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR)
| RN(SLJIT_SP) | (local_size << (15 - 3))));
}
else {
FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
| RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15)));
offs = 0 << 15;
if (saved_regs_size & 0x8) {
offs = 1 << 15;
saved_regs_size += sizeof(sljit_sw);
}
local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
if (local_size > 0xfff) {
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
local_size &= 0xfff;
}
if (local_size)
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
}
tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
prev = -1;
offs = 2 << 15;
for (i = SLJIT_S0; i >= tmp; i--) {
if (prev == -1) {
if (!(offs & (1 << 15))) {
prev = i;
continue;
}
FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
offs += 1 << 15;
prev = i;
continue;
}
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 2 << 15;
prev = -1;
}
for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
if (prev == -1) {
if (!(offs & (1 << 15))) {
prev = i;
continue;
}
FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
offs += 1 << 15;
prev = i;
continue;
}
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 2 << 15;
prev = -1;
}
SLJIT_ASSERT(prev == -1);
if (prev != -1)
FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
if (compiler->local_size <= (63 * sizeof(sljit_sw))) {
FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
| RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
} else if (saved_regs_size > 0) {
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
}
FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
return SLJIT_SUCCESS;
/* These two can be executed in parallel. */
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10)));
return push_inst(compiler, RET | RN(TMP_LR));
}
/* --------------------------------------------------------------------- */
@ -1856,6 +1886,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12));
}
/* Computes SLJIT_SP + offset and places the result in dst (ARM64 specific
   implementation).

   compiler - the compiler instance the instructions are pushed to
   dst/dstw - destination operand; when dst is not a plain register the value
              is computed in TMP_REG1 and stored to memory afterwards
   offset   - signed displacement added to SLJIT_SP

   Returns SLJIT_SUCCESS, or the error propagated through FAIL_IF /
   emit_op_mem when an instruction cannot be emitted. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
sljit_s32 dst_reg;
sljit_ins ins;
CHECK_ERROR();
CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
/* The offset is applied to SLJIT_SP as is, so this code relies on the
   locals area having no extra base displacement. */
SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0);
/* Compute into dst directly when it is a register, otherwise use TMP_REG1. */
dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (offset <= 0xffffff && offset >= -0xffffff) {
/* Magnitude fits in 24 bits: at most two immediate add/sub instructions. */
ins = ADDI;
if (offset < 0) {
/* Negative displacement: subtract the absolute value instead. */
offset = -offset;
ins = SUBI;
}
if (offset <= 0xfff)
FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10)));
else {
/* Upper 12 bits first; bit 22 selects the "immediate shifted left by 12"
   form of the A64 add/sub immediate encoding. */
FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22)));
offset &= 0xfff;
/* Then the lower 12 bits, applied on top of the partial result. */
if (offset != 0)
FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10)));
}
}
else {
/* Large displacement: materialize it in dst_reg and add it to SLJIT_SP. */
FAIL_IF(load_immediate (compiler, dst_reg, offset));
/* Add extended register form. */
/* NOTE(review): 0x3 << 13 is presumably the UXTX extend option, which is
   needed for SP to be accepted as the first source operand - confirm
   against the A64 encoding tables. */
FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg)));
}
/* Memory destination: store the computed address from dst_reg. */
if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
struct sljit_const *const_;

View file

@ -110,6 +110,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ASRSI 0x1000
#define ASR_W 0xfa40f000
#define ASR_WI 0xea4f0020
#define BCC 0xd000
#define BICI 0xf0200000
#define BKPT 0xbe00
#define BLX 0x4780
@ -125,6 +126,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define EORS 0x4040
#define EOR_W 0xea800000
#define IT 0xbf00
#define LDRI 0xf8500800
#define LSLS 0x4080
#define LSLSI 0x0000
#define LSL_W 0xfa00f000
@ -158,6 +160,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SBCI 0xf1600000
#define SBCS 0x4180
#define SBC_W 0xeb600000
#define SDIV 0xfb90f0f0
#define SMULL 0xfb800000
#define STR_SP 0x9000
#define SUBS 0x1a00
@ -172,6 +175,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SXTH 0xb200
#define SXTH_W 0xfa0ff080
#define TST 0x4200
#define UDIV 0xfbb0f0f0
#define UMULL 0xfba00000
#define UXTB 0xb2c0
#define UXTB_W 0xfa5ff080
@ -339,8 +343,8 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw
/* Really complex instruction form for branches. */
s = (diff >> 23) & 0x1;
j1 = (~(diff >> 21) ^ s) & 0x1;
j2 = (~(diff >> 22) ^ s) & 0x1;
j1 = (~(diff >> 22) ^ s) & 0x1;
j2 = (~(diff >> 21) ^ s) & 0x1;
jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);
@ -520,6 +524,8 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst,
{
sljit_uw tmp;
/* MOVS cannot be used since it destroys flags. */
if (imm >= 0x10000) {
tmp = get_imm(imm);
if (tmp != INVALID_IMM)
@ -1032,6 +1038,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
{
sljit_s32 args, size, i, tmp;
sljit_ins push = 0;
#ifdef _WIN32
sljit_uw imm;
#endif
CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
@ -1052,12 +1061,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
local_size = ((size + local_size + 7) & ~7) - size;
compiler->local_size = local_size;
#ifdef _WIN32
if (local_size >= 256) {
if (local_size > 4096)
imm = get_imm(4096);
else
imm = get_imm(local_size & ~0xff);
SLJIT_ASSERT(imm != INVALID_IMM);
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(SLJIT_SP) | imm));
}
#else
if (local_size > 0) {
if (local_size <= (127 << 2))
FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
else
FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
}
#endif
args = get_arg_count(arg_types);
@ -1068,6 +1090,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (args >= 3)
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));
#ifdef _WIN32
if (local_size >= 256) {
if (local_size > 4096) {
imm = get_imm(4096);
SLJIT_ASSERT(imm != INVALID_IMM);
if (local_size < 4 * 4096) {
if (local_size > 2 * 4096) {
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
local_size -= 4096;
}
if (local_size > 2 * 4096) {
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
local_size -= 4096;
}
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
local_size -= 4096;
SLJIT_ASSERT(local_size > 0);
}
else {
FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1));
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
SLJIT_ASSERT(reg_map[SLJIT_R3] < 7);
FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1));
FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff)));
local_size &= 0xfff;
if (local_size != 0)
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
}
if (local_size >= 256) {
imm = get_imm(local_size & ~0xff);
SLJIT_ASSERT(imm != INVALID_IMM);
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
}
}
local_size &= 0xff;
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size));
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1)));
}
else if (local_size > 0)
FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size));
#endif
return SLJIT_SUCCESS;
}
@ -1119,11 +1196,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
/* Operators */
/* --------------------------------------------------------------------- */
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
#ifdef __cplusplus
extern "C" {
#endif
#if defined(__GNUC__)
#ifdef _WIN32
extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
extern long long __rt_sdiv(int denominator, int numerator);
#elif defined(__GNUC__)
extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
@ -1134,10 +1216,14 @@ extern int __aeabi_idivmod(int numerator, int denominator);
}
#endif
#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
sljit_sw saved_reg_list[3];
sljit_sw saved_reg_count;
#endif
CHECK_ERROR();
CHECK(check_sljit_emit_op0(compiler, op));
@ -1155,6 +1241,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
| (reg_map[SLJIT_R0] << 12)
| (reg_map[SLJIT_R0] << 16)
| reg_map[SLJIT_R1]);
#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW:
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
case SLJIT_DIV_UW:
case SLJIT_DIV_SW:
return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW:
case SLJIT_DIV_UW:
@ -1183,7 +1280,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
}
}
#if defined(__GNUC__)
#ifdef _WIN32
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv))));
#elif defined(__GNUC__)
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
#else
@ -1203,6 +1306,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
}
return SLJIT_SUCCESS;
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
}
return SLJIT_SUCCESS;

View file

@ -448,7 +448,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
sljit_ins ins = NOP;
sljit_u8 offsets[4];
SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12);
SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
arg_types >>= SLJIT_DEF_SHIFT;
@ -516,7 +516,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
else if (arg_count != word_arg_count)
ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2));
else if (arg_count == 1)
ins = ADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3);
ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4);
arg_count--;
word_arg_count--;

View file

@ -547,7 +547,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
sljit_ins prev_ins = NOP;
sljit_ins ins = NOP;
SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12);
SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
arg_types >>= SLJIT_DEF_SHIFT;
@ -591,7 +591,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
if (arg_count != word_arg_count)
ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count);
else if (arg_count == 1)
ins = DADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3);
ins = DADDU | S(SLJIT_R0) | TA(0) | DA(4);
arg_count--;
word_arg_count--;
break;

View file

@ -57,14 +57,14 @@ typedef sljit_u32 sljit_ins;
#define RETURN_ADDR_REG 31
/* Flags are kept in volatile registers. */
#define EQUAL_FLAG 31
#define EQUAL_FLAG 3
#define OTHER_FLAG 1
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4
0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31
};
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@ -612,16 +612,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
/* Frequent case. */
FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP)));
base = S(SLJIT_SP);
offs = local_size - (sljit_sw)sizeof(sljit_sw);
}
else {
FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
FAIL_IF(load_immediate(compiler, DR(OTHER_FLAG), local_size));
FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP)));
FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP)));
base = S(TMP_REG2);
local_size = 0;
offs = -(sljit_sw)sizeof(sljit_sw);
}
offs = local_size - (sljit_sw)(sizeof(sljit_sw));
FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS));
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
@ -805,7 +806,8 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
tmp_ar = reg_ar;
delay_slot = reg_ar;
} else {
}
else {
tmp_ar = DR(TMP_REG1);
delay_slot = MOVABLE_INS;
}
@ -881,11 +883,39 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
/* Emits the data transfer selected by (flags & MEM_MASK) between the register
   reg_ar and the memory operand described by arg / argw.
   Returns the result of the last emit helper, or compiler->error. */
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw)
{
sljit_s32 tmp_ar, base, delay_slot;
/* Try the fast path first; a non-zero result ends the function here. */
if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
return compiler->error;
/* NOTE(review): as listed, the unconditional return below makes every
   statement after it unreachable. This listing looks like a rendered diff in
   which removed and added lines are interleaved - confirm which of the two
   code paths belongs to the current version of the file. */
compiler->cache_arg = 0;
compiler->cache_argw = 0;
return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
/* Pick the register used to build the address: for loads whose kind is at
   most GPR_REG the destination itself is used, otherwise TMP_REG1. */
if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
tmp_ar = reg_ar;
delay_slot = reg_ar;
}
else {
tmp_ar = DR(TMP_REG1);
delay_slot = MOVABLE_INS;
}
base = arg & REG_MASK;
/* Indexed operand: address = base + (offset register << (argw & 0x3)). */
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
argw &= 0x3;
if (SLJIT_UNLIKELY(argw)) {
FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | DA(tmp_ar) | SH_IMM(argw), tmp_ar));
FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar));
}
else
FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | DA(tmp_ar), tmp_ar));
return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
}
/* Immediate operand: load argw into tmp_ar, then add the base register when
   there is one (base != 0). */
FAIL_IF(load_immediate(compiler, tmp_ar, argw));
if (base != 0)
FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar));
return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
}
static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)

View file

@ -123,34 +123,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (args > 0) {
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
inst[0] = MOV_r_rm;
inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
inst += 2;
}
if (args > 1) {
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
inst[0] = MOV_r_rm;
inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
inst += 2;
}
if (args > 2) {
*inst++ = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
*inst++ = 0x24;
*inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
inst[0] = MOV_r_rm;
inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
inst[2] = 0x24;
inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
}
#else
if (args > 0) {
*inst++ = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
*inst++ = sizeof(sljit_sw) * 2;
inst[0] = MOV_r_rm;
inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
inst[2] = sizeof(sljit_sw) * 2;
inst += 3;
}
if (args > 1) {
*inst++ = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
*inst++ = sizeof(sljit_sw) * 3;
inst[0] = MOV_r_rm;
inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
inst[2] = sizeof(sljit_sw) * 3;
inst += 3;
}
if (args > 2) {
*inst++ = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
*inst++ = sizeof(sljit_sw) * 4;
inst[0] = MOV_r_rm;
inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
inst[2] = sizeof(sljit_sw) * 4;
}
#endif
@ -170,17 +174,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
compiler->local_size = local_size;
#ifdef _WIN32
if (local_size > 1024) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
#else
/* Space for a single argument. This amount is excluded when the stack is allocated below. */
local_size -= sizeof(sljit_sw);
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif
FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
if (local_size > 0) {
if (local_size <= 4 * 4096) {
if (local_size > 4096)
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
if (local_size > 2 * 4096)
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
if (local_size > 3 * 4096)
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
}
else {
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);
SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096);
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
INC_SIZE(2);
inst[0] = JNE_i8;
inst[1] = (sljit_s8) -16;
}
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
}
#endif

View file

@ -83,6 +83,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
compiler->mode32 = 0;
#ifdef _WIN64
/* Two/four register slots for parameters plus space for xmm6 register if needed. */
if (fscratches >= 6 || fsaveds >= 1)
@ -126,35 +128,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
#ifndef _WIN64
if (args > 0) {
*inst++ = REX_W;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
inst[0] = REX_W;
inst[1] = MOV_r_rm;
inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
inst += 3;
}
if (args > 1) {
*inst++ = REX_W | REX_R;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
inst[0] = REX_W | REX_R;
inst[1] = MOV_r_rm;
inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
inst += 3;
}
if (args > 2) {
*inst++ = REX_W | REX_R;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
inst[0] = REX_W | REX_R;
inst[1] = MOV_r_rm;
inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
}
#else
if (args > 0) {
*inst++ = REX_W;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
inst[0] = REX_W;
inst[1] = MOV_r_rm;
inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
inst += 3;
}
if (args > 1) {
*inst++ = REX_W;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
inst[0] = REX_W;
inst[1] = MOV_r_rm;
inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
inst += 3;
}
if (args > 2) {
*inst++ = REX_W | REX_B;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
inst[0] = REX_W | REX_B;
inst[1] = MOV_r_rm;
inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
}
#endif
}
@ -163,58 +169,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
compiler->local_size = local_size;
#ifdef _WIN64
if (local_size > 1024) {
/* Allocate stack for the callback, which grows the stack. */
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
FAIL_IF(!inst);
INC_SIZE(4 + (3 + sizeof(sljit_s32)));
*inst++ = REX_W;
*inst++ = GROUP_BINARY_83;
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
/* Allocated size for registers must be divisible by 8. */
SLJIT_ASSERT(!(saved_register_size & 0x7));
/* Aligned to 16 byte. */
if (saved_register_size & 0x8) {
*inst++ = 5 * sizeof(sljit_sw);
local_size -= 5 * sizeof(sljit_sw);
} else {
*inst++ = 4 * sizeof(sljit_sw);
local_size -= 4 * sizeof(sljit_sw);
if (local_size > 0) {
if (local_size <= 4 * 4096) {
if (local_size > 4096)
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
if (local_size > 2 * 4096)
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
if (local_size > 3 * 4096)
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
}
/* Second instruction */
SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
*inst++ = REX_W;
*inst++ = MOV_rm_i32;
*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
sljit_unaligned_store_s32(inst, local_size);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
else {
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);
SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
INC_SIZE(2);
inst[0] = JNE_i8;
inst[1] = (sljit_s8) -19;
}
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
}
#endif
if (local_size > 0) {
if (local_size <= 127) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
FAIL_IF(!inst);
INC_SIZE(4);
*inst++ = REX_W;
*inst++ = GROUP_BINARY_83;
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
*inst++ = local_size;
}
else {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
FAIL_IF(!inst);
INC_SIZE(7);
*inst++ = REX_W;
*inst++ = GROUP_BINARY_81;
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
sljit_unaligned_store_s32(inst, local_size);
inst += sizeof(sljit_s32);
}
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
}
#ifdef _WIN64

View file

@ -669,23 +669,6 @@ static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
#ifdef _WIN32
#include <malloc.h>
/* Reserves local_size bytes with alloca() and writes a zero to the start of
   the returned block. */
static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size)
{
/* Workaround for calling the internal _chkstk() function on Windows.
Doing so touches all 4k pages that belong to the requested stack space,
whose size is passed in local_size. This is necessary on Windows, where
the stack can only grow in 4k steps. If the stack is already large enough
the call merely burns CPU cycles, but that cannot be known in advance, so
it must always be made. (Original author's remark: this seems to be a bad
design in general, even if there are reasons for it.) */
/* The store goes through a volatile pointer, presumably so that the
allocation and the write cannot be optimized away. */
*(volatile sljit_s32*)alloca(local_size) = 0;
}
#endif
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else