From ad47d45e9da8df364cb0a61b6146d51c196c8891 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 20 Mar 2012 19:44:05 -0400 Subject: upgrade to latest upstream TRE regex code (0.8.0) the main practical results of this change are 1. the regex code is no longer subject to LGPL; it's now 2-clause BSD 2. most (all?) popular nonstandard regex extensions are supported I hesitate to call this a "sync" since both the old and new code are heavily modified. in one sense, the old code was "more severely" modified, in that it was actively hostile to non-strictly-conforming expressions. on the other hand, the new code has eliminated the useless translation of the entire regex string to wchar_t prior to compiling, and now only converts multibyte character literals as needed. in the future i may use this modified TRE as a basis for writing the long-planned new regex engine that will avoid multibyte-to-wide character conversion entirely by compiling multibyte bracket expressions specific to UTF-8. --- src/regex/tre.h | 99 ++++++++++++++++++--------------------------------------- 1 file changed, 31 insertions(+), 68 deletions(-) (limited to 'src/regex/tre.h') diff --git a/src/regex/tre.h b/src/regex/tre.h index bfd171f4..d6e1c2a7 100644 --- a/src/regex/tre.h +++ b/src/regex/tre.h @@ -1,21 +1,31 @@ /* tre-internal.h - TRE internal definitions - Copyright (c) 2001-2006 Ville Laurikari . - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Copyright (c) 2001-2009 Ville Laurikari + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ @@ -23,12 +33,7 @@ #include <wchar.h> #include <wctype.h> -#define TRE_MULTIBYTE 1 #undef TRE_MBSTATE -#define TRE_WCHAR 1 -#define TRE_USE_SYSTEM_WCTYPE 1 -#define HAVE_WCSTOMBS 1 -#define TRE_MB_CUR_MAX MB_CUR_MAX #define NDEBUG @@ -37,33 +42,16 @@ typedef int reg_errcode_t; typedef wchar_t tre_char_t; - -#ifdef TRE_DEBUG -#include <stdio.h> -#define DPRINT(msg) do {printf msg; fflush(stdout);} while(0) -#else /* !TRE_DEBUG */ #define DPRINT(msg) do { } while(0) -#endif /* !TRE_DEBUG */ #define elementsof(x) ( sizeof(x) / sizeof(x[0]) ) -#if 1 -int __mbtowc(wchar_t *, const char *); -#define tre_mbrtowc(pwc, s, n, ps) (__mbtowc((pwc), (s))) -#else #define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n))) -#endif /* Wide characters. */ typedef wint_t tre_cint_t; #define TRE_CHAR_MAX WCHAR_MAX -#ifdef TRE_MULTIBYTE -#define TRE_MB_CUR_MAX MB_CUR_MAX -#else /* !TRE_MULTIBYTE */ -#define TRE_MB_CUR_MAX 1 -#endif /* !TRE_MULTIBYTE */ - #define tre_isalnum iswalnum #define tre_isalpha iswalpha #define tre_isblank iswblank @@ -98,9 +86,6 @@ typedef wctype_t tre_ctype_t; #define MAX(a, b) (((a) >= (b)) ? (a) : (b)) #define MIN(a, b) (((a) <= (b)) ? (a) : (b)) -/* Define STRF to the correct printf formatter for strings. */ -#define STRF "ls" - /* TNFA transition type. A TNFA state is an array of transitions, the terminator is a transition with NULL `state'. 
*/ typedef struct tnfa_transition tre_tnfa_transition_t; @@ -170,42 +155,21 @@ struct tnfa { tre_tnfa_transition_t *initial; tre_tnfa_transition_t *final; tre_submatch_data_t *submatch_data; + char *firstpos_chars; + int first_char; unsigned int num_submatches; tre_tag_direction_t *tag_directions; + int *minimal_tags; int num_tags; + int num_minimals; int end_tag; int num_states; int cflags; int have_backrefs; + int have_approx; + int params_depth; }; -#if 0 -static int -tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags); - -static void -tre_free(regex_t *preg); - -static void -tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, - const tre_tnfa_t *tnfa, int *tags, int match_eo); - -static reg_errcode_t -tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len, - tre_str_type_t type, int *match_tags, int eflags, - int *match_end_ofs); - -static reg_errcode_t -tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len, - tre_str_type_t type, int *match_tags, int eflags, - int *match_end_ofs); - -static reg_errcode_t -tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, - int len, tre_str_type_t type, int *match_tags, - int eflags, int *match_end_ofs); -#endif - /* from tre-mem.h: */ #define TRE_MEM_BLOCK_SIZE 1024 @@ -266,4 +230,3 @@ void tre_mem_destroy(tre_mem_t mem); #define xfree free #define xrealloc realloc -/* EOF */ -- cgit v1.2.1