diff --git a/0001-Add-support-for-PCRE-V2-and-prefer-it.patch b/0001-Add-support-for-PCRE-V2-and-prefer-it.patch new file mode 100644 index 0000000..25824b1 --- /dev/null +++ b/0001-Add-support-for-PCRE-V2-and-prefer-it.patch @@ -0,0 +1,328 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Dave Beckett +Date: Sun, 3 Sep 2023 21:41:59 -0700 +Subject: [PATCH] Add support for PCRE V2 (and prefer it) + +Intended to address GitHub Issue 12 +https://github.com/dajobe/rasqal/issues/12 +--- + configure.ac | 70 +++++++++++++++++++++++- + src/rasqal_regex.c | 133 ++++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 197 insertions(+), 6 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 3b6c9ea2..6c6dbf63 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -448,8 +448,10 @@ AC_SUBST(RAPTOR_MIN_VERSION) + AM_CONDITIONAL(RASQAL_SORT, test $RAPTOR_VERSION_DEC -lt '20015') + + dnl Checks for regex libraries ++have_regex_pcre2=0 + have_regex_pcre=0 + have_regex_posix=0 ++need_regex_pcre2=0 + need_regex_pcre=0 + need_regex_posix=0 + +@@ -501,6 +503,45 @@ if test "x$enable_pcre" != "xno" ; then + fi + + ++AC_ARG_WITH(pcre2-config, [ --with-pcre2-config=PATH Location of PCRE2 pcre2-config (auto)], pcre2_config="$withval", pcre2_config="") ++ ++if test "X$pcre2_config" != "Xno" ; then ++ if test "X$pcre2_config" != "X" ; then ++ AC_MSG_CHECKING(for $pcre2_config) ++ ++ if test -x $pcre2_config ; then ++ PCRE2_CONFIG=$pcre2_config ++ AC_MSG_RESULT(yes) ++ else ++ AC_MSG_ERROR([pcre2-config not found at specified path $pcre2_config]) ++ fi ++ fi ++ if test "X$PCRE2_CONFIG" = "X" ; then ++ AC_CHECK_PROGS(PCRE2_CONFIG, pcre2-config) ++ fi ++fi ++ ++AC_MSG_CHECKING(for pcre2) ++PCRE2_VERSION=`$PCRE2_CONFIG --version 2>/dev/null` ++PCRE2_MIN_VERSION=10.37 ++ ++PCRE2_VERSION_DEC=`echo $PCRE2_VERSION | $AWK -F. '{printf("%d\n", 100*$1 + $2)};'` ++PCRE2_MIN_VERSION_DEC=`echo $PCRE2_MIN_VERSION | $AWK -F. '{printf("%d\n", 100*$1 + $2)};'` ++if test "X$PCRE2_VERSION" = X; then ++ AC_MSG_RESULT(not present) ++elif test "X$PCRE2_VERSION" -a $PCRE2_VERSION_DEC -ge $PCRE2_MIN_VERSION_DEC; then ++ have_regex_pcre2=1 ++ AC_MSG_RESULT($PCRE2_VERSION) ++else ++ AC_MSG_WARN($PCRE2_VERSION - too old - need $PCRE2_MIN_VERSION) ++fi ++ ++if test $have_regex_pcre2 = 1; then ++ AC_DEFINE(HAVE_REGEX_PCRE2, 1, [have PCRE2 regex - Perl Compatible Regular Expressions V2]) ++fi ++ ++ ++ + AC_MSG_CHECKING(for posix regex library) + oLIBS="$LIBS" + if test $ac_cv_header_regex_h = yes; then +@@ -522,10 +563,18 @@ fi + + + +-AC_ARG_WITH(regex-library, [ --with-regex-library=NAME Use regex library - posix, pcre (auto)], regex_library="$withval", regex_library="") ++AC_ARG_WITH(regex-library, [ --with-regex-library=NAME Use regex library - posix, pcre2, pcre (auto)], regex_library="$withval", regex_library="") + +-for regex_library_name in $regex_library pcre posix; do ++for regex_library_name in $regex_library pcre2 pcre posix; do + case $regex_library_name in ++ pcre2) ++ if test $have_regex_pcre2 = 1; then ++ need_regex_pcre2=1 ++ AC_DEFINE(RASQAL_REGEX_PCRE2, 1, [Use PCRE2 regex library]) ++ break ++ fi ++ ;; ++ + pcre) + if test $have_regex_pcre = 1; then + need_regex_pcre=1 +@@ -552,7 +601,9 @@ done + + AC_MSG_CHECKING(regex library to use) + regex_library= +-if test $need_regex_pcre = 1; then ++if test $need_regex_pcre2 = 1; then ++ regex_library=pcre2 ++elif test $need_regex_pcre = 1; then + regex_library=pcre + elif test $need_regex_posix = 1; then + regex_library=posix +@@ -929,6 +980,19 @@ if test $need_regex_pcre = 1; then + fi + + ++if test $need_regex_pcre2 = 1; then ++ C=`$PCRE2_CONFIG --cflags` ++ L=`$PCRE2_CONFIG --libs8` ++ RASQAL_INTERNAL_CPPFLAGS="$RASQAL_INTERNAL_CPPFLAGS $C" ++ RASQAL_EXTERNAL_LIBS="$RASQAL_EXTERNAL_LIBS $L" ++ ++ PKGCONFIG_CFLAGS="$PKGCONFIG_CFLAGS $C" ++ PKGCONFIG_LIBS="$PKGCONFIG_LIBS $L" ++ unset C ++ unset L ++fi ++ ++ + if test $need_digest_mhash = yes; then + C="" + L="-lmhash" +diff --git a/src/rasqal_regex.c b/src/rasqal_regex.c +index 698c80fd..b2dcaf72 100644 +--- a/src/rasqal_regex.c ++++ b/src/rasqal_regex.c +@@ -37,6 +37,11 @@ + #endif + #include + ++#ifdef RASQAL_REGEX_PCRE2 ++#define PCRE2_CODE_UNIT_WIDTH 8 ++#include ++#endif ++ + #ifdef RASQAL_REGEX_PCRE + #include + #endif +@@ -81,6 +86,12 @@ rasqal_regex_match(rasqal_world* world, raptor_locator* locator, + { + int flag_i = 0; /* regex_flags contains i */ + const char *p; ++#ifdef RASQAL_REGEX_PCRE2 ++ pcre2_code* re_code; ++ uint32_t compile_options = 0; ++ int errornumber = 0; ++ PCRE2_SIZE erroroffset = 0; ++#endif + #ifdef RASQAL_REGEX_PCRE + pcre* re; + int compile_options = PCRE_UTF8; +@@ -99,6 +110,48 @@ rasqal_regex_match(rasqal_world* world, raptor_locator* locator, + if(*p == 'i') + flag_i++; + ++#ifdef RASQAL_REGEX_PCRE2 ++ if(flag_i) ++ compile_options |= PCRE2_CASELESS; ++ ++ re_code = pcre2_compile(RASQAL_GOOD_CAST(PCRE2_SPTR, pattern), ++ PCRE2_ZERO_TERMINATED, ++ compile_options, ++ &errornumber, ++ &erroroffset, ++ /* ccontext */ NULL); ++ if(!re_code) { ++ PCRE2_UCHAR buffer[256]; ++ pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); ++ rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR, locator, ++ "Regex compile of '%s' failed at offset %d: %s", ++ pattern, (int)erroroffset, buffer); ++ rc = -1; ++ } else { ++ pcre2_match_data *md = pcre2_match_data_create(4, NULL); ++ ++ rc = pcre2_match(re_code, ++ RASQAL_GOOD_CAST(PCRE2_SPTR, subject), ++ RASQAL_GOOD_CAST(PCRE2_SIZE, subject_len), ++ /* startoffset */ 0, ++ /* options */ 0, ++ md, ++ /* mcontext */ NULL /* no match detail wanted */ ++ ); ++ if(rc >= 0) ++ rc = 1; ++ else if(rc != PCRE2_ERROR_NOMATCH && rc != PCRE2_ERROR_NULL) { ++ rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR, locator, ++ "Regex match failed - returned code %d", rc); ++ rc= -1; ++ } else ++ rc = 0; ++ pcre2_match_data_free(md); ++ } ++ pcre2_code_free(re_code); ++ ++#endif ++ + #ifdef RASQAL_REGEX_PCRE + if(flag_i) + compile_options |= PCRE_CASELESS; +@@ -169,7 +222,7 @@ rasqal_regex_match(rasqal_world* world, raptor_locator* locator, + } + + +- ++#if defined(RASQAL_REGEX_PCRE) || defined(RASQAL_REGEX_POSIX) + /* + * rasqal_regex_get_ref_number: + * @str: pointer to pointer to buffer at '$' symbol +@@ -204,6 +257,7 @@ rasqal_regex_get_ref_number(const char **str) + *str = p; + return ref_number; + } ++#endif + + + #ifdef RASQAL_REGEX_PCRE +@@ -698,6 +752,12 @@ rasqal_regex_replace(rasqal_world* world, raptor_locator* locator, + size_t* result_len_p) + { + const char *p; ++#ifdef RASQAL_REGEX_PCRE2 ++ pcre2_code* re_code; ++ uint32_t compile_options = 0; ++ int errornumber = 0; ++ PCRE2_SIZE erroroffset = 0; ++#endif + #ifdef RASQAL_REGEX_PCRE + pcre* re; + int compile_options = PCRE_UTF8; +@@ -715,6 +775,73 @@ rasqal_regex_replace(rasqal_world* world, raptor_locator* locator, + #endif + char *result_s = NULL; + ++#ifdef RASQAL_REGEX_PCRE2 ++ for(p = regex_flags; p && *p; p++) { ++ if(*p == 'i') ++ compile_options |= PCRE2_CASELESS; ++ } ++ ++ re_code = pcre2_compile(RASQAL_GOOD_CAST(PCRE2_SPTR, pattern), ++ PCRE2_ZERO_TERMINATED, ++ compile_options, ++ &errornumber, ++ &erroroffset, ++ /* ccontext */ NULL); ++ if(!re_code) { ++ PCRE2_UCHAR buffer[256]; ++ pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); ++ rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR, locator, ++ "Regex compile of '%s' failed at offset %d: %s", ++ pattern, (int)erroroffset, buffer); ++ } else { ++ uint32_t substitute_options = PCRE2_SUBSTITUTE_LITERAL | PCRE2_SUBSTITUTE_GLOBAL; ++ size_t output_len = 0; ++ char* output_buffer = NULL; ++ int rc; ++ ++ /* Calculate size of output buffer */ ++ rc = pcre2_substitute(re_code, ++ RASQAL_GOOD_CAST(PCRE2_SPTR, subject), ++ PCRE2_ZERO_TERMINATED, ++ /* startoffset */ 0, ++ substitute_options | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, ++ /* match_data */ NULL, ++ /* mcontext */ NULL, /* no match detail wanted */ ++ RASQAL_GOOD_CAST(PCRE2_SPTR, replace), ++ replace_len, ++ /* outputbuffer */ NULL, /* forcing size calc */ ++ RASQAL_GOOD_CAST(PCRE2_SIZE*, &output_len)); ++ if(rc == PCRE2_ERROR_NOMEMORY) { ++ output_buffer = RASQAL_MALLOC(char*, output_len + 1); ++ ++ rc = pcre2_substitute(re_code, ++ RASQAL_GOOD_CAST(PCRE2_SPTR, subject), ++ PCRE2_ZERO_TERMINATED, ++ /* startoffset */ 0, ++ substitute_options, ++ /* match_data */ NULL, ++ /* mcontext */ NULL, /* no match detail wanted */ ++ RASQAL_GOOD_CAST(PCRE2_SPTR, replace), ++ replace_len, ++ RASQAL_GOOD_CAST(PCRE2_UCHAR*, output_buffer), ++ RASQAL_GOOD_CAST(PCRE2_SIZE*, &output_len)); ++ } ++ if(rc < 0) { ++ rasqal_log_error_simple(world, RAPTOR_LOG_LEVEL_ERROR, locator, ++ "Regex replace of '%s' failed with code %d", ++ pattern, rc); ++ result_s = NULL; ++ if(output_buffer) ++ RASQAL_FREE(char*, output_buffer); ++ } else { ++ result_s = output_buffer; ++ if(result_len_p) ++ *result_len_p = output_len; ++ } ++ } ++ pcre2_code_free(re_code); ++#endif ++ + #ifdef RASQAL_REGEX_PCRE + for(p = regex_flags; p && *p; p++) { + if(*p == 'i') +@@ -794,7 +921,7 @@ main(int argc, char *argv[]) + { + rasqal_world* world; + const char *program = rasqal_basename(argv[0]); +-#ifdef RASQAL_REGEX_PCRE ++#if defined(RASQAL_REGEX_PCRE) || defined(RASQAL_REGEX_PCRE2) + raptor_locator* locator = NULL; + int test = 0; + #endif +@@ -813,7 +940,7 @@ main(int argc, char *argv[]) + program); + #endif + +-#ifdef RASQAL_REGEX_PCRE ++#if defined(RASQAL_REGEX_PCRE) || defined(RASQAL_REGEX_PCRE2) + for(test = 0; test < NTESTS; test++) { + const char* regex_flags = ""; + const char* subject = "abcd1234-^"; +-- +2.42.0 + diff --git a/PKGBUILD b/PKGBUILD index e79b654..7817213 100644 --- a/PKGBUILD +++ b/PKGBUILD @@ -15,7 +15,7 @@ depends=( glibc libgcrypt mpfr - pcre + pcre2 sh ) makedepends=( @@ -24,15 +24,19 @@ makedepends=( util-linux ) provides=(librasqal.so) -source=(https://download.librdf.org/source/$pkgname-$pkgver.tar.gz{,.asc}) +source=(https://download.librdf.org/source/$pkgname-$pkgver.tar.gz{,.asc} + 0001-Add-support-for-PCRE-V2-and-prefer-it.patch) sha512sums=('05728682797470db9e51d156012e8fde9dec1554d107372faa11cbe6cdc3356e92386f4f8de6d7c41e3100b76f9b1c6809102a913829cddbd2ff29043c04d522' - 'SKIP') + 'SKIP' + '2d2a56251963cadea18822ba42da03cbbe58d19952420563de862d95393ba5987f811cbb409bc26981c3b5c33714a303d5947273fc3f0b569ed010785cf7d3f5') b2sums=('87bca86b2f0dceb0801f5a34dae9ae1f87d8a7d0dc1e03fdf04998a0d4885ed76be2719d3e01489a5510a26715bb093a75ce194cc42f3cedff88f64161d6a2fb' - 'SKIP') + 'SKIP' + 'e25677586a5c5988567887811413ad3e4255a26228545fa642313b37bead61be265bd7b0705c75b0644002f78a5f837185a711ea3f332ec5a44f3bfd4929d883') validpgpkeys=('F879F0DEDA780198DD08DC6443EC92504F71955A') # Dave Beckett prepare() { cd $pkgname-$pkgver + patch -Np1 -i ../0001-Add-support-for-PCRE-V2-and-prefer-it.patch autoreconf -fiv }