changeset 6696:a476264b067f

6629836 perl 5.8 vulnerable to buffer overflow in regular expression engine 6688142 Lgrp perl test broken in 5.6.1
author vm156888
date Fri, 23 May 2008 01:41:17 -0700
parents 12d7dd4459fd
children acd61c9c8407
files usr/src/cmd/perl/5.8.4/distrib/patchlevel.h usr/src/cmd/perl/5.8.4/distrib/regcomp.c usr/src/cmd/perl/5.8.4/distrib/t/op/pat.t usr/src/cmd/perl/contrib/Sun/Solaris/Lgrp/t/Lgrp_api.t
diffstat 4 files changed, 53 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/perl/5.8.4/distrib/patchlevel.h	Thu May 22 22:23:49 2008 -0700
+++ b/usr/src/cmd/perl/5.8.4/distrib/patchlevel.h	Fri May 23 01:41:17 2008 -0700
@@ -150,6 +150,7 @@
 	"27722 perlio.c breaks on Solaris/gcc when > 256 FDs are available",
  	"SPRINTF0 - fixes for sprintf formatting issues - CVE-2005-3962",
 	"6663288 Upgrade to CGI.pm 3.33",
+ 	"REGEXP0 - fix for UTF-8 recoding in regexps - CVE-2007-5116",
 	NULL
 };
 
--- a/usr/src/cmd/perl/5.8.4/distrib/regcomp.c	Thu May 22 22:23:49 2008 -0700
+++ b/usr/src/cmd/perl/5.8.4/distrib/regcomp.c	Fri May 23 01:41:17 2008 -0700
@@ -126,7 +126,10 @@
     I32		extralen;
     I32		seen_zerolen;
     I32		seen_evals;
-    I32		utf8;
+    I32		utf8;		/* whether the pattern is utf8 or not */
+    I32		orig_utf8;	/* whether the pattern was originally in utf8 */
+				/* XXX use this for future optimisation of case
+				 * where pattern must be upgraded to utf8. */
 #if ADD_TO_REGEXEC
     char 	*starttry;		/* -Dr: where regtry was called. */
 #define RExC_starttry	(pRExC_state->starttry)
@@ -152,6 +155,7 @@
 #define RExC_seen_zerolen	(pRExC_state->seen_zerolen)
 #define RExC_seen_evals	(pRExC_state->seen_evals)
 #define RExC_utf8	(pRExC_state->utf8)
+#define RExC_orig_utf8	(pRExC_state->orig_utf8)
 
 #define	ISMULT1(c)	((c) == '*' || (c) == '+' || (c) == '?')
 #define	ISMULT2(s)	((*s) == '*' || (*s) == '+' || (*s) == '?' || \
@@ -1746,15 +1750,17 @@
     if (exp == NULL)
 	FAIL("NULL regexp argument");
 
-    RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
-
-    RExC_precomp = exp;
+    RExC_utf8 = RExC_orig_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
+
     DEBUG_r({
 	 if (!PL_colorset) reginitcolors();
 	 PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
 		       PL_colors[4],PL_colors[5],PL_colors[0],
-		       (int)(xend - exp), RExC_precomp, PL_colors[1]);
+		       (int)(xend - exp), exp, PL_colors[1]);
     });
+
+redo_first_pass:
+    RExC_precomp = exp;
     RExC_flags = pm->op_pmflags;
     RExC_sawback = 0;
 
@@ -1780,6 +1786,26 @@
 	RExC_precomp = Nullch;
 	return(NULL);
     }
+
+    if (RExC_utf8 && !RExC_orig_utf8) {
+        /* It's possible to write a regexp in ascii that represents unicode
+        codepoints outside of the byte range, such as via \x{100}. If we
+        detect such a sequence we have to convert the entire pattern to utf8
+        and then recompile, as our sizing calculation will have been based
+        on 1 byte == 1 character, but we will need to use utf8 to encode
+        at least some part of the pattern, and therefore must convert the whole
+        thing.
+        XXX: somehow figure out how to make this less expensive...
+        -- dmq */
+        STRLEN len = xend-exp;
+        DEBUG_r(PerlIO_printf(Perl_debug_log,
+	    "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
+        exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
+        xend = exp + len;
+        RExC_orig_utf8 = RExC_utf8;
+        SAVEFREEPV(exp);
+        goto redo_first_pass;
+    }
     DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size));
 
     /* Small enough for pointer-storage convention?
--- a/usr/src/cmd/perl/5.8.4/distrib/t/op/pat.t	Thu May 22 22:23:49 2008 -0700
+++ b/usr/src/cmd/perl/5.8.4/distrib/t/op/pat.t	Fri May 23 01:41:17 2008 -0700
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..1056\n";
+print "1..1058\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3255,6 +3255,15 @@
 	);
     }
 }
+ 
+{
+    use warnings;
+    my @w;
+    local $SIG{__WARN__}=sub{push @w,"@_"};
+    my $c=qq(\x{DF}); 
+    ok($c=~/${c}|\x{100}/, "ASCII pattern that really is utf8");
+    ok(@w==0, "No warnings");
+}    
 
 # perl #25269: panic: pp_match start/end pointers
 ok("a-bc" eq eval {
--- a/usr/src/cmd/perl/contrib/Sun/Solaris/Lgrp/t/Lgrp_api.t	Thu May 22 22:23:49 2008 -0700
+++ b/usr/src/cmd/perl/contrib/Sun/Solaris/Lgrp/t/Lgrp_api.t	Fri May 23 01:41:17 2008 -0700
@@ -24,13 +24,23 @@
 # Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
+# ident	"@(#)Lgrp_api.t	1.1	06/09/01 SMI"
 #
 
 require 5.8.0;
 use strict;
 use warnings;
 
+# Make sure that Lgrp test is not executed on anything less than 5.8.0,
+# as Lgrp is not implemented there
+BEGIN {
+	if ($] < 5.008) {
+		# Fake one successfull test and exit
+		printf "1..1\nok\n";
+		exit 0;
+	}
+}
+
 ######################################################################
 # Tests for Sun::Solaris::Lgrp API.
 #