mirror of
https://github.com/php/php-src.git
synced 2026-04-09 09:03:04 +02:00
New memcpy()-based wordwrap() implementation. The simple case
(single-character break, no forced break) appears to be about 60%
faster, and there's simply no comparison for non-simple cases with
non-trivial amounts of text. The old algorithm was O(n^2) (with an
unfortunately large constant factor) because of the use of strncat();
the new one is O(n). Added some more tests, too.
@ - Made wordwrap() significantly faster. (Jim)
# test case: $t = join('',file('ChangeLog')); $w = wordwrap($t,10,"\n",1);
# The new code completes in less than a second. I'm still waiting for the
# old code to finish.
This commit is contained in:
@@ -613,9 +613,11 @@ PHP_FUNCTION(ltrim)
|
||||
Wraps buffer to selected number of characters using string break char */
|
||||
PHP_FUNCTION(wordwrap)
|
||||
{
|
||||
char *text, *breakchar = "\n", *newtext;
|
||||
const char *text, *breakchar = "\n";
|
||||
char *newtext;
|
||||
int textlen, breakcharlen = 1, newtextlen;
|
||||
long linelength = 75, i = 0, l = 0, pgr = 0, last = 0;
|
||||
long current = 0, laststart = 0, lastspace = 0;
|
||||
long linelength = 75;
|
||||
zend_bool docut = 0;
|
||||
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|lsb", &text, &textlen, &linelength, &breakchar, &breakcharlen, &docut) == FAILURE) {
|
||||
@@ -634,121 +636,96 @@ PHP_FUNCTION(wordwrap)
|
||||
additional storage space */
|
||||
if (breakcharlen == 1 && !docut) {
|
||||
newtext = estrndup(text, textlen);
|
||||
while (newtext[i] != '\0') {
|
||||
|
||||
/* prescan line to see if it is greater than linelength */
|
||||
l = 0;
|
||||
while (newtext[i+l] != breakchar[0]) {
|
||||
if (newtext[i+l] == '\0') {
|
||||
l--;
|
||||
break;
|
||||
}
|
||||
l++;
|
||||
laststart = lastspace = 0;
|
||||
for (current = 0; current < textlen; current++) {
|
||||
if (text[current] == breakchar[0]) {
|
||||
laststart = lastspace = current;
|
||||
}
|
||||
|
||||
if (l >= linelength) {
|
||||
pgr = l;
|
||||
l = linelength;
|
||||
|
||||
/* needs breaking; work backwards to find previous word */
|
||||
while (l >= 0) {
|
||||
if (newtext[i+l] == ' ') {
|
||||
newtext[i+l] = breakchar[0];
|
||||
break;
|
||||
}
|
||||
l--;
|
||||
}
|
||||
|
||||
if (l == -1) {
|
||||
/* couldn't break it backwards, try looking forwards */
|
||||
l = linelength;
|
||||
while (l <= pgr) {
|
||||
if(newtext[i+l] == ' ') {
|
||||
newtext[i+l] = breakchar[0];
|
||||
break;
|
||||
}
|
||||
l++;
|
||||
}
|
||||
else if (text[current] == ' ') {
|
||||
if (current - laststart >= linelength) {
|
||||
newtext[current] = breakchar[0];
|
||||
laststart = current;
|
||||
}
|
||||
lastspace = current;
|
||||
}
|
||||
else if (current - laststart >= linelength
|
||||
&& laststart != lastspace) {
|
||||
newtext[lastspace] = breakchar[0];
|
||||
laststart = lastspace;
|
||||
}
|
||||
|
||||
i += l + 1;
|
||||
}
|
||||
|
||||
RETURN_STRINGL(newtext, textlen, 0);
|
||||
}
|
||||
else {
|
||||
/* Multiple character line break */
|
||||
newtextlen = textlen * (breakcharlen + 1) + 1;
|
||||
/* Multiple character line break or forced cut */
|
||||
if (linelength > 0) {
|
||||
newtextlen = textlen + (textlen/linelength) * breakcharlen + 1;
|
||||
}
|
||||
else {
|
||||
newtextlen = textlen * (breakcharlen + 1) + 1;
|
||||
}
|
||||
newtext = emalloc(newtextlen);
|
||||
newtext[0] = '\0';
|
||||
|
||||
i = 0;
|
||||
while (text[i] != '\0') {
|
||||
/* now keep track of the actual new text length */
|
||||
newtextlen = 0;
|
||||
|
||||
/* prescan line to see if it is greater than linelength */
|
||||
l = 0;
|
||||
while (text[i+l] != '\0') {
|
||||
if (text[i+l] == breakchar[0]) {
|
||||
if (breakcharlen == 1 || !strncmp(text+i+l, breakchar, breakcharlen))
|
||||
break;
|
||||
}
|
||||
l++;
|
||||
laststart = lastspace = 0;
|
||||
for (current = 0; current < textlen; current++) {
|
||||
/* when we hit an existing break, copy to new buffer, and
|
||||
* fix up laststart and lastspace */
|
||||
if (text[current] == breakchar[0]
|
||||
&& current + breakcharlen < textlen
|
||||
&& !strncmp(text+current, breakchar, breakcharlen)) {
|
||||
memcpy(newtext+newtextlen, text+laststart, current-laststart+breakcharlen);
|
||||
newtextlen += current-laststart+breakcharlen;
|
||||
current += breakcharlen - 1;
|
||||
laststart = lastspace = current + 1;
|
||||
}
|
||||
|
||||
if (l >= linelength) {
|
||||
pgr = l;
|
||||
l = linelength;
|
||||
|
||||
/* needs breaking; work backwards to find previous word */
|
||||
while (l >= 0) {
|
||||
if (text[i+l] == ' ') {
|
||||
strncat(newtext, text+last, i+l-last);
|
||||
strncat(newtext, breakchar, breakcharlen);
|
||||
last = i + l + 1;
|
||||
break;
|
||||
}
|
||||
l--;
|
||||
/* if it is a space, check if it is at the line boundary,
|
||||
* copy and insert a break, or just keep track of it */
|
||||
else if (text[current] == ' ') {
|
||||
if (current - laststart >= linelength) {
|
||||
memcpy(newtext+newtextlen, text+laststart, current-laststart);
|
||||
newtextlen += current - laststart;
|
||||
memcpy(newtext+newtextlen, breakchar, breakcharlen);
|
||||
newtextlen += breakcharlen;
|
||||
laststart = current + 1;
|
||||
}
|
||||
|
||||
if (l == -1) {
|
||||
/* couldn't break it backwards, try looking forwards */
|
||||
l = linelength - 1;
|
||||
while (l <= pgr) {
|
||||
if (!docut) {
|
||||
if (text[i+l] == ' ') {
|
||||
strncat(newtext, text+last, i+l-last);
|
||||
strncat(newtext, breakchar, breakcharlen);
|
||||
last = i + l + 1;
|
||||
++l;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* cut if longer than allowed */
|
||||
else {
|
||||
if (text[i+l] == ' ' || l > i-last) {
|
||||
strncat(newtext, text+last, i+l-last+1);
|
||||
strncat(newtext, breakchar, breakcharlen);
|
||||
last = i + l + 1;
|
||||
++l;
|
||||
break;
|
||||
}
|
||||
}
|
||||
l++;
|
||||
}
|
||||
}
|
||||
i += l + 1;
|
||||
lastspace = current;
|
||||
}
|
||||
else {
|
||||
i += (l ? l : 1);
|
||||
/* if we are cutting, and we've accumulated enough
|
||||
* characters, copy and insert a break. */
|
||||
else if (current - laststart >= linelength && docut) {
|
||||
memcpy(newtext+newtextlen, text+laststart, current-laststart);
|
||||
newtextlen += current - laststart;
|
||||
memcpy(newtext+newtextlen, breakchar, breakcharlen);
|
||||
newtextlen += breakcharlen;
|
||||
laststart = lastspace = current;
|
||||
}
|
||||
/* if the current word puts us over the linelength, copy
|
||||
* back up until the last space, insert a break, and move
|
||||
* up the laststart */
|
||||
else if (current - laststart >= linelength
|
||||
&& laststart < lastspace) {
|
||||
memcpy(newtext+newtextlen, text+laststart, lastspace-laststart);
|
||||
newtextlen += lastspace - laststart;
|
||||
memcpy(newtext+newtextlen, breakchar, breakcharlen);
|
||||
newtextlen += breakcharlen;
|
||||
laststart = lastspace = lastspace + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (i + l > last) {
|
||||
strncat(newtext, text+last, i+l-last);
|
||||
/* copy over any stragglers */
|
||||
if (laststart != current) {
|
||||
memcpy(newtext+newtextlen, text+laststart, current-laststart);
|
||||
newtextlen += current - laststart;
|
||||
}
|
||||
|
||||
RETURN_STRINGL(newtext, strlen(newtext), 0);
|
||||
newtext[newtextlen] = '\0';
|
||||
|
||||
RETURN_STRINGL(newtext, newtextlen, 0);
|
||||
}
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
@@ -11,6 +11,19 @@ $tests = <<<TESTS
|
||||
"12345\\n12345\\n12345\\n12345" === wordwrap("12345 12345 12345 12345",0)
|
||||
"12345ab12345ab12345ab12345" === wordwrap("12345 12345 12345 12345",0,"ab")
|
||||
"12345 12345ab1234567890ab1234567890" === wordwrap("12345 12345 1234567890 1234567890",12,"ab")
|
||||
"123ab123ab123" === wordwrap("123ab123ab123", 3, "ab")
|
||||
"123ab123ab123" === wordwrap("123ab123ab123", 5, "ab")
|
||||
"123ab 123ab123" === wordwrap("123 123ab123", 3, "ab")
|
||||
"123ab123ab123" === wordwrap("123 123ab123", 5, "ab")
|
||||
"123 123ab123" === wordwrap("123 123 123", 10, "ab")
|
||||
|
||||
"123ab123ab123" === wordwrap("123ab123ab123", 3, "ab", 1)
|
||||
"123ab123ab123" === wordwrap("123ab123ab123", 5, "ab", 1)
|
||||
"123ab 12ab3ab123" === wordwrap("123 123ab123", 3, "ab", 1)
|
||||
"123 ab123ab123" === wordwrap("123 123ab123", 5, "ab", 1)
|
||||
"123 123ab 123" === wordwrap("123 123 123", 8, "ab", 1)
|
||||
"123 123ab45 123" === wordwrap("123 12345 123", 8, "ab", 1)
|
||||
"1ab2ab3ab4" === wordwrap("1234", 1, "ab", 1)
|
||||
TESTS;
|
||||
|
||||
include('../../../../tests/quicktester.inc');
|
||||
|
||||
Reference in New Issue
Block a user