Skip to content

Commit 2d2b582

Browse files
authored
Merge pull request #111 from gitlost/issue_106
PR 109 (double-width wrapping) and issue 106 (pre-colorization).
2 parents 2a15154 + 7466f68 commit 2d2b582

File tree

6 files changed

+433
-85
lines changed

6 files changed

+433
-85
lines changed

lib/cli/Colors.php

Lines changed: 42 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ static public function color($color) {
9191

9292
$colors = array();
9393
foreach (array('color', 'style', 'background') as $type) {
94-
$code = @$color[$type];
94+
$code = $color[$type];
9595
if (isset(self::$_colors[$type][$code])) {
9696
$colors[] = self::$_colors[$type][$code];
9797
}
@@ -115,26 +115,25 @@ static public function color($color) {
115115
static public function colorize($string, $colored = null) {
116116
$passed = $string;
117117

118-
if (isset(self::$_string_cache[md5($passed)]['colorized'])) {
119-
return self::$_string_cache[md5($passed)]['colorized'];
120-
}
121-
122118
if (!self::shouldColorize($colored)) {
123-
$colors = self::getColors();
124-
$search = array_keys( $colors );
125-
$return = str_replace( $search, '', $string );
126-
self::cacheString($passed, $return, $colored);
119+
$return = self::decolorize( $passed, 2 /*keep_encodings*/ );
120+
self::cacheString($passed, $return);
127121
return $return;
128122
}
129123

124+
$md5 = md5($passed);
125+
if (isset(self::$_string_cache[$md5]['colorized'])) {
126+
return self::$_string_cache[$md5]['colorized'];
127+
}
128+
130129
$string = str_replace('%%', '%¾', $string);
131130

132131
foreach (self::getColors() as $key => $value) {
133132
$string = str_replace($key, self::color($value), $string);
134133
}
135134

136135
$string = str_replace('%¾', '%', $string);
137-
self::cacheString($passed, $string, $colored);
136+
self::cacheString($passed, $string);
138137

139138
return $string;
140139
}
@@ -143,15 +142,22 @@ static public function colorize($string, $colored = null) {
143142
* Remove color information from a string.
144143
*
145144
* @param string $string A string with color information.
145+
* @param int $keep Optional. If the 1 bit is set, color tokens (eg "%n") won't be stripped. If the 2 bit is set, color encodings (ANSI escapes) won't be stripped. Default 0.
146146
* @return string A string with color information removed.
147147
*/
148-
static public function decolorize($string) {
149-
// Get rid of color tokens if they exist
150-
$string = str_replace(array_keys(self::getColors()), '', $string);
148+
static public function decolorize( $string, $keep = 0 ) {
149+
if ( ! ( $keep & 1 ) ) {
150+
// Get rid of color tokens if they exist
151+
$string = str_replace('%%', '%¾', $string);
152+
$string = str_replace(array_keys(self::getColors()), '', $string);
153+
$string = str_replace('%¾', '%', $string);
154+
}
151155

152-
// Remove color encoding if it exists
153-
foreach (self::getColors() as $key => $value) {
154-
$string = str_replace(self::color($value), '', $string);
156+
if ( ! ( $keep & 2 ) ) {
157+
// Remove color encoding if it exists
158+
foreach (self::getColors() as $key => $value) {
159+
$string = str_replace(self::color($value), '', $string);
160+
}
155161
}
156162

157163
return $string;
@@ -162,13 +168,13 @@ static public function decolorize($string) {
162168
*
163169
* @param string $passed The original string before colorization.
164170
* @param string $colorized The string after running through self::colorize.
165-
* @param string $colored The string without any color information.
171+
* @param string $deprecated Optional. Not used. Default null.
166172
*/
167-
static public function cacheString($passed, $colorized, $colored) {
173+
static public function cacheString( $passed, $colorized, $deprecated = null ) {
168174
self::$_string_cache[md5($passed)] = array(
169175
'passed' => $passed,
170176
'colorized' => $colorized,
171-
'decolorized' => self::decolorize($passed)
177+
'decolorized' => self::decolorize($passed), // Not very useful but keep for BC.
172178
);
173179
}
174180

@@ -179,41 +185,36 @@ static public function cacheString($passed, $colorized, $colored) {
179185
* @return int
180186
*/
181187
static public function length($string) {
182-
if (isset(self::$_string_cache[md5($string)]['decolorized'])) {
183-
$test_string = self::$_string_cache[md5($string)]['decolorized'];
184-
} else {
185-
$test_string = self::decolorize($string);
186-
}
187-
188-
return safe_strlen($test_string);
188+
return safe_strlen( self::decolorize( $string ) );
189189
}
190190

191191
/**
192-
* Return the width (length in characters) of the string without color codes.
192+
* Return the width (length in characters) of the string, ignoring color codes when colorization is enabled or the string is pre-colorized.
193193
*
194-
* @param string $string the string to measure
194+
* @param string $string The string to measure.
195+
* @param bool $pre_colorized Optional. Set if the string is pre-colorized. Default false.
196+
* @param string|bool $encoding Optional. The encoding of the string. Default false.
195197
* @return int
196198
*/
197-
static public function width($string) {
198-
$md5 = md5($string);
199-
if (isset(self::$_string_cache[$md5]['decolorized'])) {
200-
$test_string = self::$_string_cache[$md5]['decolorized'];
201-
} else {
202-
$test_string = self::decolorize($string);
203-
}
204-
205-
return strwidth($test_string);
199+
static public function width( $string, $pre_colorized = false, $encoding = false ) {
200+
return strwidth( $pre_colorized || self::shouldColorize() ? self::decolorize( $string, $pre_colorized ? 1 /*keep_tokens*/ : 0 ) : $string, $encoding );
206201
}
207202

208203
/**
209204
* Pad the string to a certain display length.
210205
*
211-
* @param string $string the string to pad
212-
* @param integer $length the display length
206+
* @param string $string The string to pad.
207+
* @param int $length The display length.
208+
* @param bool $pre_colorized Optional. Set if the string is pre-colorized. Default false.
209+
* @param string|bool $encoding Optional. The encoding of the string. Default false.
213210
* @return string
214211
*/
215-
static public function pad($string, $length) {
216-
return safe_str_pad( $string, $length );
212+
static public function pad( $string, $length, $pre_colorized = false, $encoding = false ) {
213+
$real_length = self::width( $string, $pre_colorized, $encoding );
214+
$diff = strlen( $string ) - $real_length;
215+
$length += $diff;
216+
217+
return str_pad( $string, $length );
217218
}
218219

219220
/**

lib/cli/Table.php

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ public function setRenderer(Renderer $renderer) {
102102
*/
103103
protected function checkRow(array $row) {
104104
foreach ($row as $column => $str) {
105-
$width = Colors::shouldColorize() ? Colors::width($str) : strwidth($str);
105+
$width = Colors::width( $str, $this->isAsciiPreColorized( $column ) );
106106
if (!isset($this->_width[$column]) || $width > $this->_width[$column]) {
107107
$this->_width[$column] = $width;
108108
}
@@ -228,4 +228,30 @@ public function setRows(array $rows) {
228228
public function countRows() {
229229
return count($this->_rows);
230230
}
231+
232+
/**
233+
* Set whether items in an Ascii table are pre-colorized.
234+
*
235+
* @param bool|array $precolorized A boolean to set all columns in the table as pre-colorized, or an array of booleans keyed by column index (number) to set individual columns as pre-colorized.
236+
* @see cli\Ascii::setPreColorized()
237+
*/
238+
public function setAsciiPreColorized( $pre_colorized ) {
239+
if ( $this->_renderer instanceof Ascii ) {
240+
$this->_renderer->setPreColorized( $pre_colorized );
241+
}
242+
}
243+
244+
/**
245+
* Is a column in an Ascii table pre-colorized?
246+
*
247+
* @param int $column Column index to check.
248+
* @return bool True if whole Ascii table is marked as pre-colorized, or if the individual column is pre-colorized; else false.
249+
* @see cli\Ascii::isPreColorized()
250+
*/
251+
private function isAsciiPreColorized( $column ) {
252+
if ( $this->_renderer instanceof Ascii ) {
253+
return $this->_renderer->isPreColorized( $column );
254+
}
255+
return false;
256+
}
231257
}

lib/cli/cli.php

Lines changed: 86 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -156,15 +156,19 @@ function menu( $items, $default = null, $title = 'Choose an item' ) {
156156
* Attempts an encoding-safe way of getting string length. If mb_string extensions aren't
157157
* installed, falls back to basic strlen if no encoding is present
158158
*
159-
* @param string The string to check
160-
* @return int Numeric value that represents the string's length
159+
* @param string $str The string to check.
160+
* @param string|bool $encoding Optional. The encoding of the string. Default false.
161+
* @return int Numeric value that represents the string's length
161162
*/
162-
function safe_strlen( $str ) {
163-
if ( function_exists( 'mb_strlen' ) && function_exists( 'mb_detect_encoding' ) ) {
164-
$length = mb_strlen( $str, mb_detect_encoding( $str ) );
163+
function safe_strlen( $str, $encoding = false ) {
164+
if ( function_exists( 'mb_strlen' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
165+
if ( ! $encoding ) {
166+
$encoding = mb_detect_encoding( $str, null, true /*strict*/ );
167+
}
168+
$length = mb_strlen( $str, $encoding );
165169
} else {
166170
// iconv will return PHP notice if non-ascii characters are present in input string
167-
$str = iconv( 'ASCII' , 'ASCII', $str );
171+
$str = iconv( $encoding ? $encoding : 'ASCII', 'ASCII', $str );
168172

169173
$length = strlen( $str );
170174
}
@@ -176,17 +180,43 @@ function safe_strlen( $str ) {
176180
* Attempts an encoding-safe way of getting a substring. If mb_string extensions aren't
177181
* installed, falls back to ascii substring if no encoding is present
178182
*
179-
* @param string $str The input string
180-
* @param int $start The starting position of the substring
181-
* @param boolean $length Maximum length of the substring
182-
* @return string Substring of string specified by start and length parameters
183-
*/
184-
function safe_substr( $str, $start, $length = false ) {
185-
if ( function_exists( 'mb_substr' ) && function_exists( 'mb_detect_encoding' ) ) {
186-
$substr = mb_substr( $str, $start, $length, mb_detect_encoding( $str ) );
183+
* @param string $str The input string.
184+
* @param int $start The starting position of the substring.
185+
* @param int|bool|null $length Optional. Maximum length of the substring. Default false.
186+
* @param int|bool $is_width Optional. If set and encoding is UTF-8, $length is interpreted as spacing width. Default false.
187+
* @param string|bool $encoding Optional. The encoding of the string. Default false.
188+
* @return string Substring of string specified by start and length parameters
189+
*/
190+
function safe_substr( $str, $start, $length = false, $is_width = false, $encoding = false ) {
191+
// PHP 5.3 substr takes false as full length, PHP > 5.3 takes null - for compat. do `safe_strlen()`.
192+
if ( null === $length || false === $length ) {
193+
$length = safe_strlen( $str, $encoding );
194+
}
195+
if ( function_exists( 'mb_substr' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
196+
if ( ! $encoding ) {
197+
$encoding = mb_detect_encoding( $str, null, true /*strict*/ );
198+
}
199+
$substr = mb_substr( $str, $start, $length, $encoding );
200+
201+
if ( $is_width && 'UTF-8' === $encoding ) {
202+
// Set the East Asian Width regex.
203+
$eaw_regex = get_unicode_regexs( 'eaw' );
204+
// If there's any East Asian double-width chars...
205+
if ( preg_match( $eaw_regex, $substr ) ) {
206+
// Explode string into an array of UTF-8 chars. Based on core `_mb_substr()` in "wp-includes/compat.php".
207+
$chars = preg_split( '/([\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*)/', $substr, $length + 1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
208+
$cnt = min( count( $chars ), $length );
209+
$width = $length;
210+
211+
for ( $length = 0; $length < $cnt && $width > 0; $length++ ) {
212+
$width -= preg_match( $eaw_regex, $chars[ $length ] ) ? 2 : 1;
213+
}
214+
return join( '', array_slice( $chars, 0, $length ) );
215+
}
216+
}
187217
} else {
188218
// iconv will return PHP notice if non-ascii characters are present in input string
189-
$str = iconv( 'ASCII' , 'ASCII', $str );
219+
$str = iconv( $encoding ? $encoding : 'ASCII', 'ASCII', $str );
190220

191221
$substr = substr( $str, $start, $length );
192222
}
@@ -197,13 +227,13 @@ function safe_substr( $str, $start, $length = false ) {
197227
/**
198228
* An encoding-safe way of padding string length for display
199229
*
200-
* @param string $string The string to pad
201-
* @param int $length The length to pad it to
230+
* @param string $string The string to pad.
231+
* @param int $length The length to pad it to.
232+
* @param string|bool $encoding Optional. The encoding of the string. Default false.
202233
* @return string
203234
*/
204-
function safe_str_pad( $string, $length ) {
205-
$cleaned_string = Colors::shouldColorize() ? Colors::decolorize( $string ) : $string;
206-
$real_length = strwidth( $cleaned_string );
235+
function safe_str_pad( $string, $length, $encoding = false ) {
236+
$real_length = strwidth( $string, $encoding );
207237
$diff = strlen( $string ) - $real_length;
208238
$length += $diff;
209239

@@ -213,16 +243,13 @@ function safe_str_pad( $string, $length ) {
213243
/**
214244
* Get width of string, ie length in characters, taking into account multi-byte and mark characters for UTF-8, and multi-byte for non-UTF-8.
215245
*
216-
* @param string The string to check
217-
* @return int The string's width.
246+
* @param string $string The string to check.
247+
* @param string|bool $encoding Optional. The encoding of the string. Default false.
248+
* @return int The string's width.
218249
*/
219-
function strwidth( $string ) {
220-
static $eaw_regex; // East Asian Width regex. Characters that count as 2 characters as they're "wide" or "fullwidth". See http://www.unicode.org/reports/tr11/tr11-19.html
221-
static $m_regex; // Mark characters regex (Unicode property "M") - mark combining "Mc", mark enclosing "Me" and mark non-spacing "Mn" chars that should be ignored for spacing purposes.
222-
if ( null === $eaw_regex ) {
223-
// Load both regexs generated from Unicode data.
224-
require __DIR__ . '/unicode/regex.php';
225-
}
250+
function strwidth( $string, $encoding = false ) {
251+
// Set the East Asian Width and Mark regexs.
252+
list( $eaw_regex, $m_regex ) = get_unicode_regexs();
226253

227254
// Allow for selective testing - "1" bit set tests grapheme_strlen(), "2" preg_match_all( '/\X/u' ), "4" mb_strwidth(), "other" safe_strlen().
228255
$test_strwidth = getenv( 'PHP_CLI_TOOLS_TEST_STRWIDTH' );
@@ -239,8 +266,10 @@ function strwidth( $string ) {
239266
return $width + preg_match_all( $eaw_regex, $string, $dummy /*needed for PHP 5.3*/ );
240267
}
241268
}
242-
if ( function_exists( 'mb_strwidth' ) && function_exists( 'mb_detect_encoding' ) ) {
243-
$encoding = mb_detect_encoding( $string, null, true /*strict*/ );
269+
if ( function_exists( 'mb_strwidth' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
270+
if ( ! $encoding ) {
271+
$encoding = mb_detect_encoding( $string, null, true /*strict*/ );
272+
}
244273
$width = mb_strwidth( $string, $encoding );
245274
if ( 'UTF-8' === $encoding ) {
246275
// Subtract combining characters.
@@ -252,3 +281,29 @@ function strwidth( $string ) {
252281
}
253282
return safe_strlen( $string );
254283
}
284+
285+
/**
286+
* Get the regexs generated from Unicode data.
287+
*
288+
* @param string $idx Optional. Return a specific regex only. Default null.
289+
* @return array|string Returns the list array( $eaw_regex, $m_regex ) if $idx is not given or is not 'eaw' or 'm', otherwise the specific regex string.
290+
*/
291+
function get_unicode_regexs( $idx = null ) {
292+
static $eaw_regex; // East Asian Width regex. Characters that count as 2 characters as they're "wide" or "fullwidth". See http://www.unicode.org/reports/tr11/tr11-19.html
293+
static $m_regex; // Mark characters regex (Unicode property "M") - mark combining "Mc", mark enclosing "Me" and mark non-spacing "Mn" chars that should be ignored for spacing purposes.
294+
if ( null === $eaw_regex ) {
295+
// Load both regexs generated from Unicode data.
296+
require __DIR__ . '/unicode/regex.php';
297+
}
298+
299+
if ( null !== $idx ) {
300+
if ( 'eaw' === $idx ) {
301+
return $eaw_regex;
302+
}
303+
if ( 'm' === $idx ) {
304+
return $m_regex;
305+
}
306+
}
307+
308+
return array( $eaw_regex, $m_regex, );
309+
}

0 commit comments

Comments
 (0)