本文通过对 PHP 源码的探秘,揭示了 trim 会导致乱码的原因。 
运行以下代码: $tag = "互联网产品、";$text = rtrim($tag, "、");print_r($text); 运行后,我们可能以为会得到的结果是 互联网产品 ,实际结果是 互联网产 (末尾带乱码)。为什么会这样呢? 原理 trim 函数文档 string trim ( string $str [, string $character_mask = " \t\n\r\0\x0B" ] ) 该函数不是多字节函数,也就是说,汉字这样的多字节字符,会拿其头或尾的单字节来和后面的 $character_mask 对应的 char 数组进行匹配,如果在后面的数组中,则删掉,继续匹配。比如: echo ltrim("bcdf","abc"); // df 如下面的 demo 中的函数 string_print_char 所示: 、 由 0xe3 0x80 0x81 三字节组成, 品 由 0xe5 0x93 0x81 三字节组成。 所以在执行 rtrim 的时候,通过字节比对,会将 品 末尾的 0x81 也去掉,导致了最后出现了乱码。 源码精简版演示 查看 PHP7 的源码,然后提炼出下面的小 demo ,方便大家一起学习,其实PHP源码的学习并不难,每天进步一点点。 //// main.c// trim//// Created by 周梦康 on 2017/10/18.// Copyright 2017年 周梦康. All rights reserved.//#include <stdio.h>#include <stdlib.h>#include <string.h>void string_print_char(char *str);void php_charmask(unsigned char *input, size_t len, char *mask);char *ltrim(char *str,char *character_mask);char *rtrim(char *str,char *character_mask);int main(int argc, char const *argv[]){ printf("%s",ltrim("bcdf","abc")); string_print_char("品"); // e5 93 81 string_print_char("、"); // e3 80 81 printf("%s",rtrim("互联网产品、","、")); return 0;}char *ltrim(char *str,char *character_mask){ char *res; char mask[256]; register size_t i; int trimmed = 0; size_t len = strlen(str); php_charmask((unsigned char*)character_mask, strlen(character_mask), mask); for (i = 0; i < len; i++) { if (mask[(unsigned char)str[i]]) { trimmed++; } else { break; } } len -= trimmed; str += trimmed; res = (char *) malloc(sizeof(char) * (len+1)); memcpy(res,str,len); return res;}char *rtrim(char *str,char *character_mask){ char *res; char mask[256]; register size_t i; size_t len = strlen(str); php_charmask((unsigned char*)character_mask, strlen(character_mask), mask); if (len > 0) { i = len - 1; do { if (mask[(unsigned char)str[i]]) { len--; } else { break; } } while (i-- != 0); } res = (char *) malloc(sizeof(char) * (len+1)); memcpy(res,str,len); return res;}void string_print_char(char *str){ unsigned long l = strlen(str); for (int i=0; i < l; i++) { printf("%02hhx ",str[i]); } printf("\n");}void 
php_charmask(unsigned char *input, size_t len, char *mask){ unsigned char *end; unsigned char c; memset(mask, 0, 256); for (end = input+len; input < end; input++) { c = *input; mask[c]= 1; }} PHP7 相关源码 PHP_FUNCTION(trim){ php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);}PHP_FUNCTION(rtrim){ php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);}PHP_FUNCTION(ltrim){ php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);}static void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode){ zend_string *str; zend_string *what = NULL; ZEND_PARSE_PARAMETERS_START(1, 2) Z_PARAM_STR(str) Z_PARAM_OPTIONAL Z_PARAM_STR(what) ZEND_PARSE_PARAMETERS_END(); ZVAL_STR(return_value, php_trim(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));}PHPAPI zend_string *php_trim(zend_string *str, char *what, size_t what_len, int mode){ const char *c = ZSTR_VAL(str); size_t len = ZSTR_LEN(str); register size_t i; size_t trimmed = 0; char mask[256]; if (what) { if (what_len == 1) { char p = *what; if (mode & 1) { for (i = 0; i < len; i++) { if (c[i] == p) { trimmed++; } else { break; } } len -= trimmed; c += trimmed; } if (mode & 2) { if (len > 0) { i = len - 1; do { if (c[i] == p) { len--; } else { break; } } while (i-- != 0); } } } else { php_charmask((unsigned char*)what, what_len, mask); if (mode & 1) { for (i = 0; i < len; i++) { if (mask[(unsigned char)c[i]]) { trimmed++; } else { break; } } len -= trimmed; c += trimmed; } if (mode & 2) { if (len > 0) { i = len - 1; do { if (mask[(unsigned char)c[i]]) { len--; } else { break; } } while (i-- != 0); } } } } else { if (mode & 1) { for (i = 0; i < len; i++) { if ((unsigned char)c[i] <= ' ' && (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || c[i] == '\t' || c[i] == '\v' || c[i] == '\0')) { trimmed++; } else { break; } } len -= trimmed; c += trimmed; } if (mode & 2) { if (len > 0) { i = len - 1; do { if ((unsigned char)c[i] <= ' ' && (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || c[i] == '\t' || c[i] == '\v' || c[i] == '\0')) { len--; } else { 
break; } } while (i-- != 0); } } } if (ZSTR_LEN(str) == len) { return zend_string_copy(str); } else { return zend_string_init(c, len, 0); }}/* {{{ php_charmask * Fills a 256-byte bytemask with input. You can specify a range like 'a..z', * it needs to be incrementing. * Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors) */static inline int php_charmask(unsigned char *input, size_t len, char *mask){ unsigned char *end; unsigned char c; int result = SUCCESS; memset(mask, 0, 256); for (end = input+len; input < end; input++) { c=*input; if ((input+3 < end) && input[1] == '.' && input[2] == '.' && input[3] >= c) { memset(mask+c, 1, input[3] - c + 1); input+=3; } else if ((input+1 < end) && input[0] == '.' && input[1] == '.') { /* Error, try to be as helpful as possible: (a range ending/starting with '.' won't be captured here) */ if (end-len >= input) { /* there was no 'left' char */ php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the left of '..'"); result = FAILURE; continue; } if (input+2 >= end) { /* there is no 'right' char */ php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the right of '..'"); result = FAILURE; continue; } if (input[-1] > input[2]) { /* wrong order */ php_error_docref(NULL, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing"); result = FAILURE; continue; } /* FIXME: better error (a..b..c is the only left possibility?) */ php_error_docref(NULL, E_WARNING, "Invalid '..'-range"); result = FAILURE; continue; } else { mask[c]=1; } } return result;}/* }}} */ 本文仅代表作者个人观点,不代表巅云官方发声,对观点有疑义请先联系作者本人进行修改,若内容违法请联系平台管理员,邮箱2522407257@qq.com。更多相关资讯,请到巅云www.taishanly.com;学习互联网营销技术请到巅云建站www.yx10011.com。 |