如何使用C编程语言编写一个函数,以分隔符并返回带有分隔符的字符串数组?
char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
str_split(str,',');
strtok()
族函数的关键是static variables
在C语言中的理解,即它们在使用它们的连续函数调用之间的行为。请参阅下面的代码
如何使用C编程语言编写一个函数,以分隔符并返回带有分隔符的字符串数组?
char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
str_split(str,',');
strtok()
族函数的关键是static variables
在C语言中的理解,即它们在使用它们的连续函数调用之间的行为。请参阅下面的代码
Answers:
您可以使用该strtok()
函数分割字符串(并指定要使用的分隔符)。请注意,这strtok()
将修改传递给它的字符串。如果原始字符串在其他地方是必需的,请复制该副本并将其传递给strtok()
。
编辑:
示例(请注意,它不处理连续的定界符,例如“ JAN ,,, FEB,MAR”):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
char** str_split(char* a_str, const char a_delim)
{
char** result = 0;
size_t count = 0;
char* tmp = a_str;
char* last_comma = 0;
char delim[2];
delim[0] = a_delim;
delim[1] = 0;
/* Count how many elements will be extracted. */
while (*tmp)
{
if (a_delim == *tmp)
{
count++;
last_comma = tmp;
}
tmp++;
}
/* Add space for trailing token. */
count += last_comma < (a_str + strlen(a_str) - 1);
/* Add space for terminating null string so caller
knows where the list of returned strings ends. */
count++;
result = malloc(sizeof(char*) * count);
if (result)
{
size_t idx = 0;
char* token = strtok(a_str, delim);
while (token)
{
assert(idx < count);
*(result + idx++) = strdup(token);
token = strtok(0, delim);
}
assert(idx == count - 1);
*(result + idx) = 0;
}
return result;
}
int main()
{
char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
char** tokens;
printf("months=[%s]\n\n", months);
tokens = str_split(months, ',');
if (tokens)
{
int i;
for (i = 0; *(tokens + i); i++)
{
printf("month=[%s]\n", *(tokens + i));
free(*(tokens + i));
}
printf("\n");
free(tokens);
}
return 0;
}
输出:
$ ./main.exe
months=[JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC]
month=[JAN]
month=[FEB]
month=[MAR]
month=[APR]
month=[MAY]
month=[JUN]
month=[JUL]
month=[AUG]
month=[SEP]
month=[OCT]
month=[NOV]
month=[DEC]
strtok
由废弃的标记strsep(3)
,以手册页。
strtok_s()
(Microsoft,C11 Annex K,可选)或strtok_r()
(POSIX)比plain更好strtok()
。strtok()
在库函数中,plain 是邪恶的。当时可能没有正在使用的调用库函数的函数strtok()
,也没有库函数调用的函数可以调用strtok()
。
strtok()
不是线程安全的(出于提及@JonathanLeffler的原因),因此整个函数也不是线程安全的。如果尝试在踩踏的环境中使用此功能,则会得到不稳定且不可预测的结果。更换strtok()
为strtok_r()
修复此问题。
我认为strsep
仍然是最好的工具:
while ((token = strsep(&str, ","))) my_fn(token);
从字面上看,这是分割字符串的一行。
多余的括号是一个样式元素,表示我们有意测试赋值的结果,而不是相等运算符==
。
对于模式工作,token
并str
都有型char *
。如果您以字符串文字开头,那么您首先需要复制它:
// More general pattern:
const char *my_str_literal = "JAN,FEB,MAR";
char *token, *str, *tofree;
tofree = str = strdup(my_str_literal); // We own str's memory now.
while ((token = strsep(&str, ","))) my_fn(token);
free(tofree);
如果两个分隔符一起出现在str
,您将获得一个token
空字符串值。str
修改了的值,因为遇到的每个定界符都被零字节覆盖-这是复制首先解析的字符串的另一个很好的理由。
在评论中,有人建议这样strtok
做要比strsep
因为strtok
更便于携带而更好。Ubuntu和Mac OS X都有strsep
; 可以肯定地说其他unixy系统也是如此。Windows缺少strsep
,但是它具有strbrk
实现这种简短而甜美的strsep
替换的功能:
char *strsep(char **stringp, const char *delim) {
if (*stringp == NULL) { return NULL; }
char *token_start = *stringp;
*stringp = strpbrk(token_start, delim);
if (*stringp) {
**stringp = '\0';
(*stringp)++;
}
return token_start;
}
这是strsep
vs 的很好解释strtok
。利弊可以主观判断;但是,我认为这是一个明显的迹象,strsep
旨在替代strtok
。
tofree
一个人会得到自由而没有str
?
str
它,因为它的值可以通过调用来更改strsep()
。tofree
一致的值指向要释放的内存的开始。
下面的方法将为您完成所有工作(内存分配,计算长度)。在此处可以找到更多信息和描述-Java String.split()方法的实现以拆分C字符串
int split (const char *str, char c, char ***arr)
{
int count = 1;
int token_len = 1;
int i = 0;
char *p;
char *t;
p = str;
while (*p != '\0')
{
if (*p == c)
count++;
p++;
}
*arr = (char**) malloc(sizeof(char*) * count);
if (*arr == NULL)
exit(1);
p = str;
while (*p != '\0')
{
if (*p == c)
{
(*arr)[i] = (char*) malloc( sizeof(char) * token_len );
if ((*arr)[i] == NULL)
exit(1);
token_len = 0;
i++;
}
p++;
token_len++;
}
(*arr)[i] = (char*) malloc( sizeof(char) * token_len );
if ((*arr)[i] == NULL)
exit(1);
i = 0;
p = str;
t = ((*arr)[i]);
while (*p != '\0')
{
if (*p != c && *p != '\0')
{
*t = *p;
t++;
}
else
{
*t = '\0';
i++;
t = ((*arr)[i]);
}
p++;
}
return count;
}
如何使用它:
int main (int argc, char ** argv)
{
int i;
char *s = "Hello, this is a test module for the string splitting.";
int c = 0;
char **arr = NULL;
c = split(s, ' ', &arr);
printf("found %d tokens.\n", c);
for (i = 0; i < c; i++)
printf("string #%d: %s\n", i, arr[i]);
return 0;
}
found 10 tokens. string #0: Hello, string #1: this string #2: is string #3: a string #4: test string #5: module string #6: for string #7: the string #8: string string #9: splitting.¢
这是我的两分钱:
int split (const char *txt, char delim, char ***tokens)
{
int *tklen, *t, count = 1;
char **arr, *p = (char *) txt;
while (*p != '\0') if (*p++ == delim) count += 1;
t = tklen = calloc (count, sizeof (int));
for (p = (char *) txt; *p != '\0'; p++) *p == delim ? *t++ : (*t)++;
*tokens = arr = malloc (count * sizeof (char *));
t = tklen;
p = *arr++ = calloc (*(t++) + 1, sizeof (char *));
while (*txt != '\0')
{
if (*txt == delim)
{
p = *arr++ = calloc (*(t++) + 1, sizeof (char *));
txt++;
}
else *p++ = *txt++;
}
free (tklen);
return count;
}
用法:
char **tokens;
int count, i;
const char *str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
count = split (str, ',', &tokens);
for (i = 0; i < count; i++) printf ("%s\n", tokens[i]);
/* freeing tokens */
for (i = 0; i < count; i++) free (tokens[i]);
free (tokens);
在上面的示例中,将有一种方法可以在字符串中返回一个空终止的字符串数组(如您想要的那样)。但是,这将无法传递文字字符串,因为必须通过以下函数对其进行修改:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
char** str_split( char* str, char delim, int* numSplits )
{
char** ret;
int retLen;
char* c;
if ( ( str == NULL ) ||
( delim == '\0' ) )
{
/* Either of those will cause problems */
ret = NULL;
retLen = -1;
}
else
{
retLen = 0;
c = str;
/* Pre-calculate number of elements */
do
{
if ( *c == delim )
{
retLen++;
}
c++;
} while ( *c != '\0' );
ret = malloc( ( retLen + 1 ) * sizeof( *ret ) );
ret[retLen] = NULL;
c = str;
retLen = 1;
ret[0] = str;
do
{
if ( *c == delim )
{
ret[retLen++] = &c[1];
*c = '\0';
}
c++;
} while ( *c != '\0' );
}
if ( numSplits != NULL )
{
*numSplits = retLen;
}
return ret;
}
int main( int argc, char* argv[] )
{
const char* str = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
char* strCpy;
char** split;
int num;
int i;
strCpy = malloc( strlen( str ) * sizeof( *strCpy ) );
strcpy( strCpy, str );
split = str_split( strCpy, ',', &num );
if ( split == NULL )
{
puts( "str_split returned NULL" );
}
else
{
printf( "%i Results: \n", num );
for ( i = 0; i < num; i++ )
{
puts( split[i] );
}
}
free( split );
free( strCpy );
return 0;
}
可能有更整洁的方法,但是您明白了。
此函数采用char *字符串,并由分隔符将其分割。连续可以有多个分隔符。请注意,该函数会修改原始字符串。如果需要原始字符串保持原样,则必须先复制原始字符串。此函数不使用任何cstring函数调用,因此它可能比其他函数快一点。如果您不关心内存分配,则可以在函数顶部以大小strlen(src_str)/ 2分配sub_strings,并且(像提到的c ++“版本”一样)跳过函数的下半部分。如果这样做,该函数将简化为O(N),但是下面显示的内存优化方式是O(2N)。
功能:
char** str_split(char *src_str, const char deliminator, size_t &num_sub_str){
//replace deliminator's with zeros and count how many
//sub strings with length >= 1 exist
num_sub_str = 0;
char *src_str_tmp = src_str;
bool found_delim = true;
while(*src_str_tmp){
if(*src_str_tmp == deliminator){
*src_str_tmp = 0;
found_delim = true;
}
else if(found_delim){ //found first character of a new string
num_sub_str++;
found_delim = false;
//sub_str_vec.push_back(src_str_tmp); //for c++
}
src_str_tmp++;
}
printf("Start - found %d sub strings\n", num_sub_str);
if(num_sub_str <= 0){
printf("str_split() - no substrings were found\n");
return(0);
}
//if you want to use a c++ vector and push onto it, the rest of this function
//can be omitted (obviously modifying input parameters to take a vector, etc)
char **sub_strings = (char **)malloc( (sizeof(char*) * num_sub_str) + 1);
const char *src_str_terminator = src_str_tmp;
src_str_tmp = src_str;
bool found_null = true;
size_t idx = 0;
while(src_str_tmp < src_str_terminator){
if(!*src_str_tmp) //found a NULL
found_null = true;
else if(found_null){
sub_strings[idx++] = src_str_tmp;
//printf("sub_string_%d: [%s]\n", idx-1, sub_strings[idx-1]);
found_null = false;
}
src_str_tmp++;
}
sub_strings[num_sub_str] = NULL;
return(sub_strings);
}
如何使用它:
char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
char *str = strdup(months);
size_t num_sub_str;
char **sub_strings = str_split(str, ',', num_sub_str);
char *endptr;
if(sub_strings){
for(int i = 0; sub_strings[i]; i++)
printf("[%s]\n", sub_strings[i]);
}
free(sub_strings);
free(str);
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
/**
* splits str on delim and dynamically allocates an array of pointers.
*
* On error -1 is returned, check errno
* On success size of array is returned, which may be 0 on an empty string
* or 1 if no delim was found.
*
* You could rewrite this to return the char ** array instead and upon NULL
* know it's an allocation problem but I did the triple array here. Note that
* upon the hitting two delim's in a row "foo,,bar" the array would be:
* { "foo", NULL, "bar" }
*
* You need to define the semantics of a trailing delim Like "foo," is that a
* 2 count array or an array of one? I choose the two count with the second entry
* set to NULL since it's valueless.
* Modifies str so make a copy if this is a problem
*/
int split( char * str, char delim, char ***array, int *length ) {
char *p;
char **res;
int count=0;
int k=0;
p = str;
// Count occurance of delim in string
while( (p=strchr(p,delim)) != NULL ) {
*p = 0; // Null terminate the deliminator.
p++; // Skip past our new null
count++;
}
// allocate dynamic array
res = calloc( 1, count * sizeof(char *));
if( !res ) return -1;
p = str;
for( k=0; k<count; k++ ){
if( *p ) res[k] = p; // Copy start of string
p = strchr(p, 0 ); // Look for next null
p++; // Start of next string
}
*array = res;
*length = count;
return 0;
}
char str[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,";
int main() {
char **res;
int k=0;
int count =0;
int rc;
rc = split( str, ',', &res, &count );
if( rc ) {
printf("Error: %s errno: %d \n", strerror(errno), errno);
}
printf("count: %d\n", count );
for( k=0; k<count; k++ ) {
printf("str: %s\n", res[k]);
}
free(res );
return 0;
}
下面是我strtok()
从zString库实现的方法。
zstring_strtok()
与标准库的区别在于strtok()
它处理连续定界符的方式。
请看下面的代码,确保您对它的工作原理有所了解(我尝试使用尽可能多的注释)
char *zstring_strtok(char *str, const char *delim) {
static char *static_str=0; /* var to store last address */
int index=0, strlength=0; /* integers for indexes */
int found = 0; /* check if delim is found */
/* delimiter cannot be NULL
* if no more char left, return NULL as well
*/
if (delim==0 || (str == 0 && static_str == 0))
return 0;
if (str == 0)
str = static_str;
/* get length of string */
while(str[strlength])
strlength++;
/* find the first occurance of delim */
for (index=0;index<strlength;index++)
if (str[index]==delim[0]) {
found=1;
break;
}
/* if delim is not contained in str, return str */
if (!found) {
static_str = 0;
return str;
}
/* check for consecutive delimiters
*if first char is delim, return delim
*/
if (str[0]==delim[0]) {
static_str = (str + 1);
return (char *)delim;
}
/* terminate the string
* this assignmetn requires char[], so str has to
* be char[] rather than *char
*/
str[index] = '\0';
/* save the rest of the string */
if ((str + index + 1)!=0)
static_str = (str + index + 1);
else
static_str = 0;
return str;
}
以下是用法示例...
Example Usage
char str[] = "A,B,,,C";
printf("1 %s\n",zstring_strtok(s,","));
printf("2 %s\n",zstring_strtok(NULL,","));
printf("3 %s\n",zstring_strtok(NULL,","));
printf("4 %s\n",zstring_strtok(NULL,","));
printf("5 %s\n",zstring_strtok(NULL,","));
printf("6 %s\n",zstring_strtok(NULL,","));
Example Output
1 A
2 B
3 ,
4 ,
5 C
6 (null)
该库可以从Github下载 https://github.com/fnoyanisi/zString
我认为以下解决方案是理想的:
代码说明:
token
来存储令牌的地址和长度str
完全由分隔符组成时,所以有strlen(str) + 1
令牌,所有令牌都为空字符串str
记录每个令牌的地址和长度NULL
标记值的额外空间memcpy
速度比strcpy
我们快,我们知道长度typedef struct {
const char *start;
size_t len;
} token;
char **split(const char *str, char sep)
{
char **array;
unsigned int start = 0, stop, toks = 0, t;
token *tokens = malloc((strlen(str) + 1) * sizeof(token));
for (stop = 0; str[stop]; stop++) {
if (str[stop] == sep) {
tokens[toks].start = str + start;
tokens[toks].len = stop - start;
toks++;
start = stop + 1;
}
}
/* Mop up the last token */
tokens[toks].start = str + start;
tokens[toks].len = stop - start;
toks++;
array = malloc((toks + 1) * sizeof(char*));
for (t = 0; t < toks; t++) {
/* Calloc makes it nul-terminated */
char *token = calloc(tokens[t].len + 1, 1);
memcpy(token, tokens[t].start, tokens[t].len);
array[t] = token;
}
/* Add a sentinel */
array[t] = NULL;
free(tokens);
return array;
}
malloc
为了简洁起见,省略了注释检查。
通常,我不会char *
从像这样的拆分函数中返回指针数组,因为它给调用者带来了很多责任以使其正确释放。我更喜欢的接口是允许调用者传递回调函数并为每个令牌调用此函数,如我在此处所述:在C中拆分一个String。
token
。
试试这个。
char** strsplit(char* str, const char* delim){
char** res = NULL;
char* part;
int i = 0;
char* aux = strdup(str);
part = strdup(strtok(aux, delim));
while(part){
res = (char**)realloc(res, (i + 1) * sizeof(char*));
*(res + i) = strdup(part);
part = strdup(strtok(NULL, delim));
i++;
}
res = (char**)realloc(res, i * sizeof(char*));
*(res + i) = NULL;
return res;
}
这种优化的方法在* result中创建(或更新现有的)指针数组,并在* count中返回元素的数量。
使用“ max”表示您期望的最大字符串数(当您指定现有数组或任何其他reaseon时),否则将其设置为0
要与定界符列表进行比较,请将delim定义为char *并替换以下行:
if (str[i]==delim) {
以下两行:
char *c=delim; while(*c && *c!=str[i]) c++;
if (*c) {
请享用
#include <stdlib.h>
#include <string.h>
char **split(char *str, size_t len, char delim, char ***result, unsigned long *count, unsigned long max) {
size_t i;
char **_result;
// there is at least one string returned
*count=1;
_result= *result;
// when the result array is specified, fill it during the first pass
if (_result) {
_result[0]=str;
}
// scan the string for delimiter, up to specified length
for (i=0; i<len; ++i) {
// to compare against a list of delimiters,
// define delim as a string and replace
// the next line:
// if (str[i]==delim) {
//
// with the two following lines:
// char *c=delim; while(*c && *c!=str[i]) c++;
// if (*c) {
//
if (str[i]==delim) {
// replace delimiter with zero
str[i]=0;
// when result array is specified, fill it during the first pass
if (_result) {
_result[*count]=str+i+1;
}
// increment count for each separator found
++(*count);
// if max is specified, dont go further
if (max && *count==max) {
break;
}
}
}
// when result array is specified, we are done here
if (_result) {
return _result;
}
// else allocate memory for result
// and fill the result array
*result=malloc((*count)*sizeof(char*));
if (!*result) {
return NULL;
}
_result=*result;
// add first string to result
_result[0]=str;
// if theres more strings
for (i=1; i<*count; ++i) {
// find next string
while(*str) ++str;
++str;
// add next string to result
_result[i]=str;
}
return _result;
}
用法示例:
#include <stdio.h>
int main(int argc, char **argv) {
char *str="JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
char **result=malloc(6*sizeof(char*));
char **result2=0;
unsigned long count;
unsigned long count2;
unsigned long i;
split(strdup(str),strlen(str),',',&result,&count,6);
split(strdup(str),strlen(str),',',&result2,&count2,0);
if (result)
for (i=0; i<count; ++i) {
printf("%s\n",result[i]);
}
printf("\n");
if (result2)
for (i=0; i<count2; ++i) {
printf("%s\n", result2[i]);
}
return 0;
}
我的版本:
int split(char* str, const char delimeter, char*** args) {
int cnt = 1;
char* t = str;
while (*t == delimeter) t++;
char* t2 = t;
while (*(t2++))
if (*t2 == delimeter && *(t2 + 1) != delimeter && *(t2 + 1) != 0) cnt++;
(*args) = malloc(sizeof(char*) * cnt);
for(int i = 0; i < cnt; i++) {
char* ts = t;
while (*t != delimeter && *t != 0) t++;
int len = (t - ts + 1);
(*args)[i] = malloc(sizeof(char) * len);
memcpy((*args)[i], ts, sizeof(char) * (len - 1));
(*args)[i][len - 1] = 0;
while (*t == delimeter) t++;
}
return cnt;
}
这是一个字符串拆分功能,可以处理多字符定界符。请注意,如果定界符比要拆分的字符串长,则buffer
and stringLengths
将设置为(void *) 0
,并且numStrings
将设置为0
。
该算法已经过测试,并且可以工作。(免责声明:尚未针对非ASCII字符串进行过测试,并且假定调用方提供了有效的参数)
void splitString(const char *original, const char *delimiter, char ** * buffer, int * numStrings, int * * stringLengths){
const int lo = strlen(original);
const int ld = strlen(delimiter);
if(ld > lo){
*buffer = (void *)0;
*numStrings = 0;
*stringLengths = (void *)0;
return;
}
*numStrings = 1;
for(int i = 0;i < (lo - ld);i++){
if(strncmp(&original[i], delimiter, ld) == 0) {
i += (ld - 1);
(*numStrings)++;
}
}
*stringLengths = (int *) malloc(sizeof(int) * *numStrings);
int currentStringLength = 0;
int currentStringNumber = 0;
int delimiterTokenDecrementCounter = 0;
for(int i = 0;i < lo;i++){
if(delimiterTokenDecrementCounter > 0){
delimiterTokenDecrementCounter--;
} else if(i < (lo - ld)){
if(strncmp(&original[i], delimiter, ld) == 0){
(*stringLengths)[currentStringNumber] = currentStringLength;
currentStringNumber++;
currentStringLength = 0;
delimiterTokenDecrementCounter = ld - 1;
} else {
currentStringLength++;
}
} else {
currentStringLength++;
}
if(i == (lo - 1)){
(*stringLengths)[currentStringNumber] = currentStringLength;
}
}
*buffer = (char **) malloc(sizeof(char *) * (*numStrings));
for(int i = 0;i < *numStrings;i++){
(*buffer)[i] = (char *) malloc(sizeof(char) * ((*stringLengths)[i] + 1));
}
currentStringNumber = 0;
currentStringLength = 0;
delimiterTokenDecrementCounter = 0;
for(int i = 0;i < lo;i++){
if(delimiterTokenDecrementCounter > 0){
delimiterTokenDecrementCounter--;
} else if(currentStringLength >= (*stringLengths)[currentStringNumber]){
(*buffer)[currentStringNumber][currentStringLength] = 0;
delimiterTokenDecrementCounter = ld - 1;
currentStringLength = 0;
currentStringNumber++;
} else {
(*buffer)[currentStringNumber][currentStringLength] = (char)original[i];
currentStringLength++;
}
}
buffer[currentStringNumber][currentStringLength] = 0;
}
样例代码:
int main(){
const char *string = "STRING-1 DELIM string-2 DELIM sTrInG-3";
char **buffer;
int numStrings;
int * stringLengths;
splitString(string, " DELIM ", &buffer, &numStrings, &stringLengths);
for(int i = 0;i < numStrings;i++){
printf("String: %s\n", buffer[i]);
}
}
图书馆:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
buffer
并将分配留给调用方,以处理最大大小的元素。
我的方法是扫描字符串,并让指针指向分隔符(和第一个字符)之后的每个字符,同时将字符串中分隔符的外观分配为'\ 0'。
首先制作一个原始字符串的副本(因为它是常量),然后通过扫描将其传递给指针参数len来获得分割数。之后,将第一个结果指针指向复制字符串指针,然后扫描复制字符串:遇到分隔符后,将其分配给“ \ 0”,从而终止前一个结果字符串,然后将下一个结果字符串指针指向下一个字符指针。
char** split(char* a_str, const char a_delim, int* len){
char* s = (char*)malloc(sizeof(char) * strlen(a_str));
strcpy(s, a_str);
char* tmp = a_str;
int count = 0;
while (*tmp != '\0'){
if (*tmp == a_delim) count += 1;
tmp += 1;
}
*len = count;
char** results = (char**)malloc(count * sizeof(char*));
results[0] = s;
int i = 1;
while (*s!='\0'){
if (*s == a_delim){
*s = '\0';
s += 1;
results[i++] = s;
}
else s += 1;
}
return results;
}
我的代码(经过测试):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int dtmsplit(char *str, const char *delim, char ***array, int *length ) {
int i=0;
char *token;
char **res = (char **) malloc(0 * sizeof(char *));
/* get the first token */
token = strtok(str, delim);
while( token != NULL )
{
res = (char **) realloc(res, (i + 1) * sizeof(char *));
res[i] = token;
i++;
token = strtok(NULL, delim);
}
*array = res;
*length = i;
return 1;
}
int main()
{
int i;
int c = 0;
char **arr = NULL;
int count =0;
char str[80] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
c = dtmsplit(str, ",", &arr, &count);
printf("Found %d tokens.\n", count);
for (i = 0; i < count; i++)
printf("string #%d: %s\n", i, arr[i]);
return(0);
}
结果:
Found 12 tokens.
string #0: JAN
string #1: FEB
string #2: MAR
string #3: APR
string #4: MAY
string #5: JUN
string #6: JUL
string #7: AUG
string #8: SEP
string #9: OCT
string #10: NOV
string #11: DEC
爆炸和爆炸-初始字符串保持不变,动态内存分配
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
typedef struct
{
uintptr_t ptr;
int size;
} token_t;
int explode(char *str, int slen, const char *delimiter, token_t **tokens)
{
int i = 0, c1 = 0, c2 = 0;
for(i = 0; i <= slen; i++)
{
if(str[i] == *delimiter)
{
c1++;
}
}
if(c1 == 0)
{
return -1;
}
*tokens = (token_t*)calloc((c1 + 1), sizeof(token_t));
((*tokens)[c2]).ptr = (uintptr_t)str;
i = 0;
while(i <= slen)
{
if((str[i] == *delimiter) || (i == slen))
{
((*tokens)[c2]).size = (int)((uintptr_t)&(str[i]) - (uintptr_t)(((*tokens)[c2]).ptr));
if(i < slen)
{
c2++;
((*tokens)[c2]).ptr = (uintptr_t)&(str[i + 1]);
}
}
i++;
}
return (c1 + 1);
}
char* implode(token_t *tokens, int size, const char *delimiter)
{
int i, len = 0;
char *str;
for(i = 0; i < len; i++)
{
len += tokens[i].size + 1;
}
str = (char*)calloc(len, sizeof(char));
len = 0;
for(i = 0; i < size; i++)
{
memcpy((void*)&str[len], (void*)tokens[i].ptr, tokens[i].size);
len += tokens[i].size;
str[(len++)] = *delimiter;
}
str[len - 1] = '\0';
return str;
}
用法:
int main(int argc, char **argv)
{
int i, c;
char *exp = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
token_t *tokens;
char *imp;
printf("%s\n", exp);
if((c = explode(exp, strlen(exp), ",", &tokens)) > 0)
{
imp = implode(tokens, c, ",");
printf("%s\n", imp);
for(i = 0; i < c; i++)
{
printf("%.*s, %d\n", tokens[i].size, (char*)tokens[i].ptr, tokens[i].size);
}
}
free((void*)tokens);
free((void*)imp);
return 0;
}
如果您愿意使用外部库,我不建议您 bstrlib
。它需要一些额外的设置,但从长远来看更易于使用。
例如,在下面分割字符串,首先创建一个bstring
带有的bfromcstr()
调用。(A bstring
是围绕char缓冲区的包装器)。接下来,将字符串拆分为逗号,将结果保存在中struct bstrList
,其中包含字段qty
和数组entry
,该数组是bstring
s 的数组。
bstrlib
还有许多其他功能可以在bstring
s 上操作
非常简单...
#include "bstrlib.h"
#include <stdio.h>
int main() {
int i;
char *tmp = "Hello,World,sak";
bstring bstr = bfromcstr(tmp);
struct bstrList *blist = bsplit(bstr, ',');
printf("num %d\n", blist->qty);
for(i=0;i<blist->qty;i++) {
printf("%d: %s\n", i, bstr2cstr(blist->entry[i], '_'));
}
}
尝试使用strtok函数:
这里的问题是您必须words
立即处理。如果要将其存储在数组中,则必须为其分配correct size
未知的女巫。
因此,例如:
char **Split(char *in_text, char *in_sep)
{
char **ret = NULL;
int count = 0;
char *tmp = strdup(in_text);
char *pos = tmp;
// This is the pass ONE: we count
while ((pos = strtok(pos, in_sep)) != NULL)
{
count++;
pos = NULL;
}
// NOTE: the function strtok changes the content of the string! So we free and duplicate it again!
free(tmp);
pos = tmp = strdup(in_text);
// We create a NULL terminated array hence the +1
ret = calloc(count+1, sizeof(char*));
// TODO: You have to test the `ret` for NULL here
// This is the pass TWO: we store
count = 0;
while ((pos = strtok(pos, in_sep)) != NULL)
{
ret[count] = strdup(pos);
count++;
pos = NULL;
}
free(tmp);
return count;
}
// Use this to free
void Free_Array(char** in_array)
{
char *pos = in_array;
while (pos[0] != NULL)
{
free(pos[0]);
pos++;
}
free(in_array);
}
注意:为了避免分配问题,我们使用相同的循环和函数来计算计数(通过一遍)并制作副本(通过第二遍)。
注意2:您可以在单独的帖子中使用strtok的其他实现原因。
您可以这样使用:
int main(void)
{
char **array = Split("Hello World!", " ");
// Now you have the array
// ...
// Then free the memory
Free_Array(array);
array = NULL;
return 0;
}
(我没有对其进行测试,所以如果不起作用请通知我!)
围绕此问题的两个问题是内存管理和线程安全。从众多文章中可以看到,要在C中无缝地完成这项任务并不容易。我希望有一个解决方案:
我提出的解决方案符合所有这些条件。设置这里的工作可能比这里发布的其他解决方案要多一些,但是我认为在实践中,为了避免其他解决方案的常见陷阱,值得进行额外的工作。
#include <stdio.h>
#include <string.h>
struct splitFieldType {
char *field;
int maxLength;
};
typedef struct splitFieldType splitField;
int strsplit(splitField *fields, int expected, const char *input, const char *fieldSeparator, void (*softError)(int fieldNumber,int expected,int actual)) {
int i;
int fieldSeparatorLen=strlen(fieldSeparator);
const char *tNext, *tLast=input;
for (i=0; i<expected && (tNext=strstr(tLast, fieldSeparator))!=NULL; ++i) {
int len=tNext-tLast;
if (len>=fields[i].maxLength) {
softError(i,fields[i].maxLength-1,len);
len=fields[i].maxLength-1;
}
fields[i].field[len]=0;
strncpy(fields[i].field,tLast,len);
tLast=tNext+fieldSeparatorLen;
}
if (i<expected) {
if (strlen(tLast)>fields[i].maxLength) {
softError(i,fields[i].maxLength,strlen(tLast));
} else {
strcpy(fields[i].field,tLast);
}
return i+1;
} else {
return i;
}
}
void monthSplitSoftError(int fieldNumber, int expected, int actual) {
fprintf(stderr,"monthSplit: input field #%d is %d bytes, expected %d bytes\n",fieldNumber+1,actual,expected);
}
int main() {
const char *fieldSeparator=",";
const char *input="JAN,FEB,MAR,APRI,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,FOO,BAR";
struct monthFieldsType {
char field1[4];
char field2[4];
char field3[4];
char field4[4];
char field5[4];
char field6[4];
char field7[4];
char field8[4];
char field9[4];
char field10[4];
char field11[4];
char field12[4];
} monthFields;
splitField inputFields[12] = {
{monthFields.field1, sizeof(monthFields.field1)},
{monthFields.field2, sizeof(monthFields.field2)},
{monthFields.field3, sizeof(monthFields.field3)},
{monthFields.field4, sizeof(monthFields.field4)},
{monthFields.field5, sizeof(monthFields.field5)},
{monthFields.field6, sizeof(monthFields.field6)},
{monthFields.field7, sizeof(monthFields.field7)},
{monthFields.field8, sizeof(monthFields.field8)},
{monthFields.field9, sizeof(monthFields.field9)},
{monthFields.field10, sizeof(monthFields.field10)},
{monthFields.field11, sizeof(monthFields.field11)},
{monthFields.field12, sizeof(monthFields.field12)}
};
int expected=sizeof(inputFields)/sizeof(splitField);
printf("input data: %s\n", input);
printf("expecting %d fields\n",expected);
int ct=strsplit(inputFields, expected, input, fieldSeparator, monthSplitSoftError);
if (ct!=expected) {
printf("string split %d fields, expected %d\n", ct,expected);
}
for (int i=0;i<expected;++i) {
printf("field %d: %s\n",i+1,inputFields[i].field);
}
printf("\n");
printf("Direct structure access, field 10: %s", monthFields.field10);
}
下面是一个示例编译和输出。请注意,在我的示例中,我故意拼出了“ APRIL”,以便您可以看到软错误的工作原理。
$ gcc strsplitExample.c && ./a.out
input data: JAN,FEB,MAR,APRIL,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,FOO,BAR
expecting 12 fields
monthSplit: input field #4 is 5 bytes, expected 3 bytes
field 1: JAN
field 2: FEB
field 3: MAR
field 4: APR
field 5: MAY
field 6: JUN
field 7: JUL
field 8: AUG
field 9: SEP
field 10: OCT
field 11: NOV
field 12: DEC
Direct structure access, field 10: OCT
请享用!
这是另一个可以安全操作的实现,用于对与问题中请求的原型匹配的字符串字面量进行标记化,从而将分配的指针对指针返回给char(例如char **
)。分隔符字符串可以包含多个字符,输入字符串可以包含任意数量的标记。所有分配和重新分配均由POSIX 处理malloc
或realloc
不由POSIX 处理strdup
。
分配的指针的初始数量由NPTRS
常量控制,唯一的限制是它大于零。该char **
返回包含一个定点 NULL
后的最后一个记号相似*argv[]
,并通过形式使用execv
,execvp
和execve
。
与strtok()
多个顺序分隔符视为单个分隔符,所以"JAN,FEB,MAR,APR,MAY,,,JUN,JUL,AUG,SEP,OCT,NOV,DEC"
会被解析为如果只有一个','
中隔离"MAY,JUN"
。
下面的函数被内联注释,并main()
添加了一个短短字,以分隔月份。分配的初始指针数设置为2
,在标记输入字符串期间强制进行三个重新分配:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NPTRS 2 /* initial number of pointers to allocate (must be > 0) */
/* split src into tokens with sentinel NULL after last token.
* return allocated pointer-to-pointer with sentinel NULL on success,
* or NULL on failure to allocate initial block of pointers. The number
* of allocated pointers are doubled each time reallocation required.
*/
char **strsplit (const char *src, const char *delim)
{
int i = 0, in = 0, nptrs = NPTRS; /* index, in/out flag, ptr count */
char **dest = NULL; /* ptr-to-ptr to allocate/fill */
const char *p = src, *ep = p; /* pointer and end-pointer */
/* allocate/validate nptrs pointers for dest */
if (!(dest = malloc (nptrs * sizeof *dest))) {
perror ("malloc-dest");
return NULL;
}
*dest = NULL; /* set first pointer as sentinel NULL */
for (;;) { /* loop continually until end of src reached */
if (!*ep || strchr (delim, *ep)) { /* if at nul-char or delimiter char */
size_t len = ep - p; /* get length of token */
if (in && len) { /* in-word and chars in token */
if (i == nptrs - 1) { /* used pointer == allocated - 1? */
/* realloc dest to temporary pointer/validate */
void *tmp = realloc (dest, 2 * nptrs * sizeof *dest);
if (!tmp) {
perror ("realloc-dest");
break; /* don't exit, original dest still valid */
}
dest = tmp; /* assign reallocated block to dest */
nptrs *= 2; /* increment allocated pointer count */
}
/* allocate/validate storage for token */
if (!(dest[i] = malloc (len + 1))) {
perror ("malloc-dest[i]");
break;
}
memcpy (dest[i], p, len); /* copy len chars to storage */
dest[i++][len] = 0; /* nul-terminate, advance index */
dest[i] = NULL; /* set next pointer NULL */
}
if (!*ep) /* if at end, break */
break;
in = 0; /* set in-word flag 0 (false) */
}
else { /* normal word char */
if (!in) /* if not in-word */
p = ep; /* update start to end-pointer */
in = 1; /* set in-word flag 1 (true) */
}
ep++; /* advance to next character */
}
return dest;
}
int main (void) {
char *str = "JAN,FEB,MAR,APR,MAY,,,JUN,JUL,AUG,SEP,OCT,NOV,DEC",
**tokens; /* pointer to pointer to char */
if ((tokens = strsplit (str, ","))) { /* split string into tokens */
for (char **p = tokens; *p; p++) { /* loop over filled pointers */
puts (*p);
free (*p); /* don't forget to free allocated strings */
}
free (tokens); /* and pointers */
}
}
使用/输出示例
$ ./bin/splitinput
JAN
FEB
MAR
APR
MAY
JUN
JUL
AUG
SEP
OCT
NOV
DEC
如果您还有其他问题,请告诉我。
strtok
标准库中的函数来实现同一目的。