Last active
August 29, 2015 14:06
-
-
Save deqing/e09f5c0a912018864e12 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <string.h> | |
using namespace std; | |
/*
 * Splits a shell-like command line into blank-separated tokens.
 *
 * Rules:
 *   - spaces and tabs separate tokens; leading blanks are skipped;
 *   - text between a pair of matching ' or " characters is copied verbatim
 *     (blanks and the other quote character included); the enclosing quote
 *     characters themselves are dropped;
 *   - a backslash makes the following character literal, inside or outside
 *     quotes; a backslash that is the very last character is kept as-is.
 *
 * Result layout (one single allocation):
 *   [*num pointers][token 0 '\0' token 1 '\0' ... "NULL" '\0']
 * The last entry always points at the literal text "NULL" and is included in
 * *num, so an empty input yields {"", "NULL"} with *num == 2.
 *
 * The block is obtained with new char[]; release it with
 *   delete[] reinterpret_cast<char*>(result);
 *
 * Returns NULL and sets *num to 0 when input is NULL.
 */
char** str_split(const char* input, int* num) {
    if (input == NULL) {
        *num = 0;
        return NULL;
    }

    // Measured before the leading blanks are skipped, so it is an upper bound
    // on the number of characters that can be copied into the result.
    const size_t len = strlen(input);

    while (*input == ' ' || *input == '\t') {
        ++input;
    }

    // Pass 1: count the entries (first token + trailing "NULL" entry = 2).
    int count = 2;
    char quote = '\0';  // currently open quote character, '\0' when none
    const char* p = input;
    while (*p) {
        if (*p == '\\') {
            // Never step over the terminator when the backslash is last.
            p += (p[1] != '\0') ? 2 : 1;
        } else if (quote != '\0') {
            if (*p == quote) {
                quote = '\0';
            }
            ++p;
        } else if (*p == '\'' || *p == '"') {
            quote = *p;
            ++p;
        } else if (*p == ' ' || *p == '\t') {
            while (*p == ' ' || *p == '\t') {
                ++p;
            }
            // Trailing blanks do not start another token.
            if (*p) {
                ++count;
            }
        } else {
            ++p;
        }
    }
    *num = count;

    // Text area: at most len copied characters/separators, one terminator for
    // the last token, plus the five bytes of "NULL\0"  =>  len + 6.
    const size_t table_bytes = sizeof(char*) * static_cast<size_t>(count);
    char* buf = new char[table_bytes + len + 6];
    char** output = reinterpret_cast<char**>(buf);
    char* q = buf + table_bytes;

    // Pass 2: same scan as above, this time copying the characters.
    int i = 0;
    output[i++] = q;
    quote = '\0';
    p = input;
    while (*p) {
        if (*p == '\\') {
            if (p[1] != '\0') {
                ++p;            // drop the backslash, keep what it escapes
            }
            *q++ = *p++;
        } else if (quote != '\0') {
            if (*p == quote) {
                quote = '\0';   // closing quote is not copied
                ++p;
            } else {
                *q++ = *p++;
            }
        } else if (*p == '\'' || *p == '"') {
            quote = *p;         // opening quote is not copied
            ++p;
        } else if (*p == ' ' || *p == '\t') {
            *q++ = '\0';
            while (*p == ' ' || *p == '\t') {
                ++p;
            }
            if (*p) {
                output[i++] = q;
            }
        } else {
            *q++ = *p++;
        }
    }
    *q++ = '\0';

    // Final entry: the literal text "NULL".
    output[i++] = q;
    *q++ = 'N';
    *q++ = 'U';
    *q++ = 'L';
    *q++ = 'L';
    *q++ = '\0';
    return output;
}
int main(int argc, char* argv[]) { | |
int num; | |
//const char* input = " find . -name *.c "; | |
const char* input = " grep 'char c = \\\'x\\\';' my\\\ file "; | |
char** output = str_split(input, &num); | |
for (int i = 0; i < num; i++) | |
printf("output[%d] = \"%s\"\n", i, output[i]); | |
/* free memory please */ | |
delete[]output; | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdio> | |
using namespace std; | |
// Tells whether input[i] is a separating blank: a space or tab that is not
// directly preceded by a backslash.
bool IsWhitespace(const char* input, int i) {
    const char current = input[i];
    if (current != ' ' && current != '\t') {
        return false;
    }
    if (i == 0) {
        return true;  // nothing in front of it that could escape it
    }
    return input[i - 1] != '\\';
}
// Tells whether input[i] is a quote character (' or ") that is not directly
// preceded by a backslash.
bool IsQuotation(const char* input, int i) {
    const char current = input[i];
    if (current != '\'' && current != '\"') {
        return false;
    }
    if (i == 0) {
        return true;  // nothing in front of it that could escape it
    }
    return input[i - 1] != '\\';
}
// Tells whether input[i] is a backslash used as an escape, i.e. one that is
// followed by a character the splitter treats specially: a quote, a blank
// (space or tab) or another backslash.  Tab is included so that this agrees
// with IsWhitespace, which already treats a backslash-tab pair as escaped.
bool IsEscapeChar(const char* input, int i) {
    if (input[i] != '\\') {
        return false;
    }
    switch (input[i + 1]) {
        case '\'':
        case '\"':
        case ' ':
        case '\t':
        case '\\':
            return true;
        default:
            return false;  // includes the terminating '\0'
    }
}
void ToOutput(const char* input, char** output, | |
int sub_start, int sub_end, char quotation) { | |
// Get the number of escape characters and | |
// quotation characters, and don't add them | |
// to the output. | |
int offset = 0; | |
for (int k = sub_start; k < sub_end; k++) { | |
if (IsQuotation(input, k) && input[k] == quotation || | |
IsEscapeChar(input, k)) { | |
offset++; | |
} | |
} | |
*output = new char[sub_end - sub_start + 1 - offset]; | |
offset = 0; | |
for (int k = sub_start; k < sub_end; k++) { | |
if (IsQuotation(input, k) && input[k] == quotation || | |
IsEscapeChar(input, k)) { | |
offset++; | |
continue; | |
} | |
(*output)[k - sub_start - offset] = input[k]; | |
} | |
(*output)[sub_end - sub_start - offset] = 0; | |
} | |
char** str_split(const char* input, int* num) { | |
*num = 0; | |
if (!input) { | |
return 0; | |
} | |
// Ignore the leading and trailing whitespaces. | |
int len; | |
int start, end; | |
int i; | |
for (len = 0; input[len]; len++); | |
for (i = 0; IsWhitespace(input, i); i++); | |
start = i; | |
for (i = len - 1; i >= 0 && IsWhitespace(input, i); i--); | |
end = i + 1; | |
// Get the the number of substrings after splitting | |
// the original string, and check the validity of | |
// the original string. | |
int count = 1; | |
bool in_quotations = 0; | |
char start_quotation; | |
for (int i = start; i < end; i++) { | |
if (IsQuotation(input, i)) { // Deal with the strings in the quotations. | |
if (!in_quotations) { | |
in_quotations = 1; | |
start_quotation = input[i]; | |
} else { | |
if (input[i] != start_quotation) continue; // 'abc"abc"' or "abc'abc'" | |
if (i + 1 < end && IsQuotation(input, i + 1)) { // 'abc''abc' | |
i++; | |
continue; | |
} | |
in_quotations = 0; | |
count++; | |
} | |
continue; | |
} | |
if (in_quotations) continue; | |
if (!IsWhitespace(input, i) && | |
(i == start || IsWhitespace(input, i - 1))) { | |
count++; | |
} | |
} | |
if (in_quotations) { // 'abc | |
fprintf(stderr, "Quotations Error!\n"); | |
return 0; | |
} | |
// Generate the substrings. | |
char **ret = new char*[count]; | |
int sub_start; | |
int j = 0; | |
for (int i = start; i < end; i++) { | |
if (IsQuotation(input, i)) { | |
if (!in_quotations) { | |
in_quotations = 1; | |
start_quotation = input[i]; | |
sub_start = i; | |
} else { | |
if (input[i] != start_quotation) continue; | |
if (i + 1 < end && IsQuotation(input, i + 1)) { | |
i++; | |
continue; | |
} | |
in_quotations = 0; | |
ToOutput(input, &ret[j], sub_start, i + 1, start_quotation); | |
j++; | |
} | |
continue; | |
} | |
if (in_quotations) continue; | |
if (!IsWhitespace(input, i) && | |
(i == start || IsWhitespace(input, i - 1))) { | |
sub_start = i; | |
} | |
if (!IsWhitespace(input, i) && | |
(i == end - 1 || IsWhitespace(input, i + 1))) { | |
ToOutput(input, &ret[j], sub_start, i + 1, 0); | |
j++; | |
} | |
} | |
ret[count - 1] = 0; | |
*num = count; | |
return ret; | |
} | |
int main(int argc, char* argv[]) { | |
int num; | |
const char *input = " grep 'char c = \\'x\\';' my\\ file "; | |
char **output = str_split(input, &num); | |
for (int i = 0; i < num; i++) | |
printf("output[%d] = \"%s\"\n", i, output[i]); | |
for (int i = 0; i < num; i++) { | |
delete[] output[i]; | |
output[i] = 0; | |
} | |
delete[] output; | |
output = 0; | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
/*
 * Splits a shell-like command line into blank-separated tokens.
 *
 * Rules:
 *   - spaces and tabs separate tokens; leading blanks are skipped;
 *   - text between a pair of matching ' or " characters is copied verbatim
 *     (blanks included) and the enclosing quote characters are dropped;
 *   - a backslash makes the following character literal; a backslash that is
 *     the very last character is kept as-is.
 *
 * Result layout (one single malloc block, release it with free()):
 *   [*num pointers][token 0 '\0' token 1 '\0' ... "NULL" '\0']
 * The last entry always points at the literal text "NULL" and is included in
 * *num.  *num is always overwritten; its incoming value is ignored.
 *
 * Returns NULL and sets *num to 0 when input is NULL or allocation fails.
 */
char** str_split(const char* input, int* num)
{
    size_t len;
    size_t table_bytes;
    const char *p;
    char *buf;
    char *q;
    char **output;
    char quote = '\0';  /* currently open quote character, '\0' when none */
    int count = 2;      /* first token + trailing "NULL" entry */
    int i = 0;

    if (input == NULL)
    {
        *num = 0;
        return NULL;
    }

    /* Measured before skipping leading blanks: an upper bound on the number
       of characters that can be copied into the result. */
    len = strlen(input);
    while (*input == ' ' || *input == '\t')
    {
        ++input;
    }

    /* Pass 1: count the entries. */
    p = input;
    while (*p)
    {
        if (*p == '\\')
        {
            /* Never step over the terminator when the backslash is last. */
            p += (p[1] != '\0') ? 2 : 1;
        }
        else if (quote != '\0')
        {
            if (*p == quote)
            {
                quote = '\0';
            }
            ++p;
        }
        else if (*p == '\'' || *p == '"')
        {
            quote = *p;
            ++p;
        }
        else if (*p == ' ' || *p == '\t')
        {
            while (*p == ' ' || *p == '\t')
            {
                ++p;
            }
            if (*p)  /* trailing blanks do not start another token */
            {
                ++count;
            }
        }
        else
        {
            ++p;
        }
    }

    /* Text area: at most len copied characters/separators, one terminator
       for the last token, plus the five bytes of "NULL\0"  =>  len + 6. */
    table_bytes = sizeof(char *) * (size_t)count;
    buf = (char *)malloc(table_bytes + len + 6);
    if (buf == NULL)
    {
        *num = 0;
        return NULL;
    }
    *num = count;
    output = (char **)buf;
    q = buf + table_bytes;

    /* Pass 2: same scan, this time copying the characters. */
    output[i++] = q;
    quote = '\0';
    p = input;
    while (*p)
    {
        if (*p == '\\')
        {
            if (p[1] != '\0')
            {
                ++p;  /* drop the backslash, keep what it escapes */
            }
            *q++ = *p++;
        }
        else if (quote != '\0')
        {
            if (*p == quote)
            {
                quote = '\0';  /* closing quote is not copied */
                ++p;
            }
            else
            {
                *q++ = *p++;
            }
        }
        else if (*p == '\'' || *p == '"')
        {
            quote = *p;  /* opening quote is not copied */
            ++p;
        }
        else if (*p == ' ' || *p == '\t')
        {
            *q++ = '\0';
            while (*p == ' ' || *p == '\t')
            {
                ++p;
            }
            if (*p)
            {
                output[i++] = q;
            }
        }
        else
        {
            *q++ = *p++;
        }
    }
    *q++ = '\0';

    /* Final entry: the literal text "NULL". */
    output[i++] = q;
    *q++ = 'N';
    *q++ = 'U';
    *q++ = 'L';
    *q++ = 'L';
    *q++ = '\0';
    return output;
}
/*
 * Demo driver: splits one sample command line with str_split and prints
 * every returned entry.
 */
int main(int argc, char* argv[])
{
    int num = 2;
    /* The doubled backslash is one backslash at run time, so the text handed
       to str_split really contains an escaped space between "my" and "file". */
    const char* input = " find . -name *.c my\\ file";
    char** output = str_split(input, &num);
    if (output == NULL)
    {
        return 1;
    }
    for (int i = 0; i < num; i++)
    {
        printf("output[%d] = \"%s\"\n", i, output[i]);
    }
    free(output);
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
/* Number of string slots available in a strarray. */
#define MAX 64

/* Fixed-capacity list of C strings, returned by value. */
struct strarray {
    /* String pointers; only the first `size` slots are meaningful. */
    char *strings[MAX];
    /* How many slots of `strings` are in use. */
    size_t size;
};
static struct strarray foo(const char* s); | |
int main(int argc, char** argv) { | |
int i = 0; | |
struct strarray array = foo(""); | |
for (i = 0; i < array.size; ++i) { | |
printf("%s\n", array.strings[i]); | |
free(array.strings[i]); | |
} | |
array = foo("abc def gh z"); | |
for (i = 0; i < array.size; ++i) { | |
printf("%s\n", array.strings[i]); | |
free(array.strings[i]); | |
} | |
return 0; | |
} | |
static struct strarray foo(const char* s) | |
{ | |
struct strarray array; | |
size_t len = strlen(s); | |
int begin = 0, i, j = 0; | |
for (i = 0; i < len + 1; ++i) { | |
if ((s[i] == ' ') || (s[i] == '\0')) { | |
if (i - begin > 0) { | |
// TODO check NULL pointer. | |
array.strings[j] = (char *) malloc(i - begin + 1); | |
strncpy(array.strings[j], &s[begin], i - begin); | |
array.strings[j][i - begin] = '\0'; | |
j++; | |
} | |
begin = i + 1; | |
} | |
} | |
array.size = j; | |
return array; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <assert.h> | |
#include <string.h> | |
using namespace std; | |
/*
 * Builds a packed copy of `input` in which every token is NUL-terminated.
 *
 *   - Leading and trailing blanks (space, tab) are ignored and each run of
 *     blanks between tokens becomes a single '\0'.
 *   - Text between a pair of identical quotes (' or ") is copied verbatim and
 *     the quotes themselves are dropped.
 *   - A backslash makes the following character literal; a backslash that is
 *     the very last character of the string is copied as-is.
 *
 * After the last token the literal text "NULL" is appended as one more
 * NUL-terminated entry.
 *
 * blankNum (output) receives the number of separators written plus one.
 * The buffer is allocated with new char[]; the caller releases it with
 * delete[].
 */
char* erase_blank( const char* input , int& blankNum )
{
    assert( input != NULL );
    const int len = static_cast<int>( strlen( input ) );
    // One allocation only: up to len copied characters, one '\0' closing the
    // last token, four bytes for "NULL" and one '\0' closing it.
    char* result = new char[len+2+4];

    // First and last positions of the input that are not blanks.
    int begin = 0;
    while( begin<len && (input[begin]==' '||input[begin]=='\t') )
        begin++;
    int end = len-1;
    while( end>=0 && (input[end]==' '||input[end]=='\t') )
        end--;

    int left = 0;           // write position in result
    int right = begin;      // read position in input
    char quotation = '\0';  // currently open quote, '\0' when none
    blankNum = 0;
    while( right <= end )
    {
        const char c = input[right];
        if( quotation=='\0' && (c==' ' || c=='\t') )
        {
            result[left++] = '\0';  // one separator per run of blanks
            blankNum++;
            while( right<=end && (input[right]==' '||input[right]=='\t') )
                right++;
        }
        else if( quotation=='\0' && (c=='\'' || c=='\"') )
        {
            quotation = c;          // opening quote is not copied
            right++;
        }
        else if( quotation!='\0' && c==quotation )
        {
            quotation = '\0';       // closing quote is not copied
            right++;
        }
        else
        {
            // Skip an escaping backslash, but never step onto the string
            // terminator: a backslash at the very end is ordinary text.
            if( c=='\\' && input[right+1]!='\0' )
                right++;
            result[left++] = input[right++];
        }
    }
    result[left] = '\0';
    result[left+1] = 'N';
    result[left+2] = 'U';
    result[left+3] = 'L';
    result[left+4] = 'L';
    result[left+5] = '\0';
    blankNum++;
    return result;
}
char** str_split( const char* input, int& num ) | |
{//Prototype | |
assert( input!=NULL ); | |
char* tmp = erase_blank( input , num ); | |
char** result = new char*[++num]; | |
int begin = 0 , end = 0; | |
int cur = 0; | |
while( cur<num ) | |
{ | |
if( tmp[end] == '\0') | |
{ | |
result[cur++] = tmp+begin; | |
begin = ++end; | |
} | |
else | |
end++; | |
} | |
return result; | |
} | |
int main(int argc, char* argv[]) | |
{ | |
int num; | |
const char* input = " grep 'char c = \\\'x\\\';' my\\\ file "; | |
//const char* input = " grep 'char c = \'x\';' my\ file "; | |
//const char* input = " find . -name *.c "; | |
//cout << "input:\"" << input << '\"'<< endl << endl; | |
/* | |
注意:我认为StringSplit中此处给的样例输入有问题 | |
在双引号内单个\会自动认为是转义,所以 | |
" grep 'char c = \'x\';' my\ file " | |
实际上等价于 | |
" grep 'char c = 'x';' my file ",这样会给程序的解析带来歧义 | |
此处我把input改为 | |
" grep 'char c = \\\'x\\\';' my\\\ file " | |
实际上即为 | |
" grep 'char c = \'x\';' my\ file ",此处的\不再表示转义,而是字符'\',程序对这种情况能正常解析 | |
不知道我这样做对不对,如果有问题请及时联系我,我再想办法修改,谢谢! | |
*/ | |
char** output = str_split( input, num ); | |
cout << "char** output[" << num << "];" << endl; | |
for ( int i = 0; i < num; i++) | |
cout << "output[" << i << "] = " << "\"" << output[i] << "\"" << endl; | |
delete output[0]; | |
delete output; | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdio> | |
#include <cstring> | |
#include <cstdlib> | |
#include <iostream> | |
using namespace std; | |
// True for the two separator characters: space and tab.
static bool IsBlankChar(char c) {
    return c == ' ' || c == '\t';
}

// Splits `input` at runs of spaces/tabs.
//
// Returns one malloc'ed block (release it with free()) that starts with *num
// char* entries and is followed by the token text those entries point to.
// Leading and trailing blanks are ignored.
//
// Returns NULL and sets *num to 0 when input is NULL, contains no token at
// all, or the allocation fails.
char** str_split(const char* input, int* num) {
    *num = 0;
    if (input == NULL) return NULL;

    // Trim to [st, stop): first non-blank character up to one past the last.
    const char* st = input;
    const char* stop = input + std::strlen(input);
    while (st < stop && IsBlankChar(*st)) ++st;
    while (stop > st && IsBlankChar(stop[-1])) --stop;
    if (st == stop) return NULL;  // empty or blanks only

    // Number of tokens = 1 + number of blank runs inside the trimmed range.
    int split = 1;
    for (const char* p = st; p < stop; ++p) {
        if (IsBlankChar(*p)) {
            ++split;
            while (p + 1 < stop && IsBlankChar(p[1])) ++p;
        }
    }

    // Single allocation: the pointer table first, the text right behind it.
    const size_t table_bytes = static_cast<size_t>(split) * sizeof(char*);
    const size_t text_len = static_cast<size_t>(stop - st);
    char** output = static_cast<char**>(std::malloc(table_bytes + text_len + 1));
    if (output == NULL) return NULL;

    char* cur = reinterpret_cast<char*>(output) + table_bytes;
    std::memcpy(cur, st, text_len);
    char* const limit = cur + text_len;  // end of all token text
    *limit = '\0';

    // Cut the copied text in place: the first blank of each run becomes the
    // terminator of the token before it.
    int n = 0;
    output[n++] = cur;
    while (cur < limit) {
        if (!IsBlankChar(*cur)) {
            ++cur;
            continue;
        }
        *cur++ = '\0';
        while (cur < limit && IsBlankChar(*cur)) ++cur;
        if (cur == limit) break;
        output[n++] = cur;
    }
    *num = n;
    return output;
}
int main() { | |
int num; | |
const char* input = " find . -name *.c "; | |
char** output = str_split(input, &num); | |
for (int i = 0; i < num; i++) | |
printf("output[%d] = \"%s\"\n", i, output[i]); | |
/* free memory please */ | |
free(output); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment