Skip to content

Instantly share code, notes, and snippets.

@deqing
Last active August 29, 2015 14:06
Show Gist options
  • Save deqing/e09f5c0a912018864e12 to your computer and use it in GitHub Desktop.
Save deqing/e09f5c0a912018864e12 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <string.h>
using namespace std;
// Splits a shell-style command line into its arguments.
//
// Rules:
//   * Runs of unquoted spaces/tabs separate arguments; leading and trailing
//     runs are ignored.
//   * '...' and "..." group text (including blanks) into the current argument.
//     The quote characters themselves are removed. The other kind of quote is
//     an ordinary character while a quoted region is open.
//   * A backslash makes the next character literal, inside or outside quotes.
//     A backslash that is the very last character has nothing to escape and
//     is discarded.
//
// The result is a table of C strings whose final entry is the literal text
// "NULL" (not a null pointer). *num receives the number of table entries,
// sentinel included, so it is always >= 2 for a non-null input.
//
// Everything lives in ONE allocation made with new char[]: the pointer table
// comes first, the string bytes follow it. The caller owns that allocation.
//
// Returns NULL (and sets *num to 0 when num is non-null) if input or num is
// NULL.
char** str_split(const char* input, int* num) {
    if (num == NULL) {
        return NULL;
    }
    *num = 0;
    if (input == NULL) {
        return NULL;
    }

    // Skip leading blanks so the first argument starts at input[0].
    while (*input == ' ' || *input == '\t') {
        ++input;
    }
    const size_t len = strlen(input);

    // Pass 1: count table entries. Start at 2: the first argument plus the
    // trailing "NULL" sentinel.
    int count = 2;
    char quote = '\0';  // the quote character currently open, or 0
    const char* p = input;
    while (*p) {
        if (*p == '\\') {
            // Never step over the terminator when the backslash is last.
            p += (p[1] != '\0') ? 2 : 1;
        } else if (quote != '\0') {
            if (*p == quote) {
                quote = '\0';
            }
            ++p;
        } else if (*p == '\'' || *p == '"') {
            quote = *p;
            ++p;
        } else if (*p == ' ' || *p == '\t') {
            while (*p == ' ' || *p == '\t') {
                ++p;
            }
            // Trailing blanks do not start another argument.
            if (*p) {
                ++count;
            }
        } else {
            ++p;
        }
    }

    // Text area bound: at most one output byte per input byte (len), one
    // terminator for the last argument, and five bytes for "NULL\0".
    const size_t table_bytes = sizeof(char*) * static_cast<size_t>(count);
    char* buf = new char[table_bytes + len + 6];
    char** output = reinterpret_cast<char**>(buf);
    char* q = buf + table_bytes;

    // Pass 2: same state machine as pass 1, now copying characters.
    int i = 0;
    output[i++] = q;
    quote = '\0';
    p = input;
    while (*p) {
        if (*p == '\\') {
            if (p[1] != '\0') {
                *q++ = p[1];
                p += 2;
            } else {
                ++p;  // dangling backslash: drop it
            }
        } else if (quote != '\0') {
            if (*p == quote) {
                quote = '\0';
            } else {
                *q++ = *p;
            }
            ++p;
        } else if (*p == '\'' || *p == '"') {
            quote = *p;
            ++p;
        } else if (*p == ' ' || *p == '\t') {
            *q++ = '\0';
            while (*p == ' ' || *p == '\t') {
                ++p;
            }
            if (*p) {
                output[i++] = q;
            }
        } else {
            *q++ = *p++;
        }
    }
    *q++ = '\0';

    // Sentinel entry: the four characters N-U-L-L plus a terminator.
    output[i++] = q;
    memcpy(q, "NULL", 5);

    *num = count;
    return output;
}
// Demo driver: splits one sample command line and prints every table entry
// that str_split reports through num.
int main(int argc, char* argv[]) {
    int num = 0;
    //const char* input = " find . -name *.c ";
    // Each "\\" below is one backslash at run time, so str_split receives
    //   grep 'char c = \'x\';' my\ file
    // (the previous spelling relied on the ill-formed escape "\ ").
    const char* input = " grep 'char c = \\'x\\';' my\\ file ";
    char** output = str_split(input, &num);
    for (int i = 0; i < num; i++)
        printf("output[%d] = \"%s\"\n", i, output[i]);
    // str_split builds the table inside a single new char[] buffer, so it
    // must be released as a char array rather than as an array of char*.
    delete[] reinterpret_cast<char*>(output);
    return 0;
}
#include <cstdio>
using namespace std;
// True when input[i] is a space or tab that is not immediately preceded by
// a backslash.
bool IsWhitespace(const char* input, int i) {
  const char ch = input[i];
  if (ch != ' ' && ch != '\t') {
    return false;
  }
  if (i == 0) {
    return true;  // nothing in front of it, so it cannot be escaped
  }
  return input[i - 1] != '\\';
}
// True when input[i] is a single or double quote that is not immediately
// preceded by a backslash.
bool IsQuotation(const char* input, int i) {
  const char ch = input[i];
  if (ch != '\'' && ch != '\"') {
    return false;
  }
  if (i == 0) {
    return true;  // nothing in front of it, so it cannot be escaped
  }
  return input[i - 1] != '\\';
}
// True when input[i] is a backslash whose next character is one of
// ' " space or backslash. The next character is inspected only after
// input[i] has been confirmed to be a backslash.
bool IsEscapeChar(const char* input, int i) {
  if (input[i] != '\\') {
    return false;
  }
  switch (input[i + 1]) {
    case '\'':
    case '\"':
    case ' ':
    case '\\':
      return true;
    default:
      return false;
  }
}
// Copies input[sub_start, sub_end) into a freshly allocated, NUL-terminated
// string stored in *output.
//
// Two kinds of characters are left out of the copy:
//   * unescaped quote characters equal to `quotation`
//     (pass 0 to keep every quote), and
//   * backslashes for which IsEscapeChar() is true.
//
// The string is allocated with new char[].
void ToOutput(const char* input, char** output,
              int sub_start, int sub_end, char quotation) {
  // First sweep: find out how many characters will be dropped so the
  // allocation is exactly as large as the result.
  int dropped = 0;
  for (int pos = sub_start; pos < sub_end; ++pos) {
    const bool skip = (IsQuotation(input, pos) && input[pos] == quotation) ||
                      IsEscapeChar(input, pos);
    if (skip) {
      ++dropped;
    }
  }

  const int length = sub_end - sub_start - dropped;
  char* text = new char[length + 1];
  *output = text;

  // Second sweep: copy everything that is kept, packing it to the front.
  dropped = 0;
  for (int pos = sub_start; pos < sub_end; ++pos) {
    const bool skip = (IsQuotation(input, pos) && input[pos] == quotation) ||
                      IsEscapeChar(input, pos);
    if (skip) {
      ++dropped;
    } else {
      text[pos - sub_start - dropped] = input[pos];
    }
  }
  text[length] = 0;
}
// Walks input[start, end) once and reports every token it finds.
//
// When `tokens` is null the walk only counts. Otherwise each token is
// materialised with ToOutput() into tokens[0], tokens[1], ... in order.
// Because counting and generating share this single loop, the number of
// slots written can never differ from the number counted.
//
// *token_count receives the number of tokens. The return value is false
// when the range ends inside an unterminated quoted string.
static bool ScanTokens(const char* input, int start, int end,
                       char** tokens, int* token_count) {
  bool in_quotations = false;
  char start_quotation = 0;
  int sub_start = -1;  // first index of the token being built, -1 if none
  int found = 0;
  for (int i = start; i < end; i++) {
    if (IsQuotation(input, i)) {  // Deal with the strings in the quotations.
      if (!in_quotations) {
        // Plain text glued to the front of a quoted string (ab'cd') ends
        // here and becomes its own token.
        if (sub_start >= 0) {
          if (tokens) ToOutput(input, &tokens[found], sub_start, i, 0);
          found++;
        }
        in_quotations = true;
        start_quotation = input[i];
        sub_start = i;
      } else {
        if (input[i] != start_quotation) continue;  // 'abc"abc"' or "abc'abc'"
        if (i + 1 < end && IsQuotation(input, i + 1)) {  // 'abc''abc'
          i++;
          continue;
        }
        in_quotations = false;
        if (tokens) {
          ToOutput(input, &tokens[found], sub_start, i + 1, start_quotation);
        }
        found++;
        sub_start = -1;
      }
      continue;
    }
    if (in_quotations) continue;
    if (IsWhitespace(input, i)) continue;
    // An unquoted, non-blank character: open a token if none is open ...
    if (sub_start < 0) {
      sub_start = i;
    }
    // ... and close it when the next character is a separator or the end.
    if (i == end - 1 || IsWhitespace(input, i + 1)) {
      if (tokens) ToOutput(input, &tokens[found], sub_start, i + 1, 0);
      found++;
      sub_start = -1;
    }
  }
  *token_count = found;
  return !in_quotations;
}

// Splits a command line into tokens.
//
// Unescaped blanks separate tokens. A quoted string ('...' or "...") is one
// token with its enclosing quotes removed. ToOutput() decides which escape
// backslashes are dropped.
//
// Returns an array allocated with new char*[] whose last element is a null
// pointer; every other element comes from ToOutput(). *num receives the
// array length INCLUDING that null terminator.
//
// Returns 0 with *num == 0 when input is null, or when a quoted string is
// never closed (a message is then written to stderr).
char** str_split(const char* input, int* num) {
  *num = 0;
  if (!input) {
    return 0;
  }

  // Ignore the leading and trailing whitespaces.
  int len = 0;
  while (input[len]) len++;
  int start = 0;
  while (IsWhitespace(input, start)) start++;
  int end = len;
  while (end > 0 && IsWhitespace(input, end - 1)) end--;

  // First pass: count the tokens and validate the quoting.
  int token_count = 0;
  if (!ScanTokens(input, start, end, 0, &token_count)) {  // 'abc
    fprintf(stderr, "Quotations Error!\n");
    return 0;
  }

  // Second pass: generate the substrings, then append the null terminator.
  char** ret = new char*[token_count + 1];
  ScanTokens(input, start, end, ret, &token_count);
  ret[token_count] = 0;
  *num = token_count + 1;
  return ret;
}
// Demo driver: splits one sample command line, prints every slot of the
// returned array, then releases each string and the array itself.
int main(int argc, char* argv[]) {
  int num = 0;
  const char *input = " grep 'char c = \\'x\\';' my\\ file ";
  char **output = str_split(input, &num);
  for (int i = 0; i < num; i++) {
    // The array ends with a null pointer, and handing a null pointer to %s
    // is undefined behaviour, so print a placeholder for it instead.
    printf("output[%d] = \"%s\"\n", i, output[i] ? output[i] : "(null)");
  }
  // delete[] on a null pointer is a no-op, so the terminator slot and a
  // failed split (output == 0, num == 0) need no special handling.
  for (int i = 0; i < num; i++) {
    delete[] output[i];
  }
  delete[] output;
  return 0;
}
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
// Splits a command line on runs of spaces/tabs.
//
// In this variant the characters ' " and \ all behave the same way: the
// character itself is removed and the character after it is copied
// literally (so an "escaped" blank does not split). One of these at the very
// end of the input has nothing to protect and is simply dropped.
//
// The result is a table of C strings whose last entry is the literal text
// "NULL" (not a null pointer). *num is set to the number of entries, that
// sentinel included; the value the caller stored in *num beforehand is
// ignored. Table and text share one malloc() block, so a single free() on
// the returned pointer releases everything.
//
// Returns NULL (with *num == 0 when num is non-null) if input or num is
// NULL or if the allocation fails.
char** str_split(const char* input, int* num)
{
    const char *p;
    char *buf;
    char *q;
    char **output;
    size_t len;
    size_t table_bytes;
    int count = 2;  // first token + the "NULL" sentinel
    int i = 0;

    if (num == NULL)
    {
        return NULL;
    }
    *num = 0;
    if (input == NULL)
    {
        return NULL;
    }

    // Skip leading blanks.
    while (*input == ' ' || *input == '\t')
    {
        ++input;
    }
    len = strlen(input);

    // Pass 1: count the tokens.
    p = input;
    while (*p)
    {
        if (*p == '\'' || *p == '\"' || *p == '\\')
        {
            // Skip the marker and the character it protects, but never the
            // terminator.
            p += (p[1] != '\0') ? 2 : 1;
        }
        else if (*p == ' ' || *p == '\t')
        {
            while (*p == ' ' || *p == '\t')
            {
                ++p;
            }
            if (*p)
            {
                ++count;
            }
        }
        else
        {
            p++;
        }
    }

    // Text area bound: at most len copied bytes, one terminator for the last
    // token, and five bytes for "NULL\0".
    table_bytes = sizeof(char *) * (size_t)count;
    buf = (char *)malloc(table_bytes + len + 6);
    if (buf == NULL)
    {
        return NULL;
    }
    output = (char **)buf;
    q = buf + table_bytes;

    // Pass 2: copy the tokens behind the pointer table.
    output[i++] = q;
    p = input;
    while (*p)
    {
        if (*p == '\'' || *p == '\"' || *p == '\\')
        {
            ++p;
            if (*p)
            {
                *q++ = *p++;
            }
        }
        else if (*p == ' ' || *p == '\t')
        {
            *q++ = '\0';
            while (*p == ' ' || *p == '\t')
            {
                ++p;
            }
            if (*p)
            {
                output[i++] = q;
            }
        }
        else
        {
            *q++ = *p++;
        }
    }
    *q++ = '\0';

    // Sentinel entry.
    output[i++] = q;
    memcpy(q, "NULL", 5);

    *num = count;
    return output;
}
// Demo driver: splits one sample command line and prints every entry.
int main(int argc, char* argv[])
{
    // str_split counts on top of the value already stored here; 2 covers the
    // first token and the trailing "NULL" entry.
    int num = 2;
    // "\\" is a single backslash at run time, so the splitter really sees the
    // escaped blank in "my\ file". Writing "\ " in the literal is not a valid
    // escape sequence and would not deliver the backslash.
    const char* input = " find . -name *.c my\\ file";
    char** output = str_split(input, &num);
    int i;
    if (output == NULL)
    {
        return 1;
    }
    for (i = 0; i < num; i++)
    {
        printf("output[%d] = \"%s\"\n", i, output[i]);
    }
    free(output);
    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Number of slots in strarray::strings.
#define MAX 64
// Fixed-capacity list of C strings, passed around by value.
struct strarray {
// String pointers; foo() fills the first `size` slots with malloc'ed copies.
char *strings[MAX];
// Number of slots of `strings` that are in use.
size_t size;
};
// Splits `s` on spaces; the definition follows main().
static struct strarray foo(const char* s);
// Demo driver: runs foo() on an empty string and on a short sentence,
// printing each resulting word on its own line and freeing it right after.
int main(int argc, char** argv) {
    const char *inputs[] = { "", "abc def gh z" };
    const size_t input_count = sizeof(inputs) / sizeof(inputs[0]);
    size_t which;

    for (which = 0; which < input_count; ++which) {
        struct strarray array = foo(inputs[which]);
        size_t idx;
        for (idx = 0; idx < array.size; ++idx) {
            printf("%s\n", array.strings[idx]);
            free(array.strings[idx]);
        }
    }
    return 0;
}
static struct strarray foo(const char* s)
{
struct strarray array;
size_t len = strlen(s);
int begin = 0, i, j = 0;
for (i = 0; i < len + 1; ++i) {
if ((s[i] == ' ') || (s[i] == '\0')) {
if (i - begin > 0) {
// TODO check NULL pointer.
array.strings[j] = (char *) malloc(i - begin + 1);
strncpy(array.strings[j], &s[begin], i - begin);
array.strings[j][i - begin] = '\0';
j++;
}
begin = i + 1;
}
}
array.size = j;
return array;
}
#include <iostream>
#include <assert.h>
#include <string.h>
using namespace std;
// Normalises a command line into a packed sequence of NUL-terminated tokens.
//
//   * Leading and trailing spaces/tabs are removed.
//   * Every run of unquoted blanks becomes a single '\0' separator.
//   * '...' and "..." protect blanks; the quote characters are removed, and
//     the other kind of quote is an ordinary character inside them.
//   * A backslash makes the following character literal. A backslash at the
//     very end of the string has nothing to escape and is dropped (copying
//     the terminator there would create a token that blankNum does not
//     count).
//
// The returned buffer (new char[], owned by the caller) has the layout
//     token0 '\0' token1 '\0' ... tokenN '\0' "NULL" '\0'
// and blankNum is set to the number of tokens in front of the "NULL" marker.
// An input without any token still yields one empty token.
char* erase_blank( const char* input , int& blankNum )
{
    assert( input != NULL );
    const int len = static_cast<int>( strlen( input ) );

    // Output never exceeds the input length; add 2 terminators + "NULL".
    // The trailing () zero-fills the buffer.
    char* result = new char[len + 2 + 4]();

    int begin = 0;  // first non-blank position
    while( begin < len && ( input[begin] == ' ' || input[begin] == '\t' ) )
        begin++;
    int end = len - 1;  // last non-blank position
    while( end >= 0 && ( input[end] == ' ' || input[end] == '\t' ) )
        end--;

    int left = 0;           // write position in result
    int right = begin;      // read position in input
    char quotation = '\0';  // quote character currently open, or 0
    blankNum = 0;

    while( right <= end )
    {
        const char c = input[right];
        if( quotation == '\0' && ( c == ' ' || c == '\t' ) )
        {
            // Collapse the whole run of blanks into one separator.
            result[left++] = '\0';
            blankNum++;
            while( right <= end && ( input[right] == ' ' || input[right] == '\t' ) )
                right++;
            continue;
        }
        if( quotation == '\0' && ( c == '\'' || c == '\"' ) )
        {
            quotation = c;  // open a quoted region
            right++;
            continue;
        }
        if( quotation != '\0' && c == quotation )
        {
            quotation = '\0';  // close the quoted region
            right++;
            continue;
        }
        if( c == '\\' )
        {
            right++;
            // The escaped character may be a blank that the naive trailing
            // trim placed beyond `end`; only the real end of the string
            // stops here.
            if( input[right] == '\0' )
                break;
        }
        result[left++] = input[right++];
    }

    result[left] = '\0';
    result[left+1] = 'N';
    result[left+2] = 'U';
    result[left+3] = 'L';
    result[left+4] = 'L';
    result[left+5] = '\0';
    blankNum++;  // separators + 1 == number of tokens
    return result;
}
// Splits `input` into tokens.
//
// erase_blank() produces one packed buffer of NUL-terminated tokens and
// reports how many there are through `num`. This function adds one to that
// number and builds a table (new char*[num]) pointing at the start of each
// of the first `num` strings in the buffer.
//
// result[0] is the start of the buffer erase_blank() returned, so the caller
// has two allocations to release: that buffer and the table itself.
char** str_split( const char* input, int& num )
{
    assert( input!=NULL );
    char* packed = erase_blank( input , num );
    num += 1;
    char** result = new char*[num];

    char* token = packed;
    for( int idx = 0 ; idx < num ; ++idx )
    {
        result[idx] = token;
        // Step to this token's terminator, then over it.
        while( *token != '\0' )
            ++token;
        ++token;
    }
    return result;
}
// Demo driver: splits one sample command line, prints the table, and
// releases both allocations made by str_split().
int main(int argc, char* argv[])
{
    int num = 0;
    const char* input = " grep 'char c = \\'x\\';' my\\ file ";
    //const char* input = " find . -name *.c ";
    //cout << "input:\"" << input << '\"'<< endl << endl;
    /*
    Note: the sample input given in the StringSplit exercise cannot be used
    verbatim. Inside a C++ string literal a single backslash starts an escape
    sequence, so the sample
        grep 'char c = \'x\';' my\ file
    pasted between double quotes would reach the program as
        grep 'char c = 'x';' my file
    (and backslash-space is not even a valid escape), which makes the parse
    ambiguous. The literal above therefore spells every backslash as two
    backslashes, so the program really receives the sample text with its
    backslashes intact and parses it correctly.
    */
    char** output = str_split( input, num );
    cout << "char** output[" << num << "];" << endl;
    for ( int i = 0; i < num; i++)
        cout << "output[" << i << "] = " << "\"" << output[i] << "\"" << endl;
    // Both blocks come from new[]: output[0] is the start of the packed
    // character buffer, output is the pointer table.
    delete[] output[0];
    delete[] output;
    return 0;
}
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <iostream>
using namespace std;
// Splits `input` on runs of spaces/tabs (no quoting or escaping).
//
// Returns a table of *num C strings. The table and the characters share one
// malloc() block (pointers first, text after), so a single free() on the
// returned pointer releases everything.
//
// Returns NULL with *num == 0 when input is NULL, contains no token, or the
// allocation fails.
char** str_split(const char* input, int* num) {
    *num = 0;
    if (input == NULL) return NULL;

    // Trim: [first, last) is the input without surrounding blanks.
    const char* first = input;
    while (*first == ' ' || *first == '\t') ++first;
    const char* last = first + strlen(first);
    while (last > first && (last[-1] == ' ' || last[-1] == '\t')) --last;
    if (first == last) return NULL;  // empty or blank-only input

    // Count tokens: one, plus one per blank run. After trimming, every
    // blank run is followed by a non-blank character, so each run is counted
    // at its final blank.
    int tokens = 1;
    for (const char* p = first; p < last; ++p) {
        const bool here_blank = (*p == ' ' || *p == '\t');
        const bool next_blank = (p[1] == ' ' || p[1] == '\t');
        if (here_blank && !next_blank) ++tokens;
    }

    // One allocation: pointer table, then a private copy of the trimmed text.
    const size_t table_bytes = static_cast<size_t>(tokens) * sizeof(char*);
    const size_t text_len = static_cast<size_t>(last - first);
    char** output = static_cast<char**>(malloc(table_bytes + text_len + 1));
    if (output == NULL) return NULL;
    char* text = reinterpret_cast<char*>(output) + table_bytes;
    memcpy(text, first, text_len);
    text[text_len] = '\0';

    // Cut the copy in place: the first blank of every run becomes a
    // terminator and the next non-blank character starts a new token.
    char* const text_end = text + text_len;
    char* cursor = text;
    int n = 0;
    output[n++] = cursor;
    while (cursor < text_end) {
        if (*cursor == ' ' || *cursor == '\t') {
            *cursor++ = '\0';
            while (*cursor == ' ' || *cursor == '\t') ++cursor;
            output[n++] = cursor;
        } else {
            ++cursor;
        }
    }

    *num = n;
    return output;
}
// Demo driver: splits one sample command line and prints every token.
int main() {
    // Start at zero so the print loop is skipped if str_split() returns
    // NULL without having stored a count.
    int num = 0;
    const char* input = " find . -name *.c ";
    char** output = str_split(input, &num);
    if (output == NULL) {
        return 0;  // nothing to print, nothing to free
    }
    for (int i = 0; i < num; i++)
        printf("output[%d] = \"%s\"\n", i, output[i]);
    // Table and text live in one malloc() block.
    free(output);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment