[cig-commits] r6889 - in cs/cigma/trunk: . include include/util tests util
luis at geodynamics.org
luis at geodynamics.org
Wed May 16 10:08:23 PDT 2007
Author: luis
Date: 2007-05-16 10:08:23 -0700 (Wed, 16 May 2007)
New Revision: 6889
Added:
cs/cigma/trunk/include/util/
cs/cigma/trunk/include/util/split.h
cs/cigma/trunk/tests/test_split.c
cs/cigma/trunk/util/
cs/cigma/trunk/util/split.c
Log:
Added util directory for any misc utility functions that don't belong in cigma per se.
First function added is a string split() function, which should behave much like the Python version.
Added: cs/cigma/trunk/include/util/split.h
===================================================================
--- cs/cigma/trunk/include/util/split.h 2007-05-16 17:04:13 UTC (rev 6888)
+++ cs/cigma/trunk/include/util/split.h 2007-05-16 17:08:23 UTC (rev 6889)
@@ -0,0 +1,16 @@
+#ifndef UTIL_SPLIT_H
+#define UTIL_SPLIT_H
+
+/*
+ * Declarations for the string-splitting helpers implemented in
+ * util/split.c.
+ *
+ * The include guard avoids a leading double underscore on purpose:
+ * identifiers of that form are reserved for the C implementation.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Split the first `len` characters of `str` on the character `sep`.
+ *
+ *   str          string to split; it is copied, not modified
+ *   len          number of characters of `str` to consider
+ *   split_list   out: newly allocated array of newly allocated,
+ *                NUL-terminated strings, one per part
+ *   split_count  out: number of strings stored in *split_list
+ *   sep          separator character; a run of consecutive separators
+ *                delimits a single boundary, so no empty part is
+ *                produced between them
+ *
+ * The caller owns the returned list and every string in it, and can
+ * release all of it with split_free().
+ */
+void split(char *str, int len,
+           char ***split_list, int *split_count,
+           char sep);
+
+/*
+ * Free the `split_count` strings stored in `split_list`, then the list
+ * itself. A NULL `split_list` is ignored.
+ */
+void split_free(char **split_list, int split_count);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* UTIL_SPLIT_H */
Added: cs/cigma/trunk/tests/test_split.c
===================================================================
--- cs/cigma/trunk/tests/test_split.c 2007-05-16 17:04:13 UTC (rev 6888)
+++ cs/cigma/trunk/tests/test_split.c 2007-05-16 17:08:23 UTC (rev 6889)
@@ -0,0 +1,53 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <util/split.h>
+
+/*
+ * Command-line driver for split().
+ *
+ * Usage: test_split string sep
+ *
+ * Splits argv[1] on the first character of argv[2] and prints every
+ * resulting part. Returns EXIT_FAILURE on bad usage or if the input
+ * string cannot be copied, EXIT_SUCCESS otherwise.
+ */
+int main(int argc, char *argv[])
+{
+    /* loop variable */
+    int i;
+
+    /* list of strings for storing the split array */
+    char **list = NULL;
+    int n = 0;
+
+    /* string to split */
+    char *str;
+    int len;
+
+    /* separator */
+    char sep;
+
+    /*
+     * An empty second argument would make the separator '\0', which is
+     * the string terminator and therefore cannot delimit anything, so it
+     * is rejected together with a wrong argument count.
+     */
+    if (argc != 3 || argv[2][0] == '\0')
+    {
+        fprintf(stderr, "Usage: %s string sep\n", argv[0]);
+        return EXIT_FAILURE;
+    }
+
+    str = strdup(argv[1]);
+    if (str == NULL)
+    {
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+        return EXIT_FAILURE;
+    }
+    len = (int)strlen(str);
+    sep = argv[2][0];
+
+    split(str, len, &list, &n, sep);
+
+    printf("Found %d parts\n", n);
+    for (i = 0; i < n; i++)
+    {
+        printf("\tpart[%d] = \"%s\"\n", i, list[i]);
+    }
+
+    /* clean up: split_free() releases each part and then the list */
+    free(str);
+    split_free(list, n);
+
+    return EXIT_SUCCESS;
+}
Added: cs/cigma/trunk/util/split.c
===================================================================
--- cs/cigma/trunk/util/split.c 2007-05-16 17:04:13 UTC (rev 6888)
+++ cs/cigma/trunk/util/split.c 2007-05-16 17:08:23 UTC (rev 6889)
@@ -0,0 +1,177 @@
+#include <stdlib.h>
+#include <string.h>
+#include <util/split.h>
+
+
+/*
+ * State of the basic string tokenizer. The approach is to copy the
+ * string to be split into a private buffer and then overwrite each
+ * separator character in that buffer with a NUL, in place. Keeping the
+ * state in a struct avoids the global variables used by the split.c of
+ * the project at http://www.nongnu.org/uri/, which inspired this code.
+ */
+typedef struct
+{
+    /* working copy of the string being split */
+    char *buffer;           /* storage for the copy */
+    int buffer_length;      /* number of chars allocated for buffer */
+
+    /* tokens located inside buffer */
+    char **tokens;          /* pointer to the start of each token */
+    int token_count;        /* entries of tokens currently in use */
+    int max_token_count;    /* entries allocated for tokens */
+} strtok_t;
+
+
+
+/*
+ * In this section, we define the methods for our string tokenizer
+ * object. Namely, a constructor, a destructor, and then the actual
+ * routine to split the string.
+ *
+ */
+
+/* Capacities that strtok_init() gives a fresh tokenizer. */
+enum
+{
+    STRTOK_INIT_TOKENS = 16,    /* entries in the tokens array */
+    STRTOK_INIT_BUFFER = 512    /* chars in the buffer */
+};
+
+/*
+ * Constructor: put `st` into its initial state, holding zero tokens, a
+ * buffer of STRTOK_INIT_BUFFER chars and room for STRTOK_INIT_TOKENS
+ * token pointers.
+ *
+ * If either allocation fails, nothing is kept: both pointers are set to
+ * NULL and both capacities to 0, so the struct never advertises storage
+ * it does not own. strtok_free() is safe to call in either case.
+ */
+static void strtok_init(strtok_t *st)
+{
+    /* starting up with 0 tokens */
+    st->token_count = 0;
+
+    st->buffer = malloc(STRTOK_INIT_BUFFER * sizeof *st->buffer);
+    st->tokens = malloc(STRTOK_INIT_TOKENS * sizeof *st->tokens);
+
+    if (st->buffer == NULL || st->tokens == NULL)
+    {
+        /* free(NULL) is a no-op, so whichever one succeeded is released */
+        free(st->buffer);
+        free(st->tokens);
+        st->buffer = NULL;
+        st->tokens = NULL;
+        st->buffer_length = 0;
+        st->max_token_count = 0;
+        return;
+    }
+
+    /* record the capacities only once the storage really exists */
+    st->buffer_length = STRTOK_INIT_BUFFER;
+    st->max_token_count = STRTOK_INIT_TOKENS;
+}
+
+/*
+ * Destructor: release the token array and the buffer owned by `st`.
+ * The struct itself is not freed, and a NULL `st` is ignored.
+ */
+static void strtok_free(strtok_t *st)
+{
+    if (st == NULL)
+    {
+        return;
+    }
+
+    free(st->tokens);
+    free(st->buffer);
+}
+
+/*
+ * Tokenize the first `len` characters of `s` on `separator`.
+ *
+ * The characters are copied into st->buffer, every separator in the
+ * copy is overwritten with a NUL, and the start of each token is
+ * recorded in st->tokens. Leading and trailing separators are dropped
+ * and a run of separators counts as one boundary, so no empty token is
+ * produced between separators.
+ *
+ * On success st->token_count is at least 1: an input that is empty or
+ * consists only of separators yields a single empty token. If memory
+ * cannot be obtained, st->token_count is set to 0. The token pointers
+ * refer into st->buffer and stay valid only until the next call on `st`
+ * or until the tokenizer is freed.
+ *
+ * A NULL `s` or a negative `len` is treated as an empty string. A
+ * `separator` of '\0' cannot occur inside the string, so the whole
+ * string is returned as one token.
+ */
+static void strtok_split(strtok_t *st, char *s, int len, char separator)
+{
+    enum
+    {
+        SPLIT_BUFFER_MIN = 512,     /* smallest buffer ever allocated */
+        SPLIT_TOKENS_STEP = 16      /* growth step of the tokens array */
+    };
+
+    char *first, *p;
+    int index, last;
+
+    /* until the very end, "no tokens" doubles as the failure result */
+    st->token_count = 0;
+
+    if (s == NULL || len < 0)
+    {
+        len = 0;
+    }
+
+    /*
+     * First, make sure the buffer can hold the len characters plus the
+     * terminating NUL, i.e. len + 1 chars. The comparison is <= because
+     * a buffer of exactly len chars has no room for the terminator.
+     */
+    if (st->buffer == NULL || st->buffer_length <= len)
+    {
+        int new_length = (len < SPLIT_BUFFER_MIN) ? SPLIT_BUFFER_MIN : len + 1;
+        char *grown = realloc(st->buffer, (size_t)new_length * sizeof *grown);
+
+        if (grown == NULL)
+        {
+            /* st->buffer is untouched and still owned by st */
+            return;
+        }
+        st->buffer = grown;
+        st->buffer_length = new_length;
+    }
+
+    /* the first token is stored unconditionally, so one slot must exist */
+    if (st->tokens == NULL || st->max_token_count < 1)
+    {
+        char **fresh = realloc(st->tokens, SPLIT_TOKENS_STEP * sizeof *fresh);
+
+        if (fresh == NULL)
+        {
+            return;
+        }
+        st->tokens = fresh;
+        st->max_token_count = SPLIT_TOKENS_STEP;
+    }
+
+    /*
+     * Next, copy the string s into our buffer and tokenize it in-place.
+     * Essentially, zero out the locations where we find the separator
+     * character, while remembering the beginning of each string.
+     */
+    if (len > 0)
+    {
+        memcpy(st->buffer, s, (size_t)len);
+    }
+    st->buffer[len] = '\0';
+
+    /*
+     * strchr() treats the terminator as part of the string, so searching
+     * for '\0' would "find" it and the loop below would step past the
+     * end of the buffer. Nothing can be split on '\0' anyway.
+     */
+    if (separator == '\0')
+    {
+        st->tokens[0] = st->buffer;
+        st->token_count = 1;
+        return;
+    }
+
+    /*
+     * Remove trailing separators, starting at the last character of the
+     * string itself; the rest of the buffer is unrelated storage.
+     */
+    last = (int)strlen(st->buffer) - 1;
+    while (last >= 0 && st->buffer[last] == separator)
+    {
+        st->buffer[last] = '\0';
+        last--;
+    }
+
+    /* remove leading separators */
+    first = st->buffer;
+    while (*first == separator)
+    {
+        first++;
+    }
+
+    /* store first token */
+    index = 0;
+    st->tokens[index++] = first;
+
+    /* keep tokenizing the buffer */
+    for (p = strchr(first, separator);
+         p != NULL;
+         p = strchr(p, separator))
+    {
+        /* separator found -- zero it out and step to the next char */
+        *p++ = '\0';
+
+        /* inside a run of separators (or at the end): no token here */
+        if (*p == separator || *p == '\0')
+        {
+            continue;
+        }
+
+        /* make room before storing, so a store never overruns the array */
+        if (index >= st->max_token_count)
+        {
+            int new_max = st->max_token_count + SPLIT_TOKENS_STEP;
+            char **grown = realloc(st->tokens, (size_t)new_max * sizeof *grown);
+
+            if (grown == NULL)
+            {
+                /* report failure rather than a silently truncated list */
+                return;
+            }
+            st->tokens = grown;
+            st->max_token_count = new_max;
+        }
+
+        /* store the next token */
+        st->tokens[index++] = p;
+    }
+
+    /* store the final count */
+    st->token_count = index;
+}
+
+
+
+/*
+ * Finally, we provide a procedural interface to our string tokenizer.
+ *
+ * Splits the first `len` characters of `str` on `sep` and stores a newly
+ * allocated array of newly allocated strings in *split_list, with the
+ * number of strings in *split_count. The caller takes over the
+ * responsibility of freeing that list as well as each individual string
+ * in it; split_free() does both.
+ *
+ * The result is all-or-nothing: if the tokenizer reports no tokens, or
+ * if any allocation made here fails, everything allocated so far is
+ * released and the outputs are *split_list == NULL, *split_count == 0.
+ */
+void split(char *str, int len,
+           char ***split_list, int *split_count,
+           char sep)
+{
+    int i;
+    char **list;
+    strtok_t tok;
+
+    /* give the outputs a defined value on every path */
+    *split_list = NULL;
+    *split_count = 0;
+
+    strtok_init(&tok);
+    strtok_split(&tok, str, len, sep);
+
+    if (tok.token_count > 0)
+    {
+        list = malloc((size_t)tok.token_count * sizeof *list);
+        if (list != NULL)
+        {
+            /*
+             * The tokens point into the tokenizer's buffer, which is
+             * freed below, so each one is duplicated for the caller.
+             */
+            for (i = 0; i < tok.token_count; i++)
+            {
+                list[i] = strdup(tok.tokens[i]);
+                if (list[i] == NULL)
+                {
+                    break;
+                }
+            }
+
+            if (i == tok.token_count)
+            {
+                *split_list = list;
+                *split_count = tok.token_count;
+            }
+            else
+            {
+                /* a copy failed: undo the i copies that succeeded */
+                while (i-- > 0)
+                {
+                    free(list[i]);
+                }
+                free(list);
+            }
+        }
+    }
+
+    strtok_free(&tok);
+}
+
+/*
+ * Release a list of strings: free each of the `split_count` entries of
+ * `split_list`, then the list itself. Does nothing when `split_list`
+ * is NULL.
+ */
+void split_free(char **split_list, int split_count)
+{
+    int k = 0;
+
+    if (split_list == NULL)
+    {
+        return;
+    }
+
+    while (k < split_count)
+    {
+        free(split_list[k]);
+        k++;
+    }
+    free(split_list);
+}
More information about the cig-commits mailing list