/**
* Copyright (c) 2008, Corey's Consulting LLC. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/****
This gets a field from a delimited string. It will not affect the original
string in any way. It does NOT handle quoted fields.
Parameters:
    cpLine      - i:line containing fields, assumed to be null terminated
    cDelim      - i:field delimiter to use
    iDesiredFld - i:field number to get, starting at 1
    cpOutBuf    - o:output buffer, where field will go, will be null terminated
    iSizBuf     - i:size of output buffer in bytes, to prevent overruns
Returns:
    GF_xxx status code (GF_SUCCESS when the field was copied)
****/
/*
 * getField - copy one field of a single-character-delimited, null-terminated
 * string into a caller-supplied buffer.
 *
 * The input line is only read, never modified.  Quoted fields are not
 * recognised: every occurrence of cDelim separates two fields.
 *
 * Parameters:
 *   cpLine      - i: null-terminated line to parse (must not be NULL)
 *   cDelim      - i: delimiter character
 *   iDesiredFld - i: 1-based number of the field to extract
 *   cpOutBuf    - o: receives the field, always null terminated once the
 *                    parameter checks have passed (must not be NULL)
 *   iSizBuf     - i: size of cpOutBuf in bytes, terminator included
 *
 * Returns:
 *   GF_SUCCESS          the whole field was copied (it may be empty)
 *   GF_FIELD_NOT_FOUND  iDesiredFld < 1, or the line has fewer fields
 *   GF_SIZE_TOO_SMALL   iSizBuf < 1, or the field needs more than
 *                       iSizBuf - 1 bytes; in the latter case cpOutBuf holds
 *                       the leading iSizBuf - 1 bytes, null terminated
 */
int
getField(char *cpLine,char cDelim, int iDesiredFld, char *cpOutBuf,int iSizBuf)
{
    int iCurField;
    int iBytesCopied = 0;
    char *cpField = cpLine;     /* first field starts at offset 0 */

    /**********
    check parms
    **********/
    if (iSizBuf < 1)
    {
        return GF_SIZE_TOO_SMALL;
    }
    if (iDesiredFld < 1)
    {
        return GF_FIELD_NOT_FOUND;
    }

    /**************************
    find start of desired field
    **************************/
    for (iCurField = 1; iCurField < iDesiredFld; iCurField++)
    {
        cpField = strchr(cpField, cDelim);
        /*
         * NULL means the previous field was the last one.  strchr() also
         * matches the terminator when cDelim is '\0'; stepping over it would
         * leave the string, so treat that as "no further field" too.
         */
        if (cpField == NULL || *cpField == '\0')
        {
            return GF_FIELD_NOT_FOUND;
        }
        cpField++;              /* point to first byte after the delimiter */
    }

    /***********
    copy the field
    ***********/
    /* decide on the INPUT byte why the copy stops; the output slot at
       iBytesCopied has not been written yet and must not be inspected */
    while (*cpField != cDelim && *cpField != '\0')
    {
        /* always keep one byte in reserve for the terminator */
        if (iBytesCopied >= iSizBuf - 1)
        {
            cpOutBuf[iBytesCopied] = '\0';
            return GF_SIZE_TOO_SMALL;
        }
        cpOutBuf[iBytesCopied] = *cpField;
        cpField++;
        iBytesCopied++;
    }

    /* stopped on a delimiter or on the end of the line: field is complete */
    cpOutBuf[iBytesCopied] = '\0';
    return GF_SUCCESS;
}
/****
This function gets a field from a buffer using multi-byte delimiters.
THIS IS NOT A STRING FUNCTION!!!!! While it will set the output
buffer to nulls prior to filling it, it does not intentionally null
terminate the resulting buffer, nor will it use any string functions
to perform its operations; it is designed to extract data from a binary
buffer of a known size which has delimiters that are 1 or more bytes in
length. Its most common application is to parse a file with \r\n line
delimiters. The delimiter may be of any length of at least one byte.
Example:
if delim="ab":
    ++---------++------------++++-------- multi-byte delimiters
    ||         ||            ||||
    ||         ||            ||||    +--- NOT part of a delimiter
    ||         ||            ||||    |
    vv         vv            vvvv    v
lineabfewjkdwjkabjfi4eo2cfjdkababfdvda
^^^^  ^^^^^^^^^  ^^^^^^^^^^^^    ^^^^^
||||  |||||||||  ||||||||||||    |||||
||||  |||||||||  ||||||||||||    +++++--- fifth field
||||  |||||||||  ||||||||||||  fourth field is empty
||||  |||||||||  ++++++++++++------------ third field
||||  +++++++++-------------------------- second field
++++------------------------------------- first field
Parameters:
cpLine - i:line containing fields, needn't be null terminated
iLenLine - i:length of cpLine
cpDelim - i:field delimiter to use
iLenDelim - i:length of cpDelim
iDesiredFld - i:field number to get, starting at 1
cpOutBuf - o:output buffer, where field will go; NOT explicitly null terminated (see above)
iSizBuf - i:size of output buffer, to prevent overruns
Returns:
GF_xxx
****/
/*
 * getFieldMB - copy one field out of a length-counted buffer whose fields are
 * separated by a delimiter of one or more bytes.
 *
 * The input is handled as raw bytes: cpLine need not be null terminated and
 * only memcmp()/memset() are used.  Once the parameter checks have passed,
 * cpOutBuf is zero-filled, and at most iSizBuf - 1 bytes are stored into it,
 * so its last byte always stays zero.
 *
 * Parameters:
 *   cpLine      - i: buffer containing the fields (must not be NULL)
 *   iLenLine    - i: number of valid bytes in cpLine
 *   cpDelim     - i: delimiter bytes (must not be NULL)
 *   iLenDelim   - i: number of bytes in cpDelim, at least 1
 *   iDesiredFld - i: 1-based number of the field to extract
 *   cpOutBuf    - o: receives the field bytes (must not be NULL)
 *   iSizBuf     - i: size of cpOutBuf in bytes
 *
 * Returns:
 *   GF_SUCCESS          the whole field was copied (it may be empty)
 *   GF_FIELD_NOT_FOUND  iDesiredFld < 1, iLenDelim < 1 (no field boundary can
 *                       be defined), or the buffer has fewer fields
 *   GF_SIZE_TOO_SMALL   iSizBuf < 1, or the field is longer than iSizBuf - 1
 *                       bytes; in the latter case the leading iSizBuf - 1
 *                       bytes have been copied
 */
int
getFieldMB(char *cpLine, int iLenLine, char *cpDelim, int iLenDelim,
           int iDesiredFld, char *cpOutBuf,int iSizBuf)
{
    int iCurField = 1;      /* field that starts at cpLine + iByteCounter */
    int iByteCounter = 0;   /* number of input bytes already consumed */
    int iBytesCopied = 0;   /* number of bytes stored in cpOutBuf */

    /**********
    check parms
    **********/
    if (iSizBuf < 1)
    {
        return GF_SIZE_TOO_SMALL;
    }
    if (iDesiredFld < 1 || iLenDelim < 1)
    {
        return GF_FIELD_NOT_FOUND;
    }

    /* output is defined on every path from here on */
    memset(cpOutBuf, 0, (size_t)iSizBuf);

    /**************************
    find start of desired field
    **************************/
    while (iCurField < iDesiredFld)
    {
        /* so close to the end that no further delimiter can fit */
        if (iLenLine - iByteCounter < iLenDelim)
        {
            return GF_FIELD_NOT_FOUND;
        }
        if (memcmp(cpLine + iByteCounter, cpDelim, (size_t)iLenDelim) == 0)
        {
            /* delimiter found: the next field begins right behind it */
            iByteCounter += iLenDelim;
            iCurField++;
        }
        else
        {
            iByteCounter++;
        }
    }

    /************
    extract field
    ************/
    /* terminates: every pass either returns or consumes one input byte */
    for (;;)
    {
        int iRemaining = iLenLine - iByteCounter;

        /* ran out of input data: this was the last field */
        if (iRemaining <= 0)
        {
            return GF_SUCCESS;
        }
        /* only compare when a whole delimiter still fits in the input */
        if (iRemaining >= iLenDelim &&
            memcmp(cpLine + iByteCounter, cpDelim, (size_t)iLenDelim) == 0)
        {
            return GF_SUCCESS;  /* next delimiter encountered; end of field */
        }
        /* stop before the reserved last byte of the output buffer */
        if (iBytesCopied + 1 >= iSizBuf)
        {
            return GF_SIZE_TOO_SMALL;
        }
        cpOutBuf[iBytesCopied] = cpLine[iByteCounter];
        iBytesCopied++;
        iByteCounter++;
    }
}
/****
Whereas the getField() class of functions is designed to copy data from
a delimited buffer into another buffer defined by the caller, the
getLocation() class of functions is designed to find the beginning of
a particular field in the delimited buffer.
Parameters:
    cpLine      - i:line containing fields, needn't be null terminated
    iLenLine    - i:length of cpLine
    cpDelim     - i:field delimiter to use
    iLenDelim   - i:length of cpDelim
    iDesiredFld - i:field number to get, starting at 1
    ipLenField  - o:length of the extracted field if its location is found
    ipErrVal    - o:where errors are placed, is always filled with a GF_xxxx
Returns:
    addr: location of required field
    NULL: field not found or error, check *ipErrVal
****/
char *
getLocationMB(char *cpLine, int iLenLine, char *cpDelim, int iLenDelim,
int iDesiredFld, int *ipLenField, int *ipErrVal)
{
int iCurField;
int iValidBytes;
int iByteCounter;
char *cpField;
char *cpStartOfField; // points to first byte of data in field
char *cpEndOfField; // points to one past last byte of data in field
for(;;) /* enables single exit point */
{
/**********
check parms
**********/
if (ipErrVal == NULL)
break; // can't set anything...
if (iDesiredFld != 1) /* first field starts at offset 0 */
{
while (iCurField < iDesiredFld)
{
// break if we're so close to the end there can't be a delim
// and it still hasn't been found yet
if (iLenLine - iByteCounter < iLenDelim)
{
*ipErrVal = GF_FIELD_NOT_FOUND;
break;
}
if (iLenDelim == 1)
{
// this saves on a call to memcmp if the length of
// delimiters is 1... it's not truly multi-byte.
if (*(cpLine+iByteCounter) == *cpDelim)
{
// delimiter found, locate first byte of field
cpField = cpLine + iByteCounter + iLenDelim;
iCurField++; // note new field was found
iByteCounter += iLenDelim; // jump past delimiter
}
else
{
iByteCounter++;
}
}
else
{
if (memcmp(cpLine+iByteCounter, cpDelim, iLenDelim) == 0)
{
// delimiter found, locate first byte of field
cpField = cpLine + iByteCounter + iLenDelim;
iCurField++; // note new field was found
iByteCounter += iLenDelim; // jump past delimiter
}
else
{
iByteCounter++;
}
}
}
}
if (*ipErrVal != GF_SUCCESS)
break;
cpStartOfField = cpField;
// At this point:
//
// 1. The start of the proper field is being pointed to by cpStartOfField
// 2. iByteCounter is equal to exactly the number of bytes already examined
// 3. (iLenLine - iByteCounter) is equal to the number of bytes left to process
//
/************************
find end of desired field
************************/
// not endless because SOMETHING below will eventually match
for (iValidBytes = 0;; cpField++, iValidBytes++, iByteCounter++)
{
if (iLenDelim < iLenLine - iByteCounter + 1)
{
if (iLenDelim == 1)
{
// this will be slightly faster than the memcmp below...
if (*cpField == *cpDelim)
{
// next delimiter encountered; end of field
break;
}
}
else
{
if (memcmp(cpField, cpDelim, iLenDelim) == 0)
{
// next delimiter encountered; end of field
break;
}
}
}
// bail if we run out of input data
if (iByteCounter >= iLenLine)
{
// iByteCounter must be one past end of buffer, must have
// just finished copying the last field
break;
}
}
// end of field is defined to be the first byte which doesn't
// count towards the number of valid bytes.
for (;;) // not endless because memStr will eventually fail
{
if (iLenDelim == 1)
{
// use faster version if single byte delimiter
cpNextDlm = (char *)memchr(cpStart, cpDelim[0], iLenRemaining);
}
else
{
cpNextDlm = utl_memStr(cpStart, iLenRemaining,
cpDelim, iLenDelim);
}
if (cpNextDlm == NULL)
{
// no more fields, must account for final field
if (iLenRemaining == 0)
{
if (cIncludeEmpty)
{
++iFieldCount;
}
// else do nothing, empty field
}
else
{
// field has length; count it
++iFieldCount;
}
break;
}
else
{
// delimiter detected, increment field count