////////////////////////////////////////////////////////////////////////////
// NoteCase notes manager project <http://notecase.sf.net>
//
// This code is licensed under the BSD license. See "license.txt" for more details.
//
// File: Class implements fast text buffer search method
////////////////////////////////////////////////////////////////////////////

#include "TextSearch.h"
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

// Creates a searcher with no style flags, no pattern and no scan buffer.
TextSearch::TextSearch()
{
	m_dwStyle		= 0;
	m_pszPattern	= NULL;
	m_nPtrnSize		= 0;

	// Search() reads the scan buffer pointer and size, so they must not be
	// left indeterminate when SetScanBuffer() has never been called.
	m_pszBlock		= NULL;
	m_nBlkSize		= 0;
}

// Nothing to release: the pattern and scan buffer pointers are stored exactly
// as the caller passed them in; this file never allocates or frees them.
TextSearch::~TextSearch()
{
}

// Adds the given style flag(s) to the current scan style.
// The flags are OR-ed into the existing value, so this call can only switch
// options on; it never clears a flag that was set earlier.
void TextSearch::SetScanStyle(unsigned long dwStyle)
{
	//NOTE: do NOT change the style after a pattern has been set - the shift
	//		table is built from whatever style is active at that moment.
	//ASSERT(NULL == m_pszPattern && 0 == m_nPtrnSize);

	m_dwStyle = m_dwStyle | dwStyle;
}

// Sets a NUL-terminated string as the search pattern and rebuilds the shift
// table for it.
//   szText - pattern text; the pointer is stored, not copied, so the string
//            must stay valid for as long as it is used for searching.
//            NULL is accepted and leaves the object without a pattern
//            (Search() then returns -1).
void TextSearch::SetSearchPattern(const char *szText)
{
	m_pszPattern = (const BYTE *)szText;
	// strlen() must not be called on NULL; treat it as an empty pattern size.
	m_nPtrnSize	 = (szText != NULL) ? strlen(szText) : 0;

	BuildShiftTable();
}

// Sets a binary (length-delimited) search pattern and rebuilds the shift
// table for it.
//   szBinary - pattern bytes; the pointer is stored, not copied, so the data
//              must stay valid for as long as it is used for searching.
//              NULL is accepted and leaves the object without a pattern.
//   nSize    - number of pattern bytes (ignored when szBinary is NULL).
void TextSearch::SetSearchPattern(const char *szBinary, unsigned int nSize)
{
	m_pszPattern = (const BYTE *)szBinary;
	// A NULL pattern with a non-zero size would make BuildShiftTable()
	// dereference NULL, so force the size to 0 in that case.
	m_nPtrnSize	 = (szBinary != NULL) ? nSize : 0;

	BuildShiftTable();
}

// Registers the memory block that Search() will scan.
//   szBuffer - start of the block; the pointer is stored, not copied.
//   nSize    - size of the block in bytes.
// Returns true when the buffer is usable (non-NULL and non-empty).
// The values are stored even when the result is false.
bool TextSearch::SetScanBuffer(const char *szBuffer, unsigned int nSize)
{
	m_nBlkSize	= nSize;
	m_pszBlock	= (const BYTE *)szBuffer;

	if (NULL == m_pszBlock)
		return false;

	return m_nBlkSize > 0;
}

//TOFIX use int64 for large memory
long TextSearch::Search(unsigned long nStartPos)
{
  size_t
        match_size;                 //  Size of matched part
    const BYTE
        *match_base = NULL,         //  Base of match of pattern
        *match_ptr  = NULL,         //  Point within current match
        *limit      = NULL;         //  Last potiental match point
    const BYTE
        *block   = m_pszBlock,		//  Concrete pointer to block data
        *pattern = m_pszPattern;	//  Concrete pointer to search value

    //ASSERT (block);                 //  Expect non-NULL pointers, but
    //ASSERT (pattern);               //  fail gracefully if not debugging

	if(nStartPos < 0 || nStartPos >= m_nBlkSize)
		return -1; //invalid position

    if (block == NULL || pattern == NULL)
        return -1;

	block += nStartPos;

    //  Pattern must be smaller or equal in size to string
    if (m_nBlkSize-nStartPos < m_nPtrnSize)
        return -1;                  //  Otherwise it's not found

    if (m_nPtrnSize == 0)           //  Empty patterns match at start
        return 0;

    //  Search for the block, each time jumping up by the amount             
    //  computed in the shift table                                          

    limit = block + (m_nBlkSize - nStartPos - m_nPtrnSize + 1);
    //ASSERT (limit > block);

	//NOTE: two versions: case sensitive and case insensitive version
	if(m_dwStyle & FS_CASE_INSENSITIVE)
	{
		for (match_base = block;
			 match_base < limit;
			 match_base += m_shift [ tolower(*(match_base + m_nPtrnSize)) ])
		{
			match_ptr  = match_base;
			match_size = 0;

			// Compare pattern until it all matches, or we find a difference
			while (tolower(*match_ptr++) == tolower(pattern [match_size++]))
			{
				//ASSERT (match_size <= m_nPtrnSize && match_ptr == (match_base + match_size));

				// If we found a match, return the start address
				if (match_size >= m_nPtrnSize)
				  return (match_base - m_pszBlock);
			}
		 }
	}
	else
	{
		for (match_base = block;
			 match_base < limit;
			 match_base += m_shift [ *(match_base + m_nPtrnSize) ])
		{
			match_ptr  = match_base;
			match_size = 0;

			// Compare pattern until it all matches, or we find a difference
			while (*match_ptr++ == pattern [match_size++])
			{
				//ASSERT (match_size <= m_nPtrnSize && match_ptr == (match_base + match_size));

				// If we found a match, return the start address
				if (match_size >= m_nPtrnSize)
				  return (match_base - m_pszBlock);
			}
		}

    }

	return -1;	// Found nothing
}

void TextSearch::BuildShiftTable()
{
	//  Build the shift table unless we're continuing a previous search      

    //  The shift table determines how far to shift before trying to match
    //  again, if a match at this point fails.  If the byte after where the
    //  end of our pattern falls is not in our pattern, then we start to
    //  match again after that byte; otherwise we line up the last occurence
    //  of that byte in our pattern under that byte, and try match again.
    unsigned int i;
    for (i = 0; i < 256; i++)
        m_shift[i] = m_nPtrnSize + 1;

	if(m_dwStyle & FS_CASE_INSENSITIVE)
	{
		//case insensitive version
		for (i = 0; i < m_nPtrnSize; i++)
			m_shift[(BYTE) tolower(m_pszPattern[i])] = m_nPtrnSize - i;
	}
	else
	{
		//case sensitive version
		for (i = 0; i < m_nPtrnSize; i++)
			m_shift[(BYTE) m_pszPattern[i]] = m_nPtrnSize - i;
	}
}

// Forgets the current pattern and scan buffer.
// The style flags and the contents of the shift table are left untouched.
void TextSearch::Clear()
{
	// scan buffer
	m_nBlkSize   = 0;
	m_pszBlock	 = NULL;

	// search pattern
	m_nPtrnSize  = 0;
	m_pszPattern = NULL;
}

