DMTL: tokenizer_utils.h Source File

00001 /*
00002  *  Copyright (C) 2005 M.J. Zaki <zaki@cs.rpi.edu> Rensselaer Polytechnic Institute
00003  *  Written by parimi@cs.rpi.edu
00004  *  Updated by chaojv@cs.rpi.edu, alhasan@cs.rpi.edu, salems@cs.rpi.edu
00005  *  Modifications:
00006  *            Added LINE_SZ -- zaki 5/15/06
00007  *
00008  *  This program is free software; you can redistribute it and/or
00009  *  modify it under the terms of the GNU General Public License
00010  *  as published by the Free Software Foundation; either version 2
00011  *  of the License, or (at your option) any later version.
00012  *
00013  *  This program is distributed in the hope that it will be useful,
00014  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  *  GNU General Public License for more details.
00017  *
00018  *  You should have received a copy of the GNU General Public License along
00019  *  with this program; if not, write to the Free Software Foundation, Inc.,
00020  *  59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
00021  */
00022 #ifndef _TOKENIZER_UTILS_H_
00023 #define _TOKENIZER_UTILS_H_
00024 
00025 #define LINE_SZ 10000 // number of characters per line in the database file (presumably the line-buffer size used by readers -- TODO confirm against callers)
00026 
/**
 * Function object that pulls one delimiter-terminated token out of a
 * NUL-terminated character buffer.
 */
struct parse_word
{

  /**
   * Copies characters from line into word until delim or the terminating
   * NUL of line is reached, then NUL-terminates word.
   *
   * No bounds check is performed on word: the caller must provide a buffer
   * large enough to hold the longest possible token plus the terminator.
   *
   * @param line  NUL-terminated input to read the token from.
   * @param word  output buffer that receives the NUL-terminated token.
   * @param delim character that ends a token (a space by default).
   * @return if the scan stopped on a character equal to delim, a pointer to
   *         the character right after it; otherwise a pointer to the
   *         terminating NUL of line.
   */
  char* operator() (char* line, char* word, char delim=' ') const {
    char* src = line;
    char* dst = word;

    // Copy the token one character at a time, stopping at delim or at NUL.
    for (; *src != '\0' && *src != delim; ++src, ++dst) {
      *dst = *src;
    }
    *dst = '\0';

    // Step over the delimiter so the next call starts at the following token.
    return (*src == delim) ? src + 1 : src;
  } //end parse_word()
};
00050 
00051 #endif