/*
 * Copyright (c) 1995, 1996 Gunther Schadow.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#ifndef ODBM_H_
#define ODBM_H_

#include <pg_config.h>
#include <sys/types.h>
#include <fBitSet.h>

#pragma interface

/*
ODBM -- a C++ Interface to GDBM.

The odbm class is an abstract base class that provides an interface to
the gdbm (or dbm, ndbm?) databases. It does not try to provide a
general object persistency mechanism, and thus some rules have to be
obeyed when defining children of odbm.  Generally, the odbm class
should be inherited privately (or maybe protected), since there is
nothing in odbm that should be propagated out of the odbm derivatives.
Especially two different odbm derivatives should not be valued as
belonging to the same family just because of the odbm class. The only
functions that make sense to export are fetch() and exists(). However,
this can be done by (inline) forward functions.

1. Supported types are only scalars and null terminated character
   strings provided as pointers.

2. In the value section, the key to the gdbm must be provided first
   and must be of the string type (char *).

3. Any other string in the structure must appear before the scalar
   data.

4. The constructor must call the odbm constructor telling it the
   location of the key, the location of the first string pointer and how
   many string pointers there are, and finally the size of the scalar
   block. The constructor of odbm also wants to know the database file
   name.

5. The memory used by the strings must be deleted with the odbm::free
   function and not by the standard free() function or the delete []
   operator. This is because the odbm functions modify the string
   pointers such that they point into special buffers allocated by
   odbm.

DIFFERENT ACCESS MODES

1. SINGLE OBJECT ACCESS:
   The database file is opened at the time of construction of an
   instance of an odbm derived class and is closed when this instance is
   being destroyed.

2. MULTI OBJECT ACCESS:
   The database is opened on the construction of the first instance of
   an odbm derived class and is not closed before the last instance is
   being destroyed. Therefore, the odbm ctor needs two more arguments:
   the database file variable and the object counter, which are
   typically static members of the derived class. However, in some
   cases it might be useful to declare them dynamic members, but please
   note that both have to have the same storage class, either dynamic
   or static! The object counter must be initialized to 0 before the
   first object is constructed.

3. MODES (READ vs. WRITE ACCESS and the like)
   Gdbm allows write access to a gdbm file for only one agent and only
   when no other agents currently have the file open for reading. The
   access mode thus defaults to ``odbm::read''. If you need to write,
   call the ctor with the additional argument ``odbm::write''. Write
   access is possible in both single and multi object access.

Finally, the cachesize and blocksize (see gdbm(1)) can be specified, in
that order, as the optional last two arguments (which otherwise default
to 100 and 512, respectively) to the ctor of the odbm class.

The following is an example class, that summarizes the protocol used
with odbm derived classes:

class Foo : private odbm {

  // THE KEY
  char *key;

  // NON-PERSISTENT SECTION 1
  int   transient1;
  char  *transient2;

  // STRING SECTION
  char  *string1;
  char  *string2;
  
  // SCALAR SECTION
  int    scalar1;
  char   fixlen[10];
  double scalar3;

  // NON-PERSISTENT SECTION 2
  int   transient3;
  char  *transient4;

  static const char *db_name = "/usr/databases/foo.db";
  static GDBM_FILE db_file;
  static unsigned int obj_cnt; // TO BE INITIALIZED TO 0!!

  Foo() : odbm(&key, &string1, &scalar1,
	       sizeof(scalar1) + sizeof(fixlen) + sizeof(scalar3),
	       [ , &db_file, &obj_cnt ],
	       db_name,
	       [ (read|write|...)
	         [ , <cache_size> [ , <block_size>]]
	       ])
    {...}
  ...
}

The implementation file for this class would somewhere show the
following statement:

unsigned int Foo::obj_cnt = 0;

The methods that odbm adds to the class are:

1. fetch  -- fetch an object from the database.
2. insert -- insert an object into the database (if object does not exist
             in there already).
3. update -- update the values of an already existing object or insert new
             one.
4. exists -- check if an object for a given key is in the database.
5. remove -- delete an object from the database.

String data that was fetched by odbm is supplied in a block and may not be
deleted but by the next odbm::fetch call.


INDEXING FACILITY

Odbm supports multiple indices which must be initialized when the odbm
object is constructed. This is done by the function member

  index_on(char * const *variable)

This function is called for each variable member that shall be
searchable.  The best place to do this is within the constructor of
the odbm derived object. The index is stored as special key/data pairs
in the gdbm database. Thus, an index may become orphaned if its
associated key has been deleted. Indices need not be unique,
i.e. there may be an n:n mapping of indices to keys.

An object is fetched via an index by the function member:

  fetch(const char *idx, u_int *cursor = NULL)
  fetch(char **idx, const char *value, u_int *cursor = NULL)

The first form may be called with *idx set to any string. There are the
following cases:

1. idx is the key itself   --> fetch via the key.
2. idx is an index         --> fetch via this index.
3. idx is any other string --> fetch via the key set to the string.

The second form is a generalization of the third (and first) case of
the first form. The object is fetched via the index (or key) set to
the string `value'.

The argument `cursor' must be used if multiple references of an index
shall be fetched subsequently and is used only if *idx is a proper
index.  For *cursor == 0, the first index entry is fetched, and for any
(*cursor)++, the next entry.

If the fetch() fails, because either the key is not found or the index
with the skip value does not exist, it returns FAIL but leaves the
object as it was before fetch() was called.

If an index became orphaned due to a removed data record which it
references, odbm tries to find the next valid index or fails if there
are no more indices left. The cursor value of that index is returned
in the cursor variable.

An index can be manually defined for a data entry. This is used when
an indexed field contains multiple aliases which will otherwise not
be searchable by specifying only one of them. Use

  index(char * const *idx, const char *value)

where `idx' is a pointer to the index variable and `value' is the name
value that can be searched for.

There is a function member reindex() that removes all existing index
entries and creates them all new. (NOT YET IMPLEMENTED)


FILTERING FACILITY

Results from fetch via an index can be postprocessed through a filter. A
result record is only returned when the filter matches the output.
The following methods exist to adjust the filter:

  addfilt(const char * const *variable, const char *value,
          operator_t op = eq);
  addfilt(const char * const *variable, const char *value, operator_f op);
  remfilt();

The function addfilt(...) adds an entry to the list of filter clauses,
while remfilt() removes the whole filter. An operator can be set as
`eq' or `ne' or as a filter function pointer defined as follows:

  bool (*operator_f)(const char *, const char *);

If there is a filter defined, a fetch via an index behaves slightly
differently: The contents of the odbm derived class may be updated even
though fetch might fail due to a failed test against the filter for
the last index entry.


TODO

Add support for variable data in the form of:

   struct {
     size_t size;
     void  *data;
   }

This data will later be assumed to reside between the string section and
the scalar section.  */

#include <gdbm.h>

// odbm -- abstract base class that makes the registered members of a
// derived class (one key string, further strings and one block of
// scalars) persistent in a gdbm database.  Derived classes register the
// location of those members through one of the protected constructors
// and then use fetch()/insert()/update()/remove() on themselves.
class odbm {
 public:
  // Access modes accepted by the constructors.  Each enumerator carries
  // the value of the gdbm open flag it is initialized from.
  enum mode {
    read      = GDBM_READER,
    write     = GDBM_WRITER,
    create    = GDBM_WRCREAT,
    newdb     = GDBM_NEWDB,
    fast      = GDBM_FAST
  };

  typedef GDBM_FILE odbm_file;
  // NOTE(review): null_dbf keeps its original form (a const member with an
  // in-class initializer); it is not used as a default argument in this
  // header, so it did not need to become static.
  const odbm_file null_dbf = NULL;

  // Defaults for the gdbm block size and cache size.  They are static
  // because both constructors below use them as default arguments, and a
  // default argument may not refer to a non-static data member.
  static const int default_blocksize = 512;
  static const int default_cachesize = 100;

  // Strings handed out by odbm live in buffers owned by odbm and must be
  // released through these functions, never through ::free() or delete [].
  void free(void *); // free the key or a string;
  void free();       // free the key and all strings;

 protected:
  // Single object access constructor: the database file is opened when
  // the instance is constructed and closed when it is destroyed.
  //   key         - location of the key string pointer
  //   string1     - location of the first persistent string pointer
  //   scalar1     - location of the first persistent scalar
  //   scalar_size - total size in bytes of the scalar block
  //   dbf_name    - name of the database file
  //   mode        - one of the `mode' enumerators, defaults to read
  //   cachesize, blocksize - gdbm tuning parameters
  odbm(char **key, char **string1, void *scalar1, size_t scalar_size,
       const char *dbf_name, int mode = read,
       int cachesize = default_cachesize, int blocksize = default_blocksize);

  // Multi objects access constructor: the database is opened when the
  // first instance is constructed and closed when the last one is
  // destroyed.  In addition to the arguments above it takes
  //   db_file - the shared database file variable
  //   obj_cnt - the shared object counter; must be 0 before the first
  //             object is constructed
  // Both are typically static members of the derived class and must have
  // the same storage class.
  odbm(char **key, char **string1, void *scalar1, size_t scalar_size,
       odbm_file *db_file, unsigned int *obj_cnt,
       const char *dbf_name, int mode = read,
       int cachesize = default_cachesize, int blocksize = default_blocksize);

  virtual ~odbm();

  // Copy constructor and assignment operator.  These copy all the
  // registered data of the odbm derived class en bloc.
  odbm(const odbm &);
  virtual odbm &operator = (const odbm &);

  // Retrieval methods.  `result' is declared outside this header.  A
  // failed fetch leaves the object as it was before the call, unless a
  // filter is defined, in which case the object may have been updated.
  // `cursor' selects among several entries of a non-unique index:
  // *cursor == 0 fetches the first entry, each increment the next one.
  // 1. fetch with key found in structure
  result fetch();
  // 2. fetch with supplied key or index
  result fetch(const char *idx, u_int *cursor = NULL);
  // 3. fetch with supplied key or index set to supplied value
  result fetch(char **idx, const char *value, u_int *cursor = NULL);
  // 4. fetch with key or index and filters found in structure
  result fetch(u_int *cursor);

  // Existence checks; the overloads mirror the fetch() overloads.
  // 1. look up the key found in structure
  bool exists() const;
  // 2. look up the supplied key or index
  bool exists(const char *idx, u_int *cursor = NULL) const;
  // 3. look up the supplied key or index set to supplied value
  bool exists(char **idx, const char *value, u_int *cursor = NULL) const;

  // Sequential traversal of the database.
  const char *firstkey() const; // fetch the first object in the database
  const char *nextkey() const;  // fetch next object in the database

  // The rest can be used only in write mode.
  // Storage methods: insert() stores the object if it is not in the
  // database yet; update() overwrites an existing object or inserts a
  // new one.
  void insert() const;
  void update() const;

  // Removing methods.
  void remove(const char *key) const; // remove the supplied key
  void remove() const;                // remove the current object

  // Management methods.
  // NOTE(review): presumably thin wrappers around the gdbm calls of
  // similar name; the implementation is not visible here.
  void reorganize() const;
  void sync()       const;
  void fastwrite()  const;
  void slowwrite()  const;

  // Index facility.  The argument is the location of a string member of
  // the derived class.
  void index_on(char * const *) const; // set an index on a variable
  void index(char * const *, const char *) const; // manually set index
  void reindex() const;                   // rebuild index entries

  // Filter facility.  A filter clause compares a string member against
  // `value', either with a built-in operator (eq / ne) or with a
  // caller-supplied predicate.  addfilt() appends a clause, remfilt()
  // removes the whole filter.
  enum operator_t { eq, ne };
  typedef bool (*operator_f)(const char *, const char *);
  void addfilt(const char * const *variable, const char *value,
               operator_t op = eq);
  void addfilt(const char * const *variable, const char *value,
               operator_f op);
  void remfilt();

 private:
  odbm_file  *dbfp;
  unsigned int *object_cnt; // != NULL for multi objects access

  // Filename and gdbm_open parameters needed for copy constructor.
  struct reopen_s {
    char *dbfn;
    int cachesize;
    int blocksize;
    int mode;
  } reopen;

  datum key;
  datum data;

  // NOTE(review): the members below presumably hold what the derived
  // class registered through the constructor (key location, string
  // locations and count, scalar block) plus the set of indexed strings;
  // confirm against the implementation.
  char   **keyp;
  int    nostrings;
  char   **strings;
  // Plain `datum', like key and data above: gdbm declares datum as a
  // typedef name, which must not be preceded by the struct keyword.
  datum  scalars;
  BitSet indices;

  // Filter facility.
  // NOTE(review): defining a class inside a friend declaration is not
  // accepted by standard C++ compilers.  It is left as written because
  // the implementation file, which determines how `filter' has to be
  // qualified, is not visible from this header.
 public:
  friend class filter {
    int   str;
    char *value;
    operator_t opt;
    operator_f opf;
    class filter *next;

   public:
    //filter(const filter &); // default copy constructor copies bitwise
    filter(const filter *);   // copy *pointer* constructor does real copy
    filter(int, const char *, operator_t opt = eq);
    filter(int, const char *, operator_f opf);
    ~filter();
    void add(int, const char *, operator_t opt = eq);
    void add(int, const char *, operator_f opf);
    bool test(const odbm *);
    const char *operator[](int) const;
  };

 private:
  filter *fltp;

 public:
  // Read-only access to the filter currently attached to this object.
  const filter *thefilter() const { return fltp; }

 private:
  // Same as the protected addfilt(), but addresses the string by number.
  void addfilt(int strno, const char *value, operator_t op = eq);

  // Conversion between the registered members and gdbm key/data records.
  const char *parse_key() const;
  datum build_key(const char *k) const;
  void update_key() const;
  void build_data() const;
  void parse_data();

  // Index facility.
  int   stringno(const char *x) const;
  int   stringno(const char * const *x) const;
  void  insert_indices() const;
  datum deref_index(int stringno, const char *idx, u_int *cursor,
                    datum &key) const;
  u_int index_counter(int field, const char *value) const;
  result fltfetch(int indexno, const char *idx, u_int *cursor);

  int the_index;
 protected:
  u_int the_cursor;
};

#ifndef OUTLINE
#include <odbm.icc>
#endif

#endif /* ! ODBM_H_ */
