Main Page | Class List | Directories | File List | Class Members | File Members

multiset.h

Go to the documentation of this file.
00001 /*
00002 || This file is part of Pike. For copyright information see COPYRIGHT.
00003 || Pike is distributed under GPL, LGPL and MPL. See the file COPYING
00004 || for more information.
00005 || $Id: multiset.h,v 1.40 2005/04/08 16:55:53 grubba Exp $
00006 */
00007 
00008 #ifndef MULTISET_H
00009 #define MULTISET_H
00010 
00011 /* Multisets using rbtree.
00012  *
00013  * Created by Martin Stjernholm 2001-05-07
00014  */
00015 
00016 /* #define TEST_MULTISET */
00017 
00018 #include "svalue.h"
00019 #include "dmalloc.h"
00020 #include "rbtree.h"
00021 #include "block_alloc_h.h"
00022 
00023 /* Note: Don't access the ind svalue (or at least not its type field)
00024  * in the following directly, since the rbtree flags overlay that. Use
00025  * assign_multiset_index_no_free() or similar instead. */
00026 
00027 struct msnode_ind
00028 {
00029   struct msnode_ind *prev, *next; /* Must be first. */
00030   struct svalue ind;            /* Must be second. */
00031 };
00032 
00033 struct msnode_indval
00034 {
00035   struct msnode_indval *prev, *next; /* Must be first. */
00036   struct svalue ind;            /* Must be second. */
00037   struct svalue val;
00038 };
00039 
00040 union msnode
00041 {
00042   struct msnode_ind i;
00043   struct msnode_indval iv;
00044 };
00045 
00046 #define MULTISET_FLAG_MARKER    0x1000
00047 #define MULTISET_FLAG_MASK      (RB_FLAG_MASK | MULTISET_FLAG_MARKER)
00048 /* The flag marker is used to reliably nail direct uses of the ind
00049  * type field. */
00050 
00051 struct multiset_data
00052 {
00053   INT32 refs, noval_refs;
00054 #ifdef PIKE_RUN_UNLOCKED
00055 #error multiset_data has not been adapted for running unlocked.
00056 #endif
00057   union msnode *root, *free_list;
00058   struct svalue cmp_less;
00059   INT32 size, allocsize;
00060   TYPE_FIELD ind_types;
00061   TYPE_FIELD val_types;         /* Always BIT_INT in valueless multisets. */
00062   INT16 flags;
00063   union msnode nodes[1];
00064 };
00065 
00066 struct multiset
00067 {
00068   PIKE_MEMORY_OBJECT_MEMBERS;
00069   struct multiset_data *msd;
00070   struct multiset *next, *prev;
00071   INT32 node_refs;
00072 };
00073 
00074 /* Data structure notes:
00075  *
00076  * o  The node free list through multiset_data.free_list is singly
00077  *    linked by the next pointers. Nodes on the free list are either
00078  *    free or deleted (see below). Free nodes have ind.type set to
00079  *    PIKE_T_UNKNOWN, and deleted nodes are flagged with T_DELETED.
00080  *
00081  *    Deleted nodes are always before free nodes on the free list.
00082  *    multiset_data.size counts both the allocated and the deleted
00083  *    nodes. Note that deleted nodes might still be on the free list
00084  *    even when there are no node references (see below).
00085  *
00086  * o  multiset_data.cmp_less.type is T_INT when the internal set order
00087  *    is used.
00088  *
00089  * o  multset_data.refs counts the number of "independent" references
00090  *    to the data block. When it's greater than one, the data block
00091  *    must always be copied if anything but a node value is changed.
00092  *    Changes of node values might be allowed depending on
00093  *    multset_data.noval_refs.
00094  *
00095  *    A direct pointer to or into the data block can only be kept if
00096  *    it's accounted for by this ref counter, otherwise an indirection
00097  *    via a multiset struct must be used.
00098  *
00099  *    When the gc runs, it won't touch a multiset_data block that got
00100  *    direct external references, i.e. if multiset_data.refs is
00101  *    greater than the number of multiset objects that points to
00102  *    it.
00103  *
00104  * o  multiset_data.noval_refs is the number of the multiset_data.refs
00105  *    references that don't lock the value part of the data block.
00106  *    Thus, values may be changed without copy if (refs - noval_refs)
00107  *    is 1 (if youself have a value lock) or 0 (if you don't). The
00108  *    functions below that change only values assume that the caller
00109  *    has a value lock.
00110  *
00111  *    Note that you should not do any operation that might cause a
00112  *    copy of the data block (which includes insert and delete) when
00113  *    you've increased noval_refs, since it then won't be possible for
00114  *    the copying function to know whether one ref in noval_refs
00115  *    should be moved to the copy or not. All copy operations let
00116  *    noval_refs stay with the original.
00117  *
00118  * o  multiset.node_refs counts the number of references to nodes in
00119  *    the multiset. The references are array offsets in
00120  *    multiset.msd->nodes, so the data block may be reallocated but no
00121  *    nodes may be moved relatively within it. Values but not indices
00122  *    may be changed when there are node references, nodes may be
00123  *    added and removed, and the order may be changed.
00124  *
00125  *    Nodes that are deleted during nonzero node_refs are linked in
00126  *    first on the free list as usual, but ind.type is set to
00127  *    T_DELETED. They are thereby flagged to not be used again.
00128  *    Deleted nodes on the free list are flagged as free as soon as
00129  *    possible after all node references are gone. There are however
00130  *    some combinations of node references and shared data blocks that
00131  *    results in deleted nodes on the free list even after all node
00132  *    references are gone.
00133  *
00134  *    The prev and ind.u.ptr pointers in deleted nodes point to the
00135  *    previous and next neighbor, respectively, of the node at the
00136  *    time it was deleted. Thus the relative position of the node is
00137  *    remembered even after it has been deleted.
00138  */
00139 
00140 /* The following are compatible with PIKE_WEAK_INDICES and PIKE_WEAK_VALUES. */
00141 #define MULTISET_WEAK_INDICES   2
00142 #define MULTISET_WEAK_VALUES    4
00143 #define MULTISET_WEAK           6
00144 
00145 #define MULTISET_INDVAL         8
00146 
00147 extern struct multiset *first_multiset;
00148 extern struct multiset *gc_internal_multiset;
00149 
00150 PMOD_EXPORT extern struct svalue svalue_int_one;
00151 
00152 PMOD_EXPORT void multiset_clear_node_refs (struct multiset *l);
00153 
00154 #ifdef PIKE_DEBUG
00155 /* To get good type checking. */
00156 static INLINE union msnode *msnode_check (union msnode *x)
00157   {return x;}
00158 PMOD_EXPORT extern const char msg_no_multiset_flag_marker[];
00159 #else
00160 #define msnode_check(X) ((union msnode *) (X))
00161 #endif
00162 
00163 #define MULTISET_STEP_FUNC(FUNC, NODE)                                  \
00164   ((union msnode *) FUNC ((struct rb_node_hdr *) msnode_check (NODE)))
00165 #define low_multiset_first(MSD) MULTISET_STEP_FUNC (rb_first, (MSD)->root)
00166 #define low_multiset_last(MSD) MULTISET_STEP_FUNC (rb_last, (MSD)->root)
00167 #define low_multiset_prev(NODE) MULTISET_STEP_FUNC (rb_prev, NODE)
00168 #define low_multiset_next(NODE) MULTISET_STEP_FUNC (rb_next, NODE)
00169 #define low_multiset_get_nth(MSD, N)                                    \
00170   ((union msnode *) rb_get_nth ((struct rb_node_hdr *) (MSD)->root, (N)))
00171 union msnode *low_multiset_find_eq (struct multiset *l, struct svalue *key);
00172 
00173 #define low_assign_multiset_index_no_free(TO, NODE) do {                \
00174     struct svalue *_ms_index_to_ = (TO);                                \
00175     *_ms_index_to_ = msnode_check (NODE)->i.ind;                        \
00176     DO_IF_DEBUG (                                                       \
00177       if (!(_ms_index_to_->type & MULTISET_FLAG_MARKER))                \
00178         Pike_fatal (msg_no_multiset_flag_marker);                       \
00179     );                                                                  \
00180     _ms_index_to_->type &= ~MULTISET_FLAG_MASK;                         \
00181     add_ref_svalue (_ms_index_to_);                                     \
00182   } while (0)
00183 #define low_assign_multiset_index(TO, NODE) do {                        \
00184     struct svalue *_ms_index_to2_ = (TO);                               \
00185     free_svalue (_ms_index_to2_);                                       \
00186     low_assign_multiset_index_no_free (_ms_index_to2_, (NODE));         \
00187   } while (0)
00188 #define low_push_multiset_index(NODE)                                   \
00189   low_assign_multiset_index_no_free (Pike_sp++, (NODE))
00190 #define low_use_multiset_index(NODE, VAR)                               \
00191   ((VAR) = msnode_check (NODE)->i.ind,                                  \
00192    DO_IF_DEBUG ((VAR).type & MULTISET_FLAG_MARKER ? 0 :                 \
00193                 (Pike_fatal (msg_no_multiset_flag_marker), 0) COMMA)    \
00194    (VAR).type &= ~MULTISET_FLAG_MASK,                                   \
00195    &(VAR))
00196 
00197 #define low_get_multiset_value(MSD, NODE)                               \
00198   ((MSD)->flags & MULTISET_INDVAL ? &(NODE)->iv.val : &svalue_int_one)
00199 #define low_set_multiset_value(MSD, NODE, VAL) do {                     \
00200     if ((MSD)->flags & MULTISET_INDVAL)                                 \
00201       assign_svalue (&(NODE)->iv.val, VAL);                             \
00202   } while (0)
00203 
00204 #define OFF2MSNODE(MSD, OFFSET)                                         \
00205   ((MSD)->flags & MULTISET_INDVAL ?                                     \
00206    (union msnode *) (&(MSD)->nodes->iv + (OFFSET)) :                    \
00207    (union msnode *) (&(MSD)->nodes->i + (OFFSET)))
00208 #define MSNODE2OFF(MSD, NODE)                                           \
00209   ((MSD)->flags & MULTISET_INDVAL ?                                     \
00210    &(NODE)->iv - &(MSD)->nodes->iv : &(NODE)->i - &(MSD)->nodes->i)
00211 
00212 PMOD_EXPORT INT32 multiset_sizeof (struct multiset *l);
00213 #define l_sizeof(L) multiset_sizeof (L)
00214 #define multiset_ind_types(L) ((L)->msd->ind_types)
00215 #define multiset_val_types(L) ((L)->msd->val_types)
00216 #define multiset_get_flags(L) ((L)->msd->flags)
00217 #define multiset_get_cmp_less(L) (&(L)->msd->cmp_less)
00218 #define multiset_indval(L) ((L)->msd->flags & MULTISET_INDVAL)
00219 
00220 /* This is somewhat faster than using multiset_sizeof just to
00221  * check whether or not the multiset has no elements at all. */
00222 #define multiset_is_empty(L) (!(L)->msd->root)
00223 
00224 PMOD_PROTO void really_free_multiset (struct multiset *l);
00225 
00226 #define free_multiset(L) do {                                           \
00227     struct multiset *_ms_ = (L);                                        \
00228     debug_malloc_touch (_ms_);                                          \
00229     DO_IF_DEBUG (                                                       \
00230       DO_IF_PIKE_CLEANUP (                                              \
00231         if (gc_external_refs_zapped)                                    \
00232           gc_check_zapped (_ms_, PIKE_T_MULTISET, __FILE__, __LINE__))); \
00233     if (!sub_ref (_ms_)) really_free_multiset (_ms_);                   \
00234   } while (0)
00235 
00236 #ifdef PIKE_DEBUG
00237 
00238 union msnode *debug_check_msnode (
00239   struct multiset *l, ptrdiff_t nodepos, int allow_deleted,
00240   char *file, int line);
00241 #define check_msnode(L, NODEPOS, ALLOW_DELETED)                         \
00242   debug_check_msnode ((L), (NODEPOS), (ALLOW_DELETED), __FILE__, __LINE__)
00243 #define access_msnode(L, NODEPOS)                                       \
00244   check_msnode ((L), (NODEPOS), 0)
00245 
00246 #else
00247 
00248 #define check_msnode(L, NODEPOS, ALLOW_DELETED)
00249 #define access_msnode(L, NODEPOS) OFF2MSNODE ((L)->msd, (NODEPOS))
00250 
00251 #endif
00252 
00253 BLOCK_ALLOC_FILL_PAGES (multiset, 2);
00254 
00255 /* See rbtree.h for a description of the operations.
00256  *
00257  * If cmp_less is used, it's a function pointer used as `< to compare
00258  * the entries, otherwise the internal set order is used. `< need not
00259  * define a total order for the possible indices; if neither a < b nor
00260  * b < a is true then a and b are considered equal orderwise. The
00261  * order between such indices is arbitrary and stable. The orderwise
00262  * equality doesn't affect searches on equality, however; if several
00263  * orderwise equal values are found, then they are searched linearly
00264  * backwards until one is found which is equal to the key according to
00265  * `==.
00266  *
00267  * It's possible to keep references to individual nodes. The node
00268  * offset within the multiset data block is used then, which together
00269  * with the multiset struct can access the node. Use add_msnode_ref
00270  * when you store a node reference and sub_msnode_ref when you throw
00271  * it away. The multiset_find_*, multiset_first, multiset_last and
00272  * multiset_get_nth functions do add_msnode_ref for you (if they
00273  * return a match). Other functions, like multiset_insert_2, doesn't,
00274  * even though they might return a node offset.
00275  *
00276  * msnode_is_deleted tells whether the referenced node has been
00277  * deleted. The relative position of a deleted node is remembered by
00278  * keeping pointers to the neighbors it had when it was deleted. A
00279  * "defensive" strategy is employed when a deleted node is used in a
00280  * function: If going forward then the previous neighbor links are
00281  * followed until a nondeleted neighbor is found, which is then used
00282  * as the base for the forward movement. Vice versa in the backward
00283  * direction. This has the effect that if nodes are added and removed
00284  * in a multiset that is being traversed in some direction, then no
00285  * newly added nodes in the vicinity of the current one are missed. It
00286  * also has the effect that the node returned by multiset_next for a
00287  * deleted node might be before the one returned by multiset_prev.
00288  *
00289  * Since the functions might run pike code when comparing entries
00290  * (even when cmp_less isn't used), the multiset may change during the
00291  * search in it. If that happens for a destructive operation, it's
00292  * remade in one way or the other to ensure that the change has been
00293  * made in the multiset that is current upon return. This normally has
00294  * no caller visible effects, except for multiset_add_after, which
00295  * might fail to add the requested entry (it returns less than zero in
00296  * that case).
00297  */
00298 
00299 /* Returns the node offset, or -1 if no match was found. */
00300 PMOD_EXPORT ptrdiff_t multiset_find_eq (struct multiset *l, struct svalue *key);
00301 PMOD_EXPORT ptrdiff_t multiset_find_lt (struct multiset *l, struct svalue *key);
00302 PMOD_EXPORT ptrdiff_t multiset_find_gt (struct multiset *l, struct svalue *key);
00303 PMOD_EXPORT ptrdiff_t multiset_find_le (struct multiset *l, struct svalue *key);
00304 PMOD_EXPORT ptrdiff_t multiset_find_ge (struct multiset *l, struct svalue *key);
00305 PMOD_EXPORT ptrdiff_t multiset_first (struct multiset *l);
00306 PMOD_EXPORT ptrdiff_t multiset_last (struct multiset *l);
00307 PMOD_EXPORT ptrdiff_t multiset_prev (struct multiset *l, ptrdiff_t nodepos);
00308 PMOD_EXPORT ptrdiff_t multiset_next (struct multiset *l, ptrdiff_t nodepos);
00309 
00310 #ifdef PIKE_DEBUG
00311 PMOD_EXPORT extern const char msg_multiset_no_node_refs[];
00312 #endif
00313 
00314 #define add_msnode_ref(L) do {(L)->node_refs++;} while (0)
00315 #define sub_msnode_ref(L) do {                                          \
00316     struct multiset *_ms_ = (L);                                        \
00317     DO_IF_DEBUG (                                                       \
00318       if (!_ms_->node_refs) Pike_fatal (msg_multiset_no_node_refs);     \
00319     );                                                                  \
00320     if (!--_ms_->node_refs && _ms_->msd->refs == 1)                     \
00321       multiset_clear_node_refs (_ms_);                                  \
00322   } while (0)
00323 
00324 PMOD_EXPORT void do_sub_msnode_ref (struct multiset *l);
00325 PMOD_EXPORT int msnode_is_deleted (struct multiset *l, ptrdiff_t nodepos);
00326 
00327 #define assign_multiset_index_no_free(TO, L, NODEPOS) do {              \
00328     struct multiset *_ms_ = (L);                                        \
00329     union msnode *_ms_node_ = access_msnode (_ms_, (NODEPOS));          \
00330     low_assign_multiset_index_no_free (TO, _ms_node_);                  \
00331   } while (0)
00332 #define assign_multiset_index(TO, L, NODEPOS) do {                      \
00333     struct multiset *_ms_ = (L);                                        \
00334     union msnode *_ms_node_ = access_msnode (_ms_, (NODEPOS));          \
00335     low_assign_multiset_index (TO, _ms_node_);                          \
00336   } while (0)
00337 #define push_multiset_index(L, NODEPOS)                                 \
00338   assign_multiset_index_no_free (Pike_sp++, (L), (NODEPOS))
00339 #define use_multiset_index(L, NODEPOS, VAR)                             \
00340   ((VAR) = access_msnode ((L), (NODEPOS))->i.ind,                       \
00341    (VAR).type &= ~MULTISET_FLAG_MASK,                                   \
00342    &(VAR))
00343 
00344 #define get_multiset_value(L, NODEPOS)                                  \
00345   ((L)->msd->flags & MULTISET_INDVAL ?                                  \
00346    &access_msnode ((L), (NODEPOS))->iv.val : &svalue_int_one)
00347 #define set_multiset_value(L, NODEPOS, VAL) do {                        \
00348     if ((L)->msd->flags & MULTISET_INDVAL)                              \
00349       assign_svalue (&access_msnode ((L), (NODEPOS))->iv.val, VAL);     \
00350   } while (0)
00351 /* Note: It's intentional that the value is silently ignored for
00352  * index-only multisets. */
00353 
00354 #define assign_multiset_value_no_free(TO, L, NODEPOS)                   \
00355   assign_svalue_no_free (TO, get_multiset_value (L, NODEPOS))
00356 #define assign_multiset_value(TO, L, NODEPOS)                           \
00357   assign_svalue (TO, get_multiset_value (L, NODEPOS))
00358 #define push_multiset_value(L, NODEPOS)                                 \
00359   push_svalue (get_multiset_value (L, NODEPOS))
00360 
00361 #define allocate_multiset(allocsize, flags, cmp_less)                   \
00362   dmalloc_touch (struct multiset *,                                     \
00363                  real_allocate_multiset ((allocsize), (flags), (cmp_less)))
00364 
00365 PMOD_EXPORT struct multiset *real_allocate_multiset (int allocsize,
00366                                                      int flags,
00367                                                      struct svalue *cmp_less);
00368 PMOD_EXPORT void do_free_multiset (struct multiset *l);
00369 void multiset_fix_type_field (struct multiset *l);
00370 PMOD_EXPORT void multiset_set_flags (struct multiset *l, int flags);
00371 PMOD_EXPORT void multiset_set_cmp_less (struct multiset *l,
00372                                         struct svalue *cmp_less);
00373 PMOD_EXPORT struct multiset *mkmultiset (struct array *indices);
00374 PMOD_EXPORT struct multiset *mkmultiset_2 (struct array *indices,
00375                                            struct array *values,
00376                                            struct svalue *cmp_less);
00377 PMOD_EXPORT void multiset_insert (struct multiset *l,
00378                                   struct svalue *ind);
00379 PMOD_EXPORT ptrdiff_t multiset_insert_2 (struct multiset *l,
00380                                          struct svalue *ind,
00381                                          struct svalue *val,
00382                                          int replace);
00383 PMOD_EXPORT ptrdiff_t multiset_add (struct multiset *l,
00384                                     struct svalue *ind,
00385                                     struct svalue *val);
00386 PMOD_EXPORT ptrdiff_t multiset_add_after (struct multiset *l,
00387                                           ptrdiff_t node,
00388                                           struct svalue *ind,
00389                                           struct svalue *val);
00390 PMOD_EXPORT int multiset_delete (struct multiset *l,
00391                                  struct svalue *ind);
00392 PMOD_EXPORT int multiset_delete_2 (struct multiset *l,
00393                                    struct svalue *ind,
00394                                    struct svalue *removed_val);
00395 PMOD_EXPORT void multiset_delete_node (struct multiset *l,
00396                                        ptrdiff_t node);
00397 PMOD_EXPORT int multiset_member (struct multiset *l,
00398                                  struct svalue *key);
00399 PMOD_EXPORT struct svalue *multiset_lookup (struct multiset *l,
00400                                             struct svalue *key);
00401 struct array *multiset_indices (struct multiset *l);
00402 struct array *multiset_values (struct multiset *l);
00403 struct array *multiset_range_indices (struct multiset *l,
00404                                       ptrdiff_t beg, ptrdiff_t end);
00405 struct array *multiset_range_values (struct multiset *l,
00406                                      ptrdiff_t beg, ptrdiff_t end);
00407 PMOD_EXPORT void check_multiset_for_destruct (struct multiset *l);
00408 PMOD_EXPORT struct multiset *copy_multiset (struct multiset *l);
00409 PMOD_EXPORT struct multiset *merge_multisets (struct multiset *a,
00410                                               struct multiset *b,
00411                                               int operation);
00412 PMOD_EXPORT struct multiset *add_multisets (struct svalue *argp, int count);
00413 PMOD_EXPORT int multiset_equal_p (struct multiset *a, struct multiset *b,
00414                                   struct processing *p);
00415 void describe_multiset (struct multiset *l, struct processing *p, int indent);
00416 void simple_describe_multiset (struct multiset *l);
00417 int multiset_is_constant (struct multiset *l, struct processing *p);
00418 node *make_node_from_multiset (struct multiset *l);
00419 PMOD_EXPORT void f_aggregate_multiset (int args);
00420 struct multiset *copy_multiset_recursively (struct multiset *l,
00421                                             struct mapping *p);
00422 PMOD_EXPORT ptrdiff_t multiset_get_nth (struct multiset *l, size_t n);
00423 
00424 unsigned gc_touch_all_multisets (void);
00425 void gc_check_all_multisets (void);
00426 void gc_mark_multiset_as_referenced (struct multiset *l);
00427 void gc_mark_all_multisets (void);
00428 void gc_zap_ext_weak_refs_in_multisets (void);
00429 void real_gc_cycle_check_multiset (struct multiset *l, int weak);
00430 void gc_cycle_check_all_multisets (void);
00431 size_t gc_free_all_unreferenced_multisets (void);
00432 #define gc_cycle_check_multiset(X, WEAK) \
00433   gc_cycle_enqueue ((gc_cycle_check_cb *) real_gc_cycle_check_multiset, (X), (WEAK))
00434 
00435 #ifdef PIKE_DEBUG
00436 void check_multiset (struct multiset *l, int safe);
00437 void check_all_multisets (int safe);
00438 void debug_dump_multiset (struct multiset *l);
00439 #endif
00440 
00441 void count_memory_in_multisets (INT32 *num, INT32 *size);
00442 void init_multiset (void);
00443 void exit_multiset (void);
00444 void test_multiset (void);
00445 
00446 #endif /* MULTISET_H */

Generated on Fri Jul 22 23:44:26 2005 for Pike by  doxygen 1.3.9.1