An extensible data structure for massive streaming graphs
 All Data Structures Files Functions Variables Typedefs Macros Groups Pages
stinger-utils.h File Reference
#include "stinger.h"

Macros

#define INITIAL_GRAPH_NAME_DEFAULT   "initial-graph.bin"
#define ACTION_STREAM_NAME_DEFAULT   "action-stream.bin"
#define BATCH_SIZE_DEFAULT   1
#define NBATCH_DEFAULT   100
#define STATS_INIT()
#define STATS_END()   do { printf ("\n}\n"); } while (0)
#define BATCH_SIZE_CHECK()
#define PRINT_STAT_INT64(NAME_, VALUE_)
#define PRINT_STAT_INT64_ARRAY_AS_PAIR(NAME_, ARY_, LEN_)
#define PRINT_STAT_DOUBLE(NAME_, VALUE_)
#define PRINT_STAT_HEX64(NAME_, VALUE_)

Functions

void usage (FILE *out, char *progname)
 Prints command line input information and defaults to the screen.
void parse_args (const int argc, char *argv[], char **initial_graph_name, char **action_stream_name, int64_t *batch_size, int64_t *nbatch)
 Parses command line arguments.
void snarf_graph (const char *initial_graph_name, int64_t *nv_out, int64_t *ne_out, int64_t **off_out, int64_t **ind_out, int64_t **weight_out, int64_t **mem_handle)
 Load an input graph from disk.
void snarf_actions (const char *action_stream_name, int64_t *naction_out, int64_t **action_out, int64_t **mem_handle)
 Loads an action file from disk.
void load_graph_and_action_stream (const char *initial_graph_name, int64_t *nv, int64_t *ne, int64_t **off, int64_t **ind, int64_t **weight, int64_t **graphmem, const char *action_stream_name, int64_t *naction, int64_t **action, int64_t **actionmem)
 Load an initial graph and an action stream from disk.
void stinger_to_sorted_csr (const struct stinger *G, const int64_t nv, int64_t **off_out, int64_t **ind_out, int64_t **weight_out, int64_t **timefirst_out, int64_t **timerecent_out, int64_t **type_out)
 Converts a STINGER data structure to sorted compressed sparse row (CSR)
void stinger_to_unsorted_csr (const struct stinger *G, const int64_t nv, int64_t **off_out, int64_t **ind_out, int64_t **weight_out, int64_t **timefirst_out, int64_t **timerecent_out, int64_t **type_out)
 Converts a STINGER data structure to compressed sparse row (CSR)
void counting_sort (int64_t *array, size_t num, size_t size)
 A basic counting sort.
int64_t find_in_sorted (const int64_t tofind, const int64_t N, const int64_t *restrict ary)
void print_initial_graph_stats (int64_t nv, int64_t ne, int64_t batch_size, int64_t nbatch, int64_t naction)
 Prints basic statistics about the graph loaded from disk.
void edge_list_to_csr (int64_t nv, int64_t ne, int64_t *sv1, int64_t *ev1, int64_t *w1, int64_t *timeRecent, int64_t *timeFirst, int64_t *ev2, int64_t *w2, int64_t *offset, int64_t *t2, int64_t *t1)
 Convert a plain edge list to compressed sparse row (CSR) format. If one or both timestamp input arrays are NULL, they will be ignored.
struct stingeredge_list_to_stinger (int64_t nv, int64_t ne, int64_t *sv, int64_t *ev, int64_t *w, int64_t *timeRecent, int64_t *timeFirst, int64_t timestamp)
 Take a plain edge list and convert it into a STINGER. If only recent timestamps or modified timestamps are given, they will be used for both. If neither are given, the default is used.
void stinger_sort_edge_list (const struct stinger *S, const int64_t srcvtx, const int64_t type)
 For a given vertex and edge type in STINGER, sort the adjacency list.
void bucket_sort_pairs (int64_t *array, size_t num)
 A simple bucket sort for an array of tuples.
void radix_sort_pairs (int64_t *x, int64_t length, int64_t numBits)
 A radix sort for edge tuples.
int64_t bs64 (int64_t xin)
void bs64_n (size_t n, int64_t *restrict d)
int i64_cmp (const void *a, const void *b)
 Simple comparator for int64_t.
int i2cmp (const void *va, const void *vb)
 Simple comparator for pairs of int64_t.
int64_t prefix_sum (const int64_t n, int64_t *ary)
 Inclusive prefix sum utility function.

Macro Definition Documentation

#define ACTION_STREAM_NAME_DEFAULT   "action-stream.bin"

Referenced by usage().

#define BATCH_SIZE_CHECK ( )
Value:
do { \
if (naction < nbatch * batch_size) { \
fprintf (stderr, "WARNING: not enough actions\n"); \
nbatch = (naction + batch_size - 1) / batch_size; \
} \
} while (0)
#define BATCH_SIZE_DEFAULT   1

Referenced by usage().

#define INITIAL_GRAPH_NAME_DEFAULT   "initial-graph.bin"

Referenced by usage().

#define NBATCH_DEFAULT   100

Referenced by usage().

#define PRINT_STAT_DOUBLE (   NAME_,
  VALUE_ 
)
Value:
do { \
printf(",\n\t\"%s\": %20.15e", NAME_, VALUE_); \
} while (0)
#define PRINT_STAT_HEX64 (   NAME_,
  VALUE_ 
)
Value:
do { \
printf(",\n\t\"%s\":\"0x%lx\"", NAME_, VALUE_); \
} while (0)
#define PRINT_STAT_INT64 (   NAME_,
  VALUE_ 
)
Value:
do { \
printf(",\n\t\"%s\": %ld", NAME_, VALUE_); \
} while (0)
#define PRINT_STAT_INT64_ARRAY_AS_PAIR (   NAME_,
  ARY_,
  LEN_ 
)
Value:
do { \
printf(",\n\t\"%s\": [", NAME_); \
for(uint64_t i = 0; i < LEN_ - 1; i++) { \
printf("\n\t\t[ %ld, %ld],", i, ARY_ [i]); \
} \
printf("\n\t\t[ %ld, %ld]", LEN_ -1, ARY_ [LEN_ -1]); \
printf("\n\t]"); \
} while (0)
#define STATS_END ( )    do { printf ("\n}\n"); } while (0)
#define STATS_INIT ( )
Value:
do { \
init_timer (); \
printf ("{\n"); \
printf ("\t\"timer_res\": %20.15e,", timer_getres ()); \
} while (0)

Function Documentation

int64_t bs64 ( int64_t  xin)

Referenced by bs64_n(), and stinger_open_from_file().

void bs64_n ( size_t  n,
int64_t *restrict  d 
)
void bucket_sort_pairs ( int64_t *  array,
size_t  num 
)

A simple bucket sort for an array of tuples.

This function sorts an array of tuples, first by the leading element, then by the trailing element. It accomplishes this by bucketing by the first element, then sorting by the second element within the bucket.

On the Cray XMT, this sort performed very well up to 32 processors. Hot-spotting and load imbalance make it infeasible for large processor counts.

Parameters
arrayInput array of tuples to sort
numNumber of tuples to sort

References xmalloc().

Referenced by stinger_sort_actions().

void counting_sort ( int64_t *  array,
size_t  num,
size_t  size 
)

A basic counting sort.

Parameters
arrayInput array of numbers to be sorted
numNumber of items to be sorted
sizeSize of each item in array[]

References xmalloc().

void edge_list_to_csr ( int64_t  nv,
int64_t  ne,
int64_t *  sv1,
int64_t *  ev1,
int64_t *  w1,
int64_t *  timeRecent,
int64_t *  timeFirst,
int64_t *  ev2,
int64_t *  w2,
int64_t *  offset,
int64_t *  t2,
int64_t *  t1 
)

Convert a plain edge list to compressed sparse row (CSR) format. If one or both timestamp input arrays are NULL, they will be ignored.

Parameters
nvNumber of vertices
neNumber of edges
sv1Input array of source vertices
ev1Input array of destination vertices
timeRecentArray of timestamps or NULL
timeFirstArray of timestamps or NULL
w1Input array of edge weights
ev2Output array of destination vertices
w2Output array of edge weights
offsetOutput array of vertex offsets into ev2[], w2[], t2[], and t1[]
t2Output array of timestamps or NULL
t1Output array of timestamps or NULL

Referenced by edge_list_to_stinger().

struct stinger* edge_list_to_stinger ( int64_t  nv,
int64_t  ne,
int64_t *  sv,
int64_t *  ev,
int64_t *  w,
int64_t *  timeRecent,
int64_t *  timeFirst,
int64_t  timestamp 
)
read

Take a plain edge list and convert it into a STINGER. If only recent timestamps or modified timestamps are given, they will be used for both. If neither are given, the default is used.

Parameters
nvNumber of vertices
neNumber of edges
svArray of source vertices
evArray of destination vertices
wArray of edge weights
timeRecentArray of timestamps or NULL
timeFirstArray of timestamps or NULL
timestampDefault timestamp (if recent and modified not given)
Returns
A STINGER data structure

References edge_list_to_csr(), stinger_new(), stinger_set_initial_edges(), and xmalloc().

int64_t find_in_sorted ( const int64_t  tofind,
const int64_t  N,
const int64_t *restrict  ary 
)
int i2cmp ( const void *  va,
const void *  vb 
)

Simple comparator for pairs of int64_t.

integers in a pair must be stored contiguously.

Parameters
aPointer to first integer pair.
bPointer to second integer pair.
Returns
> 0 if a is greater, < 0 if b is greater, 0 if equal.

Referenced by stinger_sort_actions().

int i64_cmp ( const void *  a,
const void *  b 
)

Simple comparator for int64_t.

Parameters
aPointer to first integer.
bPointer to second integer.
Returns
> 0 if a is greater, < 0 if b is greater, 0 if equal.

Referenced by main().

void load_graph_and_action_stream ( const char *  initial_graph_name,
int64_t *  nv,
int64_t *  ne,
int64_t **  off,
int64_t **  ind,
int64_t **  weight,
int64_t **  graphmem,
const char *  action_stream_name,
int64_t *  naction,
int64_t **  action,
int64_t **  actionmem 
)

Load an initial graph and an action stream from disk.

This function provides a wrapper to snarf_graph() and snarf_actions().

Parameters
initial_graph_nameFilename/path of initial graph on disk
nvReturns the number of vertices
neReturns the number of edges
offReturns the CSR offset array
indReturns the destination vertex array
weightReturns the edge weight array
graphmemPointer to the initial graph memory allocation
action_stream_nameFilename/path of action stream on disk
nactionReturns the number of actions read
actionReturns the action <source, destination> pair array
actionmemPointer to the action stream memory allocation

References snarf_actions(), snarf_graph(), tic(), and toc().

void parse_args ( const int  argc,
char *  argv[],
char **  initial_graph_name,
char **  action_stream_name,
int64_t *  batch_size,
int64_t *  nbatch 
)

Parses command line arguments.

Parses the command line input as given by usage(). Batch size, number of batches, initial graph filename, and action stream filename are given by to the caller if they were specified on the command line.

Parameters
argcThe number of arguments
argv[]The array of arguments
initial_graph_namePath/filename of the initial input graph on disk
action_stream_namePath/filename of the action stream on disk
batch_sizeNumber of edge actions to consider in one batch
nbatchNumber of batchs to process

References usage().

int64_t prefix_sum ( const int64_t  n,
int64_t *  ary 
)

Inclusive prefix sum utility function.

This sum is inclusive meaning that the first element of the output will be equal to the first element of the input (not necessarily 0).

If compiled with OpenMP, assumes that you are in parallel section and are calling this with all threads (although it will also work outside of a parallel OpenMP context - just DO NOT call this in OpenMP single or master inside of a parallel region).

If compiled without OpenMP this is implemented as a simple serial prefix sum that should be auto-parallelized by the Cray MTA/XMT compiler.

Parameters
nThe input array size.
aryThe input array pointer.
Returns
The final element of the sum.

Referenced by main(), stinger_save_to_file(), and stinger_sort_actions().

void print_initial_graph_stats ( int64_t  nv,
int64_t  ne,
int64_t  batch_size,
int64_t  nbatch,
int64_t  naction 
)

Prints basic statistics about the graph loaded from disk.

Parameters
nvNumber of vertices
neNumber of edges
batch_sizeBatch size of the action stream (specified on the command line)
nbatchNumber of batches (specified on the command line)
nactionNumber of edge actions in the action stream
void radix_sort_pairs ( int64_t *  x,
int64_t  length,
int64_t  numBits 
)

A radix sort for edge tuples.

This function replaces the bucket_sort_pairs() in STINGER.

Parameters
xInput array of tuples sort
lengthNumber of tuples to sort
numBitsNumber of bits to use for the radix

References xmalloc().

void snarf_actions ( const char *  action_stream_name,
int64_t *  naction_out,
int64_t **  action_out,
int64_t **  mem_handle 
)

Loads an action file from disk.

Updates to the initial graph are stored in an action stream file. This file may have been generated by genstreams. The actions are packed in source/destination pairs. A deletion is indicated if both source and destination vertex IDs are complemented.

Parameters
action_stream_nameFilename/path to the action file on disk
naction_outReturns the number of actions loaded
action_outReturns the array of actions
mem_handleReturns a pointer to the block of memory allocated, for freeing later

References bs64_n(), endian_check, and xmalloc().

Referenced by load_graph_and_action_stream().

void snarf_graph ( const char *  initial_graph_name,
int64_t *  nv_out,
int64_t *  ne_out,
int64_t **  off_out,
int64_t **  ind_out,
int64_t **  weight_out,
int64_t **  mem_handle 
)

Load an input graph from disk.

This function loads a CSR-like binary representation of a graph from disk. The input file may have been generated by genstreams. Edges are stored as a vertex offset plus destination vertex ID and weight.

Parameters
initial_graph_nameFilename/path of the initial graph file on disk
nv_outReturns number of vertices
ne_outReturns number of edges
off_outReturns the offset array indicating the start of each vertex's edges
ind_outReturns the edges sorted by source vertex
weight_outReturns the integer weights corresponding to ind_out[]
mem_handleReturns a pointer to the block of memory allocated, for freeing later

References bs64_n(), endian_check, and xmalloc().

Referenced by load_graph_and_action_stream().

void stinger_sort_edge_list ( const struct stinger S,
const int64_t  srcvtx,
const int64_t  type 
)

For a given vertex and edge type in STINGER, sort the adjacency list.

This function sorts the linked block data structure inside STINGER for a particular vertex ID and edge type. Since STINGER is assumed to be changing, we cannot guarantee that the adjacency list will remain sorted. We provide this function such that some algorithms may see a small speed-up if sorted or partially sorted.

This function is currently EXPERIMENTAL. There are known bugs. Please report bugs to the development team.

Parameters
SThe STINGER data structure
srcvtxVertex ID of the adjacency list to sort
typeEdge type of the adjacency list to sort

References ebpool, stinger_eb::edges, stinger_vb::edges, stinger_eb::etype, stinger_eb::high, stinger_eb::largeStamp, stinger::LVA, stinger_edge::neighbor, stinger_eb::next, stinger_eb::numEdges, stinger_eb::smallStamp, stinger_eb_is_blank(), STINGER_EDGEBLOCKSIZE, stinger_edge::timeFirst, stinger_edge::timeRecent, and stinger_edge::weight.

void stinger_to_sorted_csr ( const struct stinger G,
const int64_t  nv,
int64_t **  off_out,
int64_t **  ind_out,
int64_t **  weight_out,
int64_t **  timefirst_out,
int64_t **  timerecent_out,
int64_t **  type_out 
)

Converts a STINGER data structure to sorted compressed sparse row (CSR)

Returns all data contained in the STINGER structure for vertices 0 to nv in CSR format. In this format, the adjacencies of a vertex v are stored in ind_out[] starting at index off_out[v] and ending at off_out[v+1]. The adjacencies of a particular vertex will be sorted in this range. The metadata for the edges is stored at the same offset in each corresponding metadata array. Optionally any of the output pointers for these arrays can be NULL and the array will not be returned.

Parameters
GThe STINGER data structure
nvNumber of vertices
off_outReturns the vertex offset array
ind_outReturns the destination edge array
weight_outOPTIONAL Returns the destination edge weight array
timefirst_outOPTIONAL Returns the destination edge timefirst array
timerecent_outOPTIONAL Returns the destination edge timerecent array
type_outOPTIONAL Returns the destination edge type array

References copy_metadata_based_on_pointer_array(), csr_cmp(), stinger_to_unsorted_csr(), and xmalloc().

Referenced by stinger_consistency_check().

void stinger_to_unsorted_csr ( const struct stinger G,
const int64_t  nv,
int64_t **  off_out,
int64_t **  ind_out,
int64_t **  weight_out,
int64_t **  timefirst_out,
int64_t **  timerecent_out,
int64_t **  type_out 
)

Converts a STINGER data structure to compressed sparse row (CSR)

Returns all data contained in the STINGER structure for vertices 0 to nv in CSR format. In this format, the adjacencies of a vertex v are stored in ind_out[] starting at index off_out[v] and ending at off_out[v+1]. The adjacencies of a particular vertex will not be sorted. The metadata for the edges is stored at the same offset in each corresponding metadata array. Optionally any of the output pointers for these arrays can be NULL and the array will not be returned.

Parameters
GThe STINGER data structure
nvNumber of vertices
off_outReturns the vertex offset array
ind_outReturns the destination edge array
weight_outOPTIONAL Returns the destination edge weight array
timefirst_outOPTIONAL Returns the destination edge timefirst array
timerecent_outOPTIONAL Returns the destination edge timerecent array
type_outOPTIONAL Returns the destination edge type array

References stinger_gather_successors(), stinger_outdegree(), xcalloc(), and xmalloc().

Referenced by stinger_to_sorted_csr().

void usage ( FILE *  out,
char *  progname 
)

Prints command line input information and defaults to the screen.

Parameters
outWhere to write, likely stderr.
prognameThe program binary, likely argv[0].

References ACTION_STREAM_NAME_DEFAULT, BATCH_SIZE_DEFAULT, INITIAL_GRAPH_NAME_DEFAULT, and NBATCH_DEFAULT.

Referenced by parse_args().

 

comments powered by Disqus