An extensible data structure for massive streaming graphs
|
#include <stdio.h>
#include "stinger.h"
#include "stinger-atomics.h"
#include "stinger-utils.h"
#include "timer.h"
#include "xmalloc.h"
Functions | |
void | usage (FILE *out, char *progname) |
Prints command line input information and defaults to the screen. | |
void | parse_args (const int argc, char *argv[], char **initial_graph_name, char **action_stream_name, int64_t *batch_size, int64_t *nbatch) |
Parses command line arguments. | |
void | bs64_n (size_t n, int64_t *restrict d) |
void | snarf_graph (const char *initial_graph_name, int64_t *nv_out, int64_t *ne_out, int64_t **off_out, int64_t **ind_out, int64_t **weight_out, int64_t **mem_handle) |
Load an input graph from disk. | |
void | snarf_actions (const char *action_stream_name, int64_t *naction_out, int64_t **action_out, int64_t **mem_handle) |
Loads an action file from disk. | |
void | load_graph_and_action_stream (const char *initial_graph_name, int64_t *nv, int64_t *ne, int64_t **off, int64_t **ind, int64_t **weight, int64_t **graphmem, const char *action_stream_name, int64_t *naction, int64_t **action, int64_t **actionmem) |
Load an initial graph and an action stream from disk. | |
int | csr_cmp (const void *a, const void *b) |
void | copy_metadata_based_on_pointer_array (int64_t count, int64_t *metadata, int64_t offset, int64_t **pointers, int64_t *base, int64_t *buffer) |
void | stinger_to_sorted_csr (const struct stinger *G, const int64_t nv, int64_t **off_out, int64_t **ind_out, int64_t **weight_out, int64_t **timefirst_out, int64_t **timerecent_out, int64_t **type_out) |
Converts a STINGER data structure to sorted compressed sparse row (CSR) | |
void | stinger_to_unsorted_csr (const struct stinger *G, const int64_t nv, int64_t **off_out, int64_t **ind_out, int64_t **weight_out, int64_t **timefirst_out, int64_t **timerecent_out, int64_t **type_out) |
Converts a STINGER data structure to compressed sparse row (CSR) | |
void | counting_sort (int64_t *array, size_t num, size_t size) |
A basic counting sort. | |
void | print_initial_graph_stats (int64_t nv, int64_t ne, int64_t batch_size, int64_t nbatch, int64_t naction) |
Prints basic statistics about the graph loaded from disk. | |
void | edge_list_to_csr (int64_t nv, int64_t ne, int64_t *sv1, int64_t *ev1, int64_t *w1, int64_t *timeRecent, int64_t *timeFirst, int64_t *ev2, int64_t *w2, int64_t *offset, int64_t *t2, int64_t *t1) |
Convert a plain edge list to compressed sparse row (CSR) format. If one or both timestamp input arrays are NULL, they will be ignored. | |
struct stinger * | edge_list_to_stinger (int64_t nv, int64_t ne, int64_t *sv, int64_t *ev, int64_t *w, int64_t *timeRecent, int64_t *timeFirst, int64_t timestamp) |
Take a plain edge list and convert it into a STINGER. If only recent timestamps or modified timestamps are given, they will be used for both. If neither are given, the default is used. | |
void | stinger_sort_edge_list (const struct stinger *S, const int64_t srcvtx, const int64_t type) |
For a given vertex and edge type in STINGER, sort the adjacency list. | |
void | bucket_sort_pairs (int64_t *array, size_t num) |
A simple bucket sort for an array of tuples. | |
void | radix_sort_pairs (int64_t *x, int64_t length, int64_t numBits) |
A radix sort for edge tuples. | |
int | i64_cmp (const void *a, const void *b) |
Simple comparator for int64_t. | |
int | i2cmp (const void *va, const void *vb) |
Simple comparator for pairs of int64_t. | |
int64_t | prefix_sum (const int64_t n, int64_t *ary) |
Inclusive prefix sum utility function. |
void bs64_n | ( | size_t | n, |
int64_t *restrict | d | ||
) |
References bs64().
Referenced by snarf_actions(), snarf_graph(), and stinger_open_from_file().
void bucket_sort_pairs | ( | int64_t * | array, |
size_t | num | ||
) |
A simple bucket sort for an array of tuples.
This function sorts an array of tuples, first by the leading element, then by the trailing element. It accomplishes this by bucketing by the first element, then sorting by the second element within the bucket.
On the Cray XMT, this sort performed very well up to 32 processors. Hot-spotting and load imbalance make it infeasible for large processor counts.
array | Input array of tuples to sort |
num | Number of tuples to sort |
References xmalloc().
Referenced by stinger_sort_actions().
|
inline |
Referenced by stinger_to_sorted_csr().
void counting_sort | ( | int64_t * | array, |
size_t | num, | ||
size_t | size | ||
) |
A basic counting sort.
array | Input array of numbers to be sorted |
num | Number of items to be sorted |
size | Size of each item in array[] |
References xmalloc().
int csr_cmp | ( | const void * | a, |
const void * | b | ||
) |
Referenced by stinger_to_sorted_csr().
void edge_list_to_csr | ( | int64_t | nv, |
int64_t | ne, | ||
int64_t * | sv1, | ||
int64_t * | ev1, | ||
int64_t * | w1, | ||
int64_t * | timeRecent, | ||
int64_t * | timeFirst, | ||
int64_t * | ev2, | ||
int64_t * | w2, | ||
int64_t * | offset, | ||
int64_t * | t2, | ||
int64_t * | t1 | ||
) |
Convert a plain edge list to compressed sparse row (CSR) format. If one or both timestamp input arrays are NULL, they will be ignored.
nv | Number of vertices |
ne | Number of edges |
sv1 | Input array of source vertices |
ev1 | Input array of destination vertices |
timeRecent | Array of timestamps or NULL |
timeFirst | Array of timestamps or NULL |
w1 | Input array of edge weights |
ev2 | Output array of destination vertices |
w2 | Output array of edge weights |
offset | Output array of vertex offsets into ev2[], w2[], t2[], and t1[] |
t2 | Output array of timestamps or NULL |
t1 | Output array of timestamps or NULL |
Referenced by edge_list_to_stinger().
|
read |
Take a plain edge list and convert it into a STINGER. If only recent timestamps or modified timestamps are given, they will be used for both. If neither are given, the default is used.
nv | Number of vertices |
ne | Number of edges |
sv | Array of source vertices |
ev | Array of destination vertices |
w | Array of edge weights |
timeRecent | Array of timestamps or NULL |
timeFirst | Array of timestamps or NULL |
timestamp | Default timestamp (if recent and modified not given) |
References edge_list_to_csr(), stinger_new(), stinger_set_initial_edges(), and xmalloc().
int i2cmp | ( | const void * | va, |
const void * | vb | ||
) |
Simple comparator for pairs of int64_t.
integers in a pair must be stored contiguously.
a | Pointer to first integer pair. |
b | Pointer to second integer pair. |
Referenced by stinger_sort_actions().
int i64_cmp | ( | const void * | a, |
const void * | b | ||
) |
Simple comparator for int64_t.
a | Pointer to first integer. |
b | Pointer to second integer. |
Referenced by main().
void load_graph_and_action_stream | ( | const char * | initial_graph_name, |
int64_t * | nv, | ||
int64_t * | ne, | ||
int64_t ** | off, | ||
int64_t ** | ind, | ||
int64_t ** | weight, | ||
int64_t ** | graphmem, | ||
const char * | action_stream_name, | ||
int64_t * | naction, | ||
int64_t ** | action, | ||
int64_t ** | actionmem | ||
) |
Load an initial graph and an action stream from disk.
This function provides a wrapper to snarf_graph() and snarf_actions().
initial_graph_name | Filename/path of initial graph on disk |
nv | Returns the number of vertices |
ne | Returns the number of edges |
off | Returns the CSR offset array |
ind | Returns the destination vertex array |
weight | Returns the edge weight array |
graphmem | Pointer to the initial graph memory allocation |
action_stream_name | Filename/path of action stream on disk |
naction | Returns the number of actions read |
action | Returns the action <source, destination> pair array |
actionmem | Pointer to the action stream memory allocation |
References snarf_actions(), snarf_graph(), tic(), and toc().
void parse_args | ( | const int | argc, |
char * | argv[], | ||
char ** | initial_graph_name, | ||
char ** | action_stream_name, | ||
int64_t * | batch_size, | ||
int64_t * | nbatch | ||
) |
Parses command line arguments.
Parses the command line input as given by usage(). Batch size, number of batches, initial graph filename, and action stream filename are given by to the caller if they were specified on the command line.
argc | The number of arguments |
argv[] | The array of arguments |
initial_graph_name | Path/filename of the initial input graph on disk |
action_stream_name | Path/filename of the action stream on disk |
batch_size | Number of edge actions to consider in one batch |
nbatch | Number of batchs to process |
References usage().
int64_t prefix_sum | ( | const int64_t | n, |
int64_t * | ary | ||
) |
Inclusive prefix sum utility function.
This sum is inclusive meaning that the first element of the output will be equal to the first element of the input (not necessarily 0).
If compiled with OpenMP, assumes that you are in parallel section and are calling this with all threads (although it will also work outside of a parallel OpenMP context - just DO NOT call this in OpenMP single or master inside of a parallel region).
If compiled without OpenMP this is implemented as a simple serial prefix sum that should be auto-parallelized by the Cray MTA/XMT compiler.
n | The input array size. |
ary | The input array pointer. |
Referenced by main(), stinger_save_to_file(), and stinger_sort_actions().
void print_initial_graph_stats | ( | int64_t | nv, |
int64_t | ne, | ||
int64_t | batch_size, | ||
int64_t | nbatch, | ||
int64_t | naction | ||
) |
Prints basic statistics about the graph loaded from disk.
nv | Number of vertices |
ne | Number of edges |
batch_size | Batch size of the action stream (specified on the command line) |
nbatch | Number of batches (specified on the command line) |
naction | Number of edge actions in the action stream |
void radix_sort_pairs | ( | int64_t * | x, |
int64_t | length, | ||
int64_t | numBits | ||
) |
A radix sort for edge tuples.
This function replaces the bucket_sort_pairs() in STINGER.
x | Input array of tuples sort |
length | Number of tuples to sort |
numBits | Number of bits to use for the radix |
References xmalloc().
void snarf_actions | ( | const char * | action_stream_name, |
int64_t * | naction_out, | ||
int64_t ** | action_out, | ||
int64_t ** | mem_handle | ||
) |
Loads an action file from disk.
Updates to the initial graph are stored in an action stream file. This file may have been generated by genstreams. The actions are packed in source/destination pairs. A deletion is indicated if both source and destination vertex IDs are complemented.
action_stream_name | Filename/path to the action file on disk |
naction_out | Returns the number of actions loaded |
action_out | Returns the array of actions |
mem_handle | Returns a pointer to the block of memory allocated, for freeing later |
References bs64_n(), endian_check, and xmalloc().
Referenced by load_graph_and_action_stream().
void snarf_graph | ( | const char * | initial_graph_name, |
int64_t * | nv_out, | ||
int64_t * | ne_out, | ||
int64_t ** | off_out, | ||
int64_t ** | ind_out, | ||
int64_t ** | weight_out, | ||
int64_t ** | mem_handle | ||
) |
Load an input graph from disk.
This function loads a CSR-like binary representation of a graph from disk. The input file may have been generated by genstreams. Edges are stored as a vertex offset plus destination vertex ID and weight.
initial_graph_name | Filename/path of the initial graph file on disk |
nv_out | Returns number of vertices |
ne_out | Returns number of edges |
off_out | Returns the offset array indicating the start of each vertex's edges |
ind_out | Returns the edges sorted by source vertex |
weight_out | Returns the integer weights corresponding to ind_out[] |
mem_handle | Returns a pointer to the block of memory allocated, for freeing later |
References bs64_n(), endian_check, and xmalloc().
Referenced by load_graph_and_action_stream().
void stinger_sort_edge_list | ( | const struct stinger * | S, |
const int64_t | srcvtx, | ||
const int64_t | type | ||
) |
For a given vertex and edge type in STINGER, sort the adjacency list.
This function sorts the linked block data structure inside STINGER for a particular vertex ID and edge type. Since STINGER is assumed to be changing, we cannot guarantee that the adjacency list will remain sorted. We provide this function such that some algorithms may see a small speed-up if sorted or partially sorted.
This function is currently EXPERIMENTAL. There are known bugs. Please report bugs to the development team.
S | The STINGER data structure |
srcvtx | Vertex ID of the adjacency list to sort |
type | Edge type of the adjacency list to sort |
References ebpool, stinger_eb::edges, stinger_vb::edges, stinger_eb::etype, stinger_eb::high, stinger_eb::largeStamp, stinger::LVA, stinger_edge::neighbor, stinger_eb::next, stinger_eb::numEdges, stinger_eb::smallStamp, stinger_eb_is_blank(), STINGER_EDGEBLOCKSIZE, stinger_edge::timeFirst, stinger_edge::timeRecent, and stinger_edge::weight.
void stinger_to_sorted_csr | ( | const struct stinger * | G, |
const int64_t | nv, | ||
int64_t ** | off_out, | ||
int64_t ** | ind_out, | ||
int64_t ** | weight_out, | ||
int64_t ** | timefirst_out, | ||
int64_t ** | timerecent_out, | ||
int64_t ** | type_out | ||
) |
Converts a STINGER data structure to sorted compressed sparse row (CSR)
Returns all data contained in the STINGER structure for vertices 0 to nv in CSR format. In this format, the adjacencies of a vertex v are stored in ind_out[] starting at index off_out[v] and ending at off_out[v+1]. The adjacencies of a particular vertex will be sorted in this range. The metadata for the edges is stored at the same offset in each corresponding metadata array. Optionally any of the output pointers for these arrays can be NULL and the array will not be returned.
G | The STINGER data structure |
nv | Number of vertices |
off_out | Returns the vertex offset array |
ind_out | Returns the destination edge array |
weight_out | OPTIONAL Returns the destination edge weight array |
timefirst_out | OPTIONAL Returns the destination edge timefirst array |
timerecent_out | OPTIONAL Returns the destination edge timerecent array |
type_out | OPTIONAL Returns the destination edge type array |
References copy_metadata_based_on_pointer_array(), csr_cmp(), stinger_to_unsorted_csr(), and xmalloc().
Referenced by stinger_consistency_check().
void stinger_to_unsorted_csr | ( | const struct stinger * | G, |
const int64_t | nv, | ||
int64_t ** | off_out, | ||
int64_t ** | ind_out, | ||
int64_t ** | weight_out, | ||
int64_t ** | timefirst_out, | ||
int64_t ** | timerecent_out, | ||
int64_t ** | type_out | ||
) |
Converts a STINGER data structure to compressed sparse row (CSR)
Returns all data contained in the STINGER structure for vertices 0 to nv in CSR format. In this format, the adjacencies of a vertex v are stored in ind_out[] starting at index off_out[v] and ending at off_out[v+1]. The adjacencies of a particular vertex will not be sorted. The metadata for the edges is stored at the same offset in each corresponding metadata array. Optionally any of the output pointers for these arrays can be NULL and the array will not be returned.
G | The STINGER data structure |
nv | Number of vertices |
off_out | Returns the vertex offset array |
ind_out | Returns the destination edge array |
weight_out | OPTIONAL Returns the destination edge weight array |
timefirst_out | OPTIONAL Returns the destination edge timefirst array |
timerecent_out | OPTIONAL Returns the destination edge timerecent array |
type_out | OPTIONAL Returns the destination edge type array |
References stinger_gather_successors(), stinger_outdegree(), xcalloc(), and xmalloc().
Referenced by stinger_to_sorted_csr().
void usage | ( | FILE * | out, |
char * | progname | ||
) |
Prints command line input information and defaults to the screen.
out | Where to write, likely stderr. |
progname | The program binary, likely argv[0]. |
References ACTION_STREAM_NAME_DEFAULT, BATCH_SIZE_DEFAULT, INITIAL_GRAPH_NAME_DEFAULT, and NBATCH_DEFAULT.
Referenced by parse_args().
comments powered by Disqus