summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2020-09-14 12:18:44 +0200
committerDamjan Marion <damarion@cisco.com>2020-09-14 22:15:49 +0200
commit9514781064ef80e0bf46b0a2df7a7088034033d9 (patch)
treee5a07471730ccc54285015094593481a7d2e4284
parentf99a7d64476fc25636a1df200b256df445d9e446 (diff)
l2: allocate l2fib only when needed
Currently l2 fib allocates 512MB hash table unconditionally on startup. This patch postpones table creation up to the point where first interface is put into l2 mode or mac entry is added. In addition it reduces default table size to 128MB and increases number of buckets 4 times. This default setting should be enough to keep 1M mac entries. Also, new startup.conf section is added which allows user to change memory and bucket size. .i.e: l2fib { table-size 512M num-buckets 524288 } Type: improvement Change-Id: I2a29209aa3545181f0087544c97a54d8157b6ec5 Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r--src/vnet/l2/l2_bd.c4
-rw-r--r--src/vnet/l2/l2_fib.c78
-rw-r--r--src/vnet/l2/l2_fib.h15
-rw-r--r--src/vnet/l2/l2_input.c3
-rw-r--r--src/vpp/conf/startup.conf9
5 files changed, 99 insertions, 10 deletions
diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c
index e6fb6223d93..94852c90769 100644
--- a/src/vnet/l2/l2_bd.c
+++ b/src/vnet/l2/l2_bd.c
@@ -1226,9 +1226,13 @@ int
bd_add_del (l2_bridge_domain_add_del_args_t * a)
{
bd_main_t *bdm = &bd_main;
+ l2fib_main_t *fm = &l2fib_main;
vlib_main_t *vm = bdm->vlib_main;
int rv = 0;
+ if (fm->mac_table_initialized == 0)
+ l2fib_table_init ();
+
u32 bd_index = bd_find_index (bdm, a->bd_id);
if (a->is_add)
{
diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c
index 983e0218bfc..0ce4eb84a6b 100644
--- a/src/vnet/l2/l2_fib.c
+++ b/src/vnet/l2/l2_fib.c
@@ -285,6 +285,12 @@ show_l2fib (vlib_main_t * vm,
break;
}
+ if (msm->mac_table_initialized == 0)
+ {
+ vlib_cli_output (vm, "no l2fib entries");
+ return 0;
+ }
+
BV (clib_bihash_foreach_key_value_pair)
(&msm->mac_table, l2fib_show_walk_cb, &ctx);
@@ -341,6 +347,18 @@ VLIB_CLI_COMMAND (show_l2fib_cli, static) = {
};
/* *INDENT-ON* */
+void
+l2fib_table_init (void)
+{
+ l2fib_main_t *mp = &l2fib_main;
+
+ if (mp->mac_table_initialized == 1)
+ return;
+
+ BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table",
+ mp->mac_table_n_buckets, mp->mac_table_memory_size);
+ mp->mac_table_initialized = 1;
+}
/* Remove all entries from the l2fib */
void
@@ -348,10 +366,12 @@ l2fib_clear_table (void)
{
l2fib_main_t *mp = &l2fib_main;
+ if (mp->mac_table_initialized == 0)
+ return;
+
/* Remove all entries */
BV (clib_bihash_free) (&mp->mac_table);
- BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table",
- L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE);
+ l2fib_table_init ();
l2learn_main.global_learn_count = 0;
}
@@ -412,6 +432,9 @@ l2fib_add_entry (const u8 * mac, u32 bd_index,
l2learn_main_t *lm = &l2learn_main;
BVT (clib_bihash_kv) kv;
+ if (fm->mac_table_initialized == 0)
+ l2fib_table_init ();
+
/* set up key */
key.raw = l2fib_make_key (mac, bd_index);
kv.key = key.raw;
@@ -555,7 +578,6 @@ static clib_error_t *
l2fib_test_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
- clib_error_t *error = 0;
u8 mac[6], save_mac[6];
u32 bd_index = 0;
u32 sw_if_index = 8;
@@ -606,6 +628,9 @@ l2fib_test_command_fn (vlib_main_t * vm,
BVT (clib_bihash_kv) kv;
l2fib_main_t *mp = &l2fib_main;
+ if (mp->mac_table_initialized == 0)
+ return clib_error_return (0, "mac table is not initialized");
+
clib_memcpy_fast (mac, save_mac, 6);
for (i = 0; i < count; i++)
@@ -631,7 +656,7 @@ l2fib_test_command_fn (vlib_main_t * vm,
}
}
- return error;
+ return 0;
}
/*?
@@ -692,6 +717,9 @@ l2fib_del_entry (const u8 * mac, u32 bd_index, u32 sw_if_index)
l2fib_main_t *mp = &l2fib_main;
BVT (clib_bihash_kv) kv;
+ if (mp->mac_table_initialized == 0)
+ return 1;
+
/* set up key */
kv.key = l2fib_make_key (mac, bd_index);
@@ -1273,10 +1301,11 @@ l2fib_init (vlib_main_t * vm)
mp->vlib_main = vm;
mp->vnet_main = vnet_get_main ();
-
- /* Create the hash table */
- BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table",
- L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE);
+ if (mp->mac_table_n_buckets == 0)
+ mp->mac_table_n_buckets = L2FIB_NUM_BUCKETS;
+ if (mp->mac_table_memory_size == 0)
+ mp->mac_table_memory_size = L2FIB_MEMORY_SIZE;
+ mp->mac_table_initialized = 0;
/* verify the key constructor is good, since it is endian-sensitive */
clib_memset (test_mac, 0, sizeof (test_mac));
@@ -1291,6 +1320,39 @@ l2fib_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (l2fib_init);
+static clib_error_t *
+lfib_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ l2fib_main_t *lm = &l2fib_main;
+ uword table_size = ~0;
+ u32 n_buckets = ~0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table-size %U", unformat_memory_size,
+ &table_size))
+ ;
+ else if (unformat (input, "num-buckets %u", &n_buckets))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (n_buckets != ~0)
+ {
+ if (!is_pow2 (n_buckets))
+ return clib_error_return (0, "num-buckets must be power of 2");
+ lm->mac_table_n_buckets = n_buckets;
+ }
+
+ if (table_size != ~0)
+ lm->mac_table_memory_size = table_size;
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (lfib_config, "l2fib");
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h
index a1dbc9db5c7..41ef4ab3461 100644
--- a/src/vnet/l2/l2_fib.h
+++ b/src/vnet/l2/l2_fib.h
@@ -24,8 +24,8 @@
/*
* The size of the hash table
*/
-#define L2FIB_NUM_BUCKETS (64 * 1024)
-#define L2FIB_MEMORY_SIZE (512<<20)
+#define L2FIB_NUM_BUCKETS (256 * 1024)
+#define L2FIB_MEMORY_SIZE (128<<20)
/* Ager scan interval is 1 minute for aging */
#define L2FIB_AGE_SCAN_INTERVAL (60.0)
@@ -45,6 +45,15 @@ typedef struct
/* hash table */
BVT (clib_bihash) mac_table;
+ /* number of buckets in the hash table */
+ uword mac_table_n_buckets;
+
+ /* hash table memory size */
+ uword mac_table_memory_size;
+
+ /* hash table initialized */
+ u8 mac_table_initialized;
+
/* per swif vector of sequence number for interface based flush of MACs */
u8 *swif_seq_num;
@@ -419,6 +428,8 @@ l2fib_lookup_4 (BVT (clib_bihash) * mac_table,
void l2fib_clear_table (void);
+void l2fib_table_init (void);
+
void
l2fib_add_entry (const u8 * mac,
u32 bd_index,
diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c
index 542bd42c4c2..bfb0751f4e2 100644
--- a/src/vnet/l2/l2_input.c
+++ b/src/vnet/l2/l2_input.c
@@ -598,6 +598,9 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */
hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
config = l2input_intf_config (sw_if_index);
+ if (l2fib_main.mac_table_initialized == 0)
+ l2fib_table_init ();
+
if (config->bridge)
{
/* Interface is already in bridge mode. Undo the existing config. */
diff --git a/src/vpp/conf/startup.conf b/src/vpp/conf/startup.conf
index 56610e2e315..12679da778c 100644
--- a/src/vpp/conf/startup.conf
+++ b/src/vpp/conf/startup.conf
@@ -189,3 +189,12 @@ cpu {
# per-node-counters on | off, defaults to none
# update-interval <f64-seconds>, sets the segment scrape / update interval
# }
+
+## L2 FIB
+# l2fib {
+ ## l2fib hash table size.
+ # table-size 512M
+
+ ## l2fib hash table number of buckets. Must be power of 2.
+ # num-buckets 524288
+# }
'n576' href='#n576'>576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822
/* 
 *------------------------------------------------------------------
 * Copyright (c) 2006-2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <stdio.h>
#include <stdlib.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <ctype.h>
#include <vppinfra/clib.h>
#include <vppinfra/vec.h>
#include <vppinfra/hash.h>
#include <pwd.h>
#include <stdarg.h>
#include <time.h>
#include "cpel.h"
#include <math.h>

char *time_format = "%.03d:%.02d:%.02d:%.03d:%.03d ";
static char version[] = "cpelstate 2.0h";

#define USEC_PER_MS 1000LL
#define USEC_PER_SECOND (1000*USEC_PER_MS)
#define USEC_PER_MINUTE (60*USEC_PER_SECOND)
#define USEC_PER_HOUR (60*USEC_PER_MINUTE)

uword *the_strtab_hash; /* (name, base-VA) hash of all string tables */
uword *the_evtdef_hash; /* (event-id, event-definition) hash */
uword *the_trackdef_hash; /* (track-id, track-definition) hash */

f64 ticks_per_us;
u32 state_event_code = 1;       /* default: XR thread-on-cpu */
int exclude_kernel_from_summary_stats=1;
int summary_stats_only;
int scatterplot;
u8 *name_filter;

typedef enum {
    SORT_MAX_TIME=1,
    SORT_MAX_OCCURRENCES,
    SORT_NAME,
} sort_t;

sort_t sort_type = SORT_MAX_TIME;

int widest_name_format=5;
int widest_track_format=5;

typedef struct bound_event_ {
    u32 event_code;
    u8  *event_str;
    u8  *datum_str;
    u32  is_strtab_ref;
} bound_event_t;

bound_event_t *bound_events;

typedef struct bound_track_ {
    u32 track;
    u8  *track_str;
    u64 *ticks_in_state; /* vector of state occurrences */
    f64  mean_ticks_in_state;
    f64  variance_ticks_in_state;
    f64  total_ticks_in_state;
} bound_track_t;

bound_track_t *bound_tracks;

void fatal(char *s)
{
    fprintf(stderr, "%s", s);
    exit(1);
}

typedef enum {
    PASS1=1,
    PASS2=2,
} pass_t;

typedef struct {
    int (*pass1)(cpel_section_header_t *, int, FILE *);
    int (*pass2)(cpel_section_header_t *, int, FILE *);
} section_processor_t;

int bad_section(cpel_section_header_t *sh, int verbose, FILE *ofp)
{
    fprintf(ofp, "Bad (type 0) section, skipped...\n");
    return(0);
}

int noop_pass(cpel_section_header_t *sh, int verbose, FILE *ofp)
{
    return(0);
}

int strtab_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
{
    uword *p;
    u8 *strtab_data_area = (u8 *)(sh+1);
    
    /* Multiple string tables with the same name are Bad... */
    p = hash_get_mem(the_strtab_hash, strtab_data_area);
    if (p) {
        fprintf(ofp, "Duplicate string table name %s", strtab_data_area);
    }
    /*
     * Looks funny, but we really do want key = first string in the
     * table, value = address(first string in the table) 
     */
    hash_set_mem(the_strtab_hash, strtab_data_area, strtab_data_area);
    if (verbose) {
        fprintf(ofp, "String Table %s\n", strtab_data_area);
    }
    return(0);
}

int evtdef_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
{
    int i, nevents;
    event_definition_section_header_t *edh;
    event_definition_t *ep;
    u8 *this_strtab;
    u32 event_code;
    uword *p;
    bound_event_t *bp;
    int thislen;

    edh = (event_definition_section_header_t *)(sh+1);
    nevents = ntohl(edh->number_of_event_definitions);
    
    if (verbose) {
        fprintf(ofp, "Event Definition Section: %d definitions\n",
                nevents);
    }

    p = hash_get_mem(the_strtab_hash, edh->string_table_name);
    if (!p) {
        fprintf(ofp, "Fatal: couldn't find string table\n");
        return(1);
    }
    this_strtab = (u8 *)p[0];

    ep = (event_definition_t *)(edh+1);
    
    for (i = 0; i < nevents; i++) {
        event_code = ntohl(ep->event);
        p = hash_get(the_evtdef_hash, event_code);
        if (p) {
            fprintf(ofp, "Event %d redefined, retain first definition\n",
                    event_code);
            continue;
        }
        vec_add2(bound_events, bp, 1);
        bp->event_code = event_code;
        bp->event_str = this_strtab + ntohl(ep->event_format);
        bp->datum_str = this_strtab + ntohl(ep->datum_format);
        bp->is_strtab_ref = 0;
        /* Decide if the datum format is a %s format => strtab reference */
        {
            int j;
            int seen_percent=0;

            for (j = 0; j < strlen((char *)(bp->datum_str)); j++) {
                if (bp->datum_str[j] == '%'){
                    seen_percent=1;
                    continue;
                }
                if (seen_percent && bp->datum_str[j] == 's') {
                    bp->is_strtab_ref = 1;
                }
            }
        }
        
        hash_set(the_evtdef_hash, event_code, bp - bound_events);

        thislen = strlen((char *)bp->event_str);
        if (thislen > widest_name_format)
            widest_name_format = thislen;

        ep++;
    }
    return (0);
}

int trackdef_pass1(cpel_section_header_t *sh, int verbose, FILE *ofp)
{
    int i, nevents;
    track_definition_section_header_t *tdh;
    track_definition_t *tp;
    u8 *this_strtab;
    u32 track_code;
    uword *p;
    bound_track_t *btp;
    int thislen;

    tdh = (track_definition_section_header_t *)(sh+1);
    nevents = ntohl(tdh->number_of_track_definitions);
    
    if (verbose) {
        fprintf(ofp, "Track Definition Section: %d definitions\n",
                nevents);
    }

    p = hash_get_mem(the_strtab_hash, tdh->string_table_name);
    if (!p) {
        fprintf(ofp, "Fatal: couldn't find string table\n");
        return(1);
    }
    this_strtab = (u8 *)p[0];

    tp = (track_definition_t *)(tdh+1);
    
    for (i = 0; i < nevents; i++) {
        track_code = ntohl(tp->track);
        p = hash_get(the_trackdef_hash, track_code);
        if (p) {
            fprintf(ofp, "track %d redefined, retain first definition\n",
                    track_code);
            continue;
        }
        vec_add2(bound_tracks, btp, 1);
        btp->track = track_code;
        btp->track_str = this_strtab + ntohl(tp->track_format);
        hash_set(the_trackdef_hash, track_code, btp - bound_tracks);

        thislen = strlen((char *)(btp->track_str));
        if (thislen > widest_track_format)
            widest_track_format = thislen;
        tp++;
    }
    return (0);
}

int unsupported_pass (cpel_section_header_t *sh, int verbose, FILE *ofp)
{
    if (verbose) {
        fprintf(ofp, "Unsupported type %d section\n",
                ntohl(sh->section_type));
    }
    return(0);
}

int event_pass2(cpel_section_header_t *sh, int verbose, FILE *ofp)
{
    event_section_header_t *eh;
    u32 track_code;
    int nevents;
    int i;
    uword *p;
    event_entry_t *ep;
    u64 now;
    u32 time0, time1;
    bound_track_t generic_track;
    u32 last_track_code;
    u64 state_start_ticks=0;
    u64 ticks_in_state;
    bound_track_t *state_track=0;
    int in_state=0;
    generic_track.track_str = (u8 *) "%d";
    last_track_code = 0xdeafbeef;

    eh = (event_section_header_t *)(sh+1);
    nevents = ntohl(eh->number_of_events);
    ticks_per_us = ((double)ntohl(eh->clock_ticks_per_second))/1e6;

    if (verbose) {
        fprintf(ofp, "%.3f ticks_per_us\n", ticks_per_us);
    }

    ep = (event_entry_t *)(eh+1);

    p = hash_get_mem(the_strtab_hash, eh->string_table_name);
    if (!p) {
        fprintf(ofp, "Fatal: couldn't find string table\n");
        return(1);
    }

    for (i = 0; i < nevents; i++) {
        time0 = ntohl (ep->time[0]);
        time1 = ntohl (ep->time[1]);

        now = (((u64) time0)<<32) | time1;

        /* Found the state-change event ? */
        if (ntohl(ep->event_code) == state_event_code) {
            /*
             * Add a ticks-in-state record, unless
             * this is the "prime mover" event instance 
             */
            if (in_state) {
                ticks_in_state = now - state_start_ticks;
                vec_add1(state_track->ticks_in_state, ticks_in_state);
            }
            /* switch to now-current track */
            state_start_ticks = now;
            track_code = ntohl(ep->track);
            if (track_code != last_track_code) {
                p = hash_get(the_trackdef_hash, track_code);
                if (p) {
                    state_track = &bound_tracks[p[0]];
                } else {
                    state_track = &generic_track;
                }
                last_track_code = track_code;
            }
            in_state = 1;
        }
        ep++;
    }
    return(0);
}

/* 
 * Note: If necessary, add passes / columns to this table to 
 * handle section order dependencies.
 */

section_processor_t processors[CPEL_NUM_SECTION_TYPES+1] =
{
    {bad_section,	noop_pass}, 		/* type 0 -- f**ked */
    {strtab_pass1, 	noop_pass}, 		/* type 1 -- STRTAB */
    {unsupported_pass,  noop_pass}, 		/* type 2 -- SYMTAB */
    {evtdef_pass1,      noop_pass},             /* type 3 -- EVTDEF */
    {trackdef_pass1,    noop_pass},		/* type 4 -- TRACKDEF */
    {noop_pass,         event_pass2},           /* type 5 -- EVENTS */
};


int process_section(cpel_section_header_t *sh, int verbose, FILE *ofp,
                    pass_t pass)
{
    u32 type;
    type = ntohl(sh->section_type);
    int rv;
    int (*fp)(cpel_section_header_t *, int, FILE *);

    if (type > CPEL_NUM_SECTION_TYPES) {
        fprintf(stderr, "Unknown section type %d\n", type);
        return(1);
    }
    switch(pass) {
    case PASS1:
        fp = processors[type].pass1;
        break;

    case PASS2:
        fp = processors[type].pass2;
        break;
        
    default:
        fprintf(stderr, "Unknown pass %d\n", pass);
        return(1);
    }

    rv = (*fp)(sh, verbose, ofp);

    return(rv);
}

int cpel_dump_file_header(cpel_file_header_t *fh, int verbose, FILE *ofp)
{
    time_t file_time;

    if (verbose) {
        fprintf(ofp, "CPEL file: %s-endian, version %d\n",
                ((fh->endian_version & CPEL_FILE_LITTLE_ENDIAN) ? 
                 "little" : "big"), 
                fh->endian_version & CPEL_FILE_VERSION_MASK);

        file_time = ntohl(fh->file_date);
        
        fprintf(ofp, "File created %s", ctime(&file_time));
        fprintf(ofp, "File has %d sections\n", 
                ntohs(fh->nsections));
    }

    return(0);
}


int cpel_dump(u8 *cpel, int verbose, FILE *ofp)
{
    cpel_file_header_t *fh;
    cpel_section_header_t *sh;
    u16 nsections;
    u32 section_size;
    int i;

    /* First, the file header */
    fh = (cpel_file_header_t *)cpel;
    if (fh->endian_version != CPEL_FILE_VERSION) {
        if (fh->endian_version & CPEL_FILE_LITTLE_ENDIAN) {
            fprintf(stderr, "Little endian data format not supported\n");
            return(1);
        }
        fprintf(stderr, "Unsupported file version 0x%x\n", 
                fh->endian_version);
        return(1);
    }
    cpel_dump_file_header(fh, verbose, ofp);
    nsections = ntohs(fh->nsections);

    /*
     * Take two passes through the file. PASS1 builds
     * data structures, PASS2 actually dumps the file.
     * Just in case the sections are in an unobvious order.
     */
    sh = (cpel_section_header_t *)(fh+1);
    for (i = 0; i < nsections; i++) {
        section_size = ntohl(sh->data_length);

        if(verbose) {
            fprintf(ofp, "Section type %d, size %d\n", ntohl(sh->section_type),
                    section_size);
        }

        if(process_section(sh, verbose, ofp, PASS1))
            return(1);

        sh++;
        sh = (cpel_section_header_t *)(((u8 *)sh)+section_size);
    }

    sh = (cpel_section_header_t *)(fh+1);
    for (i = 0; i < nsections; i++) {
        if(process_section(sh, verbose, ofp, PASS2))
            return(1);
        section_size = ntohl(sh->data_length);
        sh++;
        sh = (cpel_section_header_t *)(((u8 *)sh)+section_size);
    }
    return(0);
}

void compute_state_statistics(int verbose, FILE *ofp)
{
    int i, j;
    bound_track_t *bp;
    f64 fticks;

    /* Across the bound tracks */
    for (i = 0; i < vec_len(bound_tracks); i++) {
        bp = &bound_tracks[i];
        bp->mean_ticks_in_state = 0.0;
        bp->variance_ticks_in_state = 0.0;
        bp->total_ticks_in_state = 0.0;
        for (j = 0; j < vec_len(bp->ticks_in_state); j++) {
            bp->total_ticks_in_state += (f64) bp->ticks_in_state[j];
        }
        /* Compute mean */
        if (vec_len(bp->ticks_in_state)) {
            bp->mean_ticks_in_state = bp->total_ticks_in_state / 
                ((f64) vec_len(bp->ticks_in_state));
        }
        /* Accumulate sum: (Xi-Xbar)**2 */
        for (j = 0; j < vec_len(bp->ticks_in_state); j++) {
            fticks = bp->ticks_in_state[j];
            bp->variance_ticks_in_state += 
                (fticks - bp->mean_ticks_in_state)*
                (fticks - bp->mean_ticks_in_state);
        }
        /* Compute s**2, the unbiased estimator of sigma**2 */
        if (vec_len(bp->ticks_in_state) > 1) {
            bp->variance_ticks_in_state /= (f64) 
                (vec_len(bp->ticks_in_state)-1);
        }
    }
}

int track_compare_max (const void *arg1, const void *arg2)
{
    bound_track_t *a1 = (bound_track_t *)arg1;
    bound_track_t *a2 = (bound_track_t *)arg2;
    f64 v1, v2;

    v1 = a1->total_ticks_in_state;
    v2 = a2->total_ticks_in_state;
    
    if (v1 < v2)
        return (1);
    else if (v1 == v2)
        return (0);
    else return (-1);
}

int track_compare_occurrences (const void *arg1, const void *arg2)
{
    bound_track_t *a1 = (bound_track_t *)arg1;
    bound_track_t *a2 = (bound_track_t *)arg2;
    f64 v1, v2;

    v1 = (f64) vec_len(a1->ticks_in_state);
    v2 = (f64) vec_len(a2->ticks_in_state);
    
    if (v1 < v2)
        return (1);
    else if (v1 == v2)
        return (0);
    else return (-1);
}

int track_compare_name (const void *arg1, const void *arg2)
{
    bound_track_t *a1 = (bound_track_t *)arg1;
    bound_track_t *a2 = (bound_track_t *)arg2;

    return (strcmp((char *)(a1->track_str), (char *)(a2->track_str)));
}

void sort_state_statistics(sort_t type, FILE *ofp)
{
    int (*compare)(const void *, const void *)=0;

    if (summary_stats_only)
        return;

    switch(type) {
    case SORT_MAX_TIME:
        fprintf(ofp, "Results sorted by max time in state.\n");
        compare = track_compare_max;
        break;

    case SORT_MAX_OCCURRENCES:
        fprintf(ofp, "Results sorted by max occurrences of state.\n");
        compare = track_compare_occurrences;
        break;

    case SORT_NAME:
        compare = track_compare_name;
        fprintf(ofp, "Results sorted by process-id/name/thread ID\n");
        break;

    default:
        fatal("sort type not set?");
    }
    
    qsort (bound_tracks, vec_len(bound_tracks), 
           sizeof (bound_track_t), compare);    
}

void print_state_statistics(int verbose, FILE *ofp)
{
    int i,j;
    u8 *trackpad;
    bound_track_t *bp;
    f64 total_time = 0.0;
    f64 total_switches = 0.0;

    trackpad = format(0, "%%-%ds ", widest_track_format);
    vec_add1(trackpad, 0);

    if (!summary_stats_only) {
        fprintf(ofp, (char *)trackpad, "ProcThread");
        fprintf(ofp, "  Mean(us)     Stdev(us)   Total(us)      N\n");
    }
        
    for (i = 0; i < vec_len(bound_tracks); i++) {
        bp = &bound_tracks[i];
        if (bp->mean_ticks_in_state == 0.0)
            continue;

        if (name_filter &&
            strncmp((char *)(bp->track_str), (char *)name_filter, 
                    strlen((char *)name_filter)))
            continue;

        /*
         * Exclude kernel threads (e.g. idle thread) from
         * state statistics 
         */
        if (exclude_kernel_from_summary_stats && 
            !strncmp((char *)(bp->track_str), "kernel ", 7))
            continue;

        total_switches += (f64) vec_len(bp->ticks_in_state);
        
        if (!summary_stats_only) {
            fprintf(ofp, (char *) trackpad, bp->track_str);
            fprintf(ofp, "%10.3f +- %10.3f", 
                    bp->mean_ticks_in_state / ticks_per_us,
                    sqrt(bp->variance_ticks_in_state) 
                    / (f64) ticks_per_us);
            fprintf(ofp, "%12.3f", 
                    bp->total_ticks_in_state / ticks_per_us);
            fprintf(ofp, "%8d\n", (int)vec_len(bp->ticks_in_state));
        }

        if (scatterplot) {
            for (j = 0; j < vec_len(bp->ticks_in_state); j++) {
                fprintf(ofp, "%.3f\n", 
                        (f64)bp->ticks_in_state[j] / ticks_per_us);
            }
        }

        total_time += bp->total_ticks_in_state;
    }
    
    if (!summary_stats_only)
        fprintf(ofp, "\n");
    fprintf(ofp, "Note: the following statistics %s kernel-thread activity.\n",
            exclude_kernel_from_summary_stats ? "exclude" : "include");
    if (name_filter)
        fprintf(ofp, 
                "Note: only pid/proc/threads matching '%s' are included.\n",
                name_filter);

    fprintf(ofp, 
      "Total runtime: %10.3f (us), Total state switches: %.0f\n", 
            total_time / ticks_per_us, total_switches);
    fprintf(ofp, "Average time in state: %10.3f (us)\n",
            (total_time / total_switches) / ticks_per_us);
}

char *mapfile (char *file)
{
    struct stat statb;
    char *rv;
    int maphfile;
    size_t mapfsize;
    
    maphfile = open (file, O_RDONLY);

    if (maphfile < 0)
    {
        fprintf (stderr, "Couldn't read %s, skipping it...\n", file);
        return (NULL);
    }

    if (fstat (maphfile, &statb) < 0)
    {
        fprintf (stderr, "Couldn't get size of %s, skipping it...\n", file);
        return (NULL);
    }

    /* Don't try to mmap directories, FIFOs, semaphores, etc. */
    if (! (statb.st_mode & S_IFREG)) {
        fprintf (stderr, "%s is not a regular file, skipping it...\n", file);
        return (NULL);
    }

    mapfsize = statb.st_size;

    if (mapfsize < 3)
    {
        fprintf (stderr, "%s zero-length, skipping it...\n", file);
        close (maphfile);
        return (NULL);
    }

    rv = mmap (0, mapfsize, PROT_READ, MAP_SHARED, maphfile, 0);

    if (rv == 0)
    {
        fprintf (stderr, "%s problem mapping, I quit...\n", file);
        exit (-1);
    }
    close (maphfile);
    return (rv);
}

/*
 * main 
 */
int main (int argc, char **argv)
{
    char *cpel_file = 0;
    char *outputfile = 0;
    FILE *ofp;
    char *cpel;
    int verbose=0;
    int curarg=1;

    while (curarg < argc) {
        if (!strncmp(argv[curarg], "--input-file", 3)) {
            curarg++;
            if (curarg < argc) {
                cpel_file = argv[curarg];
                curarg++;
                continue;
            }
            fatal("Missing filename after --input-file\n");
        }
        if (!strncmp(argv[curarg], "--output-file", 3)) {
            curarg ++;
            if (curarg < argc) {
                outputfile = argv[curarg];
                curarg ++;
                continue;
            }
            fatal("Missing filename after --output-file\n");
        }
        if (!strncmp(argv[curarg], "--verbose", 3)) {
            curarg++;
            verbose++;
            continue;
        }
        if (!strncmp(argv[curarg], "--scatterplot", 4)) {
            curarg++;
            scatterplot=1;
            continue;
        }

        if (!strncmp(argv[curarg], "--state-event", 4)) {
            curarg++;
            if (curarg < argc) {
                state_event_code = atol(argv[curarg]);
                curarg ++;
                continue;
            }
            fatal("Missing integer after --state-event\n");
        }
        if (!strncmp(argv[curarg], "--max-time-sort", 7)) {
            sort_type = SORT_MAX_TIME;
            curarg++;
            continue;
        }
        if (!strncmp(argv[curarg], "--max-occurrence-sort", 7)) {
            sort_type = SORT_MAX_OCCURRENCES;
            curarg++;
            continue;
        }
        if (!strncmp(argv[curarg], "--name-sort", 3)) {
            sort_type = SORT_NAME;
            curarg++;
            continue;
        }
        if (!strncmp(argv[curarg], "--kernel-included", 3)) {
            exclude_kernel_from_summary_stats = 0;
            curarg++;
            continue;
        }
        if (!strncmp(argv[curarg], "--summary", 3)) {
            summary_stats_only=1;
            curarg++;
            continue;
        }
        if (!strncmp(argv[curarg], "--filter", 3)) {
            curarg ++;
            if (curarg < argc) {
                name_filter = (u8 *)argv[curarg];
                curarg ++;
                continue;
            }
            fatal("Missing filter string after --filter\n");
        }
        

    usage:
        fprintf(stderr, 
          "cpelstate --input-file <filename> [--output-file <filename>]\n");
        fprintf(stderr, 
          "          [--state-event <decimal>] [--verbose]\n");
        fprintf(stderr, 
          "          [--max-time-sort(default) | --max-occurrence-sort |\n");

        fprintf(stderr, 
          "           --name-sort-sort] [--kernel-included]\n");

        fprintf(stderr, 
          "          [--summary-stats-only] [--scatterplot]\n");

        fprintf(stderr, "%s\n", version);
        exit(1);
    }

    if (cpel_file == 0)
        goto usage;

    cpel = mapfile(cpel_file);
    if (cpel == 0) {
        fprintf(stderr, "Couldn't map %s...\n", cpel_file);
        exit(1);
    }

    if (!outputfile) {
        ofp = fdopen(1, "w");
        if (ofp == NULL) {
            fprintf(stderr, "Couldn't fdopen(1)?\n");
            exit(1);
        }
    } else {
        ofp = fopen(outputfile, "w");
        if (ofp == NULL) {
            fprintf(stderr, "Couldn't create %s...\n", outputfile);
            exit(1);
        }
    }

    the_strtab_hash = hash_create_string (0, sizeof (uword));
    the_evtdef_hash = hash_create (0, sizeof (uword));
    the_trackdef_hash = hash_create (0, sizeof (uword));

    if (cpel_dump((u8 *) cpel, verbose, ofp)) {
        if (outputfile)
            unlink(outputfile);
    }

    compute_state_statistics(verbose, ofp);
    sort_state_statistics(sort_type, ofp);
    print_state_statistics(verbose, ofp);

    fclose(ofp);
    return(0);
}