SQL Aggregates in C


For fun, I would like to implement some of the SQL aggregates in C: SUM (sum), AVG (average), MIN (minimum), MAX (maximum), STDEV (standard deviation), VAR (variance).

To do this in C, I’ll use functions with a variable number of arguments instead of a function that takes an array and a count of the elements in that array. To make this work, the last argument in the variable list has to be a special value that tells the function when to stop reading arguments. For that, I’m going to use the INT_MIN macro from the <limits.h> header (which also means INT_MIN itself cannot be passed as one of the values). For example, to get the average of the numbers 6, 3, and 19, the call would look like AVG(6, 3, 19, INT_MIN).

SUM

Implementing this in C is lame, but I’ll use it as the starting point before getting to the other functions. I would definitely prefer to write 1 + 2 + 3 in C instead of SUM(1, 2, 3, INT_MIN).

Definition

/*
 * Adds up a sentinel-terminated list of int arguments.
 *
 * first: the first value, or INT_MIN for an empty list.
 * ...:   further int values; the list must be terminated by INT_MIN.
 *
 * Returns the total of every value that precedes the INT_MIN terminator
 * (0 when first is already INT_MIN).
 */
int sum (int first, ...)
{
    int total = 0;
    va_list args;

    va_start (args, first);

    //  Start at 'first'; the next argument is only fetched after the
    //  current one has been accepted, so nothing past INT_MIN is read
    for (int cur = first; cur != INT_MIN; cur = va_arg (args, int))
    {
        total += cur;
    }

    va_end (args);

    return total;
}

Usage

//  The trailing INT_MIN is the sentinel that ends the argument list
printf ("Sum of (1, 2, 3, 4, 5) = %d\n", sum (1, 2, 3 , 4, 5, INT_MIN));

Output

15

AVG

Instead of writing something like (1+2+3+4)/4 or (4 + 3 + 6 + 1 + 5 + 11 + 19 + 25 + 111 + 55 + 32 + 57)/ 12, I’d rather write AVG(1, 2, 3, 4, INT_MIN) or AVG(4, 3, 6, 1, 5, 11, 19, 25, 111, 55 , 32, 57, INT_MIN) without having to count the number of numbers I’m calculating the average for.

Definition

/*
 * Computes the arithmetic mean of a sentinel-terminated list of ints.
 *
 * first: the first value, or INT_MIN for an empty list.
 * ...:   further int values; the list must be terminated by INT_MIN.
 *
 * Returns the sum of the values divided by how many there are.
 * For an empty list the result is 0.0 / 0 (NaN on IEEE 754 systems).
 */
double avg (int first, ...)
{
    //  Accumulate in long long: a plain int total overflows (undefined
    //  behaviour) for large inputs even though the mean itself fits
    long long total = 0;
    int count = 0;
    va_list args;

    va_start (args, first);

    //  Sum and count every argument that precedes the sentinel
    for (int cur = first; cur != INT_MIN; cur = va_arg (args, int))
    {
        total += cur;
        count++;
    }

    va_end (args);

    return (double) total / count;
}

Usage

//  avg returns a double, printed here with two decimals; INT_MIN ends the list
printf ("Average of (1, 2, 3, 4, 5) = %.2f\n", avg (1, 2, 3 , 4, 5, INT_MIN));

Output

3.00

MIN

Instead of implementing MIN by passing an array and the count of numbers in that array, I’d rather just pass all the numbers to the function without having to worry about creating an array and counting the number of elements inside it.

Definition

/*
 * Finds the smallest value in a sentinel-terminated list of ints.
 *
 * first: the first value, or INT_MIN for an empty list.
 * ...:   further int values; the list must be terminated by INT_MIN.
 *
 * Returns the smallest value that precedes the INT_MIN terminator.
 * When first is INT_MIN the function returns INT_MIN.
 */
int min (int first, ...)
{
    int lowest = first;
    va_list args;

    va_start (args, first);

    //  Keep whichever is smaller: the best so far or the current argument
    for (int cur = first; cur != INT_MIN; cur = va_arg (args, int))
    {
        lowest = (cur < lowest) ? cur : lowest;
    }

    va_end (args);

    return lowest;
}

Usage

//  INT_MIN is only the list terminator here, not a candidate for the minimum
printf ("Minimum of (10, 23, 3, 41, 15) = %d\n", min (10, 23, 3, 41, 15, INT_MIN));

Output

3

MAX

Gets the maximum number in a set.

Definition

/*
 * Finds the largest value in a sentinel-terminated list of ints.
 *
 * first: the first value, or INT_MIN for an empty list.
 * ...:   further int values; the list must be terminated by INT_MIN.
 *
 * Returns the largest value that precedes the INT_MIN terminator.
 * When first is INT_MIN the function returns INT_MIN.
 */
int max (int first, ...)
{
    int largest = first;
    int cur = first;
    va_list args;

    va_start (args, first);

    for (;;)
    {
        //  The sentinel ends the scan before another argument is fetched
        if (cur == INT_MIN)
            break;

        if (largest < cur)
            largest = cur;

        cur = va_arg (args, int);
    }

    va_end (args);

    return largest;
}

Usage

//  The final INT_MIN terminates the list and is never compared as a value
printf ("Maximum of (10, 23, 3, 41, 15) = %d\n", max (10, 23, 3, 41, 15, INT_MIN));

Output

41

STDEV

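Calculates the population standard deviation for a set of numbers (the sum of squared deviations from the mean is divided by the number of values before taking the square root).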

Definition

double stdev (int first, ...)
{
    register int i = first;
    int count = 0;
    int sum = 0;
    double mean = 0;
    double deviation = 0;
    va_list arg_list;

    //  Calculate the mean (average)
    va_start(arg_list, first);
   
    while(i != INT_MIN)
    {
        sum += i;
        count++;
        i = va_arg (arg_list, int);
    }
   
    va_end(arg_list);
    mean = (float) sum / count;

    //  Calculate the standard deviation
    i = first;
    va_start(arg_list, first);
   
    while(i != INT_MIN)
    {
        deviation += pow((i - mean), 2);
        i = va_arg (arg_list, int);
    }

    deviation /= count;
    deviation = sqrt(deviation);

    va_end(arg_list);
   
    return deviation;
}

Usage

//  stdev returns a double, printed with two decimals; INT_MIN ends the list
printf ("Standard Deviation of (2, 4, 4, 4, 5, 5, 7, 9) = %.2f\n", stdev (2, 4, 4, 4, 5, 5, 7, 9, INT_MIN));

Output

2.00

VAR

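Calculates the sample variance for a set of numbers (the sum of squared deviations from the mean is divided by one less than the number of values).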

Definition

/*
 * Computes the sample variance of a sentinel-terminated list of ints:
 * sum((x - mean)^2) / (count - 1).
 *
 * first: the first value, or INT_MIN for an empty list.
 * ...:   further int values; the list must be terminated by INT_MIN.
 *
 * Returns the variance as a double. At least two values are needed for
 * a meaningful result: with exactly one value the divisor is zero
 * (0.0 / 0, NaN on IEEE 754 systems).
 */
double var (int first, ...)
{
    //  long long keeps the running total from overflowing an int
    long long total = 0;
    int count = 0;
    double squares = 0;
    double mean;
    va_list args;

    //  First pass: calculate the mean (average)
    va_start (args, first);

    for (int cur = first; cur != INT_MIN; cur = va_arg (args, int))
    {
        total += cur;
        count++;
    }

    va_end (args);

    //  Divide in double precision; a float cast would round totals
    //  above 2^24 and skew every deviation computed from the mean
    mean = (double) total / count;

    //  Second pass: restart the argument list and accumulate the
    //  squared distance of each value from the mean
    va_start (args, first);

    for (int cur = first; cur != INT_MIN; cur = va_arg (args, int))
    {
        const double delta = cur - mean;
        squares += delta * delta;
    }

    va_end (args);

    //  Sample variance: divide by (count - 1) rather than count
    return squares / (count - 1);
}

Usage

//  var returns a double, printed with two decimals; INT_MIN ends the list
printf ("Variance of (1, 2, 3, 4, 5, 6) = %.2f\n", var (1, 2, 3, 4, 5, 6, INT_MIN));

Output

3.50


Full source code is available on my GitHub page.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s