SQLite Forum

Sum precision ?
Login
If I "fix" you C code so that it mimics what my particular implementation of sum does (which uses a float128 intermediate), the answers are, strangely enough, identical.  Be careful when comparing a "two baskets of fruit" that your baskets are the same size, and that you "sum up" your apples and oranges in the same order.

```
sqlite> create table S(
   ...> id integer primary key not null,
   ...> value real
   ...> );
sqlite>
sqlite> WITH RECURSIVE
   ...>   cnt(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM cnt WHERE x<6000008)
   ...> insert into S select x as id, iif((x % 2) == 0, 0.0000000123, 12345.000000000) as value
   ...> from cnt;
sqlite>
sqlite> --select * from S limit 5;
sqlite>
sqlite> select printf('count %d, sum: %!35.6f', count(*), sum(value)) from S;
┌─────────────────────────────────────────────────────────┐
│ printf('count %d, sum: %!35.6f', count(*), sum(value))  │
├─────────────────────────────────────────────────────────┤
│ count 6000008, sum:                  37035049380.036903 │
└─────────────────────────────────────────────────────────┘
sqlite> ^Z

#include <stdio.h>

int main()
{
    #define DSIZE 6000008
    static double d[DSIZE];

    for (int i = 0; i < DSIZE; i++)
    {
          d[i] = (i % 2) == 1 ? 0.0000000123 : 12345.000000000;
    }

    __float128 sum = 0.0;
    for (int i = 0; i < DSIZE; i++)
    {
          sum += d[i];
    }
    printf("count: %d, sum: %35.6f\n", DSIZE, (double)sum);
    return 0;
}

count: 6000008, sum:                  37035049380.036903
```

So I would presume that the result achieved in non-extended (ie, strict double) would be the same as well.

```
#include <stdio.h>

int main()
{
    #define DSIZE 6000008
    static double d[DSIZE];

    for (int i = 0; i < DSIZE; i++)
    {
          d[i] = (i % 2) == 1 ? 0.0000000123 : 12345.000000000;
    }

    double sum = 0.0;
    for (int i = 0; i < DSIZE; i++)
    {
          sum += d[i];
    }
    printf("count: %d, sum: %35.6f\n", DSIZE, (double)sum);
    return 0;
}

count: 6000008, sum:                  37035049380.000153
```

And shiver me timbers, they are!  (Except that your original did not have the necessary ! to do "extended formatting" of the "dodgy bits" of floating point)

Note also that Richard's "decimal_sum" does indeed arrive at the *exact* answer, though it is mucho slower than the floating point methods.

```
sqlite> create table S(
   ...> id integer primary key not null,
   ...> value real
   ...> );
sqlite>
sqlite> WITH RECURSIVE
   ...>   cnt(x) AS (VALUES(1) UNION ALL SELECT x+1 FROM cnt WHERE x<6000008)
   ...> insert into S select x as id, iif((x % 2) == 0, 0.0000000123, 12345.000000000) as value
   ...> from cnt;
sqlite>
sqlite> --select * from S limit 5;
sqlite> .timer on
sqlite>
sqlite> select printf('count %d, sum: %!35.6f', count(*), sum(value)) from S;
┌─────────────────────────────────────────────────────────┐
│ printf('count %d, sum: %!35.6f', count(*), sum(value))  │
├─────────────────────────────────────────────────────────┤
│ count 6000008, sum:                  37035049380.036903 │
└─────────────────────────────────────────────────────────┘
Run Time: real 0.627 user 0.625000 sys 0.000000
sqlite> select decimal_sum(value) from S;
┌────────────────────────┐
│   decimal_sum(value)   │
├────────────────────────┤
│ 37035049380.0369000492 │
└────────────────────────┘
Run Time: real 9.944 user 9.937500 sys 0.000000
sqlite>
```

You will note that the version using float128 intermediates is accurate to 15 significant digits, the version using extended precision (float64x or 80-bit) is accurate to 14 digits, and the one using strictly doubles to 13 digits, even though it should, on average, be correct to 1 ULP or 1e-5.  There is quite a lot of error introduced because you did not "sort" the numbers before summing them.