BigInt loss accuracy

classic Classic list List threaded Threaded
4 messages Options
Reply | Threaded
Open this post in threaded view
|

BigInt loss accuracy

Derek Wang-2
sqlite biggest int is supposedly 9,223,372,036,854,775,807 (9 and a bit Quintillion), but when the number is larger than 1E+17, it loses some accuracy when retrieving.  see the following python codes:

#store into sqlite:
import sqlite3

sql_1 = """ CREATE TABLE IF NOT EXISTS bi (i integer,bi bigint,bi2 blob); """
conn = sqlite3.connect("bigint.db")
c = conn.cursor()
c.execute(sql_1)
c.execute("delete from bi")
conn.commit()
x = 10
for i in range(22):
    x = 10*x
    y = x + 3
    s = 'insert into bi values (%s, %s, %s)' % (i, y, y)
    c.execute(s)
    conn.commit()
conn.close()

#retrieve from sqlite
import sqlite3
import pandas as pd

#The maximum INTEGER sqlite can store as a signed BIGINT is
#   9,223,372,036,854,775,807 (9 and a bit Quintillion)
# 9E18

conn = sqlite3.connect("bigint.db")
df = pd.read_sql_query("select * from bi order by i;", conn)

m = list(df['i'])
s = list(df['bi'])
t = list(df['bi2'])

#stored as int
for j in m:
    x1 = int(m[j])
    y1 = int(s[j])
    x = 10
    for i in range(x1+1):
        x = 10*x
    y = x + 3
    if y != y1:
        print(x1, y1, y-y1)

#store as blob
for j in m:
    x1 = int(m[j])
    y1 = int(t[j])
    x = 10
    for i in range(x1+1):
        x = 10*x
    y = x + 3
    if y != y1:
        print(x1, y1, y-y1)

conn.close()

------------------------------
result:
14 10000000000000004 -1
15 100000000000000000 3
16 1000000000000000000 3
17 10000000000000000000 3
18 100000000000000000000 3
19 1000000000000000000000 3
20 10000000000000000000000 3
21 99999999999999991611392 8388611
14 10000000000000004 -1
15 100000000000000000 3
16 1000000000000000000 3
17 10000000000000000000 3
18 100000000000000000000 3
19 1000000000000000000000 3
20 10000000000000000000000 3
21 99999999999999991611392 8388611

Summary: when the integer is larger than 1E17, it starts to lose accuracy after it is stored in sqlite.

Thanks.

Derek Wang

_______________________________________________
sqlite-users mailing list
[hidden email]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
Reply | Threaded
Open this post in threaded view
|

Re: BigInt loss accuracy

Igor Tandetnik-2
On 2/23/2019 1:48 PM, Derek Wang wrote:
> x = 10
> for i in range(22):
>      x = 10*x
>      y = x + 3
>      s = 'insert into bi values (%s, %s, %s)' % (i, y, y)

Print `s`. I suspect you are losing precision on Python side, during text formatting.
--
Igor Tandetnik

_______________________________________________
sqlite-users mailing list
[hidden email]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
Reply | Threaded
Open this post in threaded view
|

Re: BigInt loss accuracy

Clemens Ladisch
In reply to this post by Derek Wang-2
Derek Wang wrote:
> sqlite biggest int is supposedly 9,223,372,036,854,775,807 (9 and a bit
> Quintillion), but when the number is larger than 1E+17, it loses some
> accuracy when retrieving.

In plain SQL, everything works fine up to the limit:

  create table t(i notoriously big integer);
  with recursive b(i) as (values (9223372036854775803) union all select i+1 from b limit 10) insert into t select i from b;
  select i from t;

  9223372036854775803
  9223372036854775804
  9223372036854775805
  9223372036854775806
  9223372036854775807
  9.22337203685478e+18
  9.22337203685478e+18
  9.22337203685478e+18
  9.22337203685478e+18
  9.22337203685478e+18

Same in plain Python, when using %s formatting:

  import sqlite3
  db=sqlite3.connect("':memory:")
  db.execute("create table t(i notoriously big integer)")
  for i in [10**17, 10**17+3, 10**18, 10**18+3, 10**19, 10**19+3]:
    db.execute("insert into t values(%s)" % (i,))
  for row in db.execute("select i from t"):
    print(row[0])

  100000000000000000
  100000000000000003
  1000000000000000000
  1000000000000000003
  1e+19
  1e+19

In any case, when using properly parameterized commands, you will not be
able to insert values that are too large:

  db.execute("select ?", (9223372036854775807,)).fetchall()

  [(9223372036854775807,)]

  db.execute("select ?", (9223372036854775808,)).fetchall()

  Traceback (most recent call last):
    File "<stdin>", line 1, in <module>
  OverflowError: Python int too large to convert to SQLite INTEGER


Regards,
Clemens
_______________________________________________
sqlite-users mailing list
[hidden email]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
Reply | Threaded
Open this post in threaded view
|

Re: BigInt loss accuracy

David Raymond
In reply to this post by Derek Wang-2
Your method of storing works fine, but Pandas looks like it's doing something weird to your results. When I run this without pandas my output lines don't start until 17, which is the first row things start getting stored as and returned as floats, and matches up perfectly as the first row over 2^63 - 1

So I'd next look into whatever it is that Pandas is doing.


import contextlib
import sqlite3

tblSQL = "Create table if not exists bi (i integer, bi bigint, bi2 blob);"
with contextlib.closing(sqlite3.connect(":memory:", isolation_level = None)) as conn:
    with contextlib.closing(conn.cursor()) as cur:
        cur.execute(tblSQL)
        x = 10
        for i in range(22):
            x *= 10
            y = x + 3
            s = 'insert into bi values (%s, %s, %s)' % (i, y, y)
            print(s)
            cur.execute(s)
        print()
        cur.execute("select * from bi order by i;")
        for record in cur:
            print(record[0], type(record[1]), record[1], type(record[2]), record[2])


Prints out this:


insert into bi values (0, 103, 103)
insert into bi values (1, 1003, 1003)
insert into bi values (2, 10003, 10003)
insert into bi values (3, 100003, 100003)
insert into bi values (4, 1000003, 1000003)
insert into bi values (5, 10000003, 10000003)
insert into bi values (6, 100000003, 100000003)
insert into bi values (7, 1000000003, 1000000003)
insert into bi values (8, 10000000003, 10000000003)
insert into bi values (9, 100000000003, 100000000003)
insert into bi values (10, 1000000000003, 1000000000003)
insert into bi values (11, 10000000000003, 10000000000003)
insert into bi values (12, 100000000000003, 100000000000003)
insert into bi values (13, 1000000000000003, 1000000000000003)
insert into bi values (14, 10000000000000003, 10000000000000003)
insert into bi values (15, 100000000000000003, 100000000000000003)
insert into bi values (16, 1000000000000000003, 1000000000000000003)
insert into bi values (17, 10000000000000000003, 10000000000000000003)
insert into bi values (18, 100000000000000000003, 100000000000000000003)
insert into bi values (19, 1000000000000000000003, 1000000000000000000003)
insert into bi values (20, 10000000000000000000003, 10000000000000000000003)
insert into bi values (21, 100000000000000000000003, 100000000000000000000003)

0 <class 'int'> 103 <class 'int'> 103
1 <class 'int'> 1003 <class 'int'> 1003
2 <class 'int'> 10003 <class 'int'> 10003
3 <class 'int'> 100003 <class 'int'> 100003
4 <class 'int'> 1000003 <class 'int'> 1000003
5 <class 'int'> 10000003 <class 'int'> 10000003
6 <class 'int'> 100000003 <class 'int'> 100000003
7 <class 'int'> 1000000003 <class 'int'> 1000000003
8 <class 'int'> 10000000003 <class 'int'> 10000000003
9 <class 'int'> 100000000003 <class 'int'> 100000000003
10 <class 'int'> 1000000000003 <class 'int'> 1000000000003
11 <class 'int'> 10000000000003 <class 'int'> 10000000000003
12 <class 'int'> 100000000000003 <class 'int'> 100000000000003
13 <class 'int'> 1000000000000003 <class 'int'> 1000000000000003
14 <class 'int'> 10000000000000003 <class 'int'> 10000000000000003
15 <class 'int'> 100000000000000003 <class 'int'> 100000000000000003
16 <class 'int'> 1000000000000000003 <class 'int'> 1000000000000000003
17 <class 'float'> 1e+19 <class 'float'> 1e+19
18 <class 'float'> 1e+20 <class 'float'> 1e+20
19 <class 'float'> 1e+21 <class 'float'> 1e+21
20 <class 'float'> 1e+22 <class 'float'> 1e+22
21 <class 'float'> 1e+23 <class 'float'> 1e+23


-----Original Message-----
From: sqlite-users [mailto:[hidden email]] On Behalf Of Derek Wang
Sent: Saturday, February 23, 2019 1:49 PM
To: [hidden email]
Subject: [sqlite] BigInt loss accuracy

sqlite biggest int is supposedly 9,223,372,036,854,775,807 (9 and a bit Quintillion), but when the number is larger than 1E+17, it loses some accuracy when retrieving.  see the following python codes:

#store into sqlite:
import sqlite3

sql_1 = """ CREATE TABLE IF NOT EXISTS bi (i integer,bi bigint,bi2 blob); """
conn = sqlite3.connect("bigint.db")
c = conn.cursor()
c.execute(sql_1)
c.execute("delete from bi")
conn.commit()
x = 10
for i in range(22):
    x = 10*x
    y = x + 3
    s = 'insert into bi values (%s, %s, %s)' % (i, y, y)
    c.execute(s)
    conn.commit()
conn.close()

#retrieve from sqlite
import sqlite3
import pandas as pd

#The maximum INTEGER sqlite can store as a signed BIGINT is
#   9,223,372,036,854,775,807 (9 and a bit Quintillion)
# 9E18

conn = sqlite3.connect("bigint.db")
df = pd.read_sql_query("select * from bi order by i;", conn)

m = list(df['i'])
s = list(df['bi'])
t = list(df['bi2'])

#stored as int
for j in m:
    x1 = int(m[j])
    y1 = int(s[j])
    x = 10
    for i in range(x1+1):
        x = 10*x
    y = x + 3
    if y != y1:
        print(x1, y1, y-y1)

#store as blob
for j in m:
    x1 = int(m[j])
    y1 = int(t[j])
    x = 10
    for i in range(x1+1):
        x = 10*x
    y = x + 3
    if y != y1:
        print(x1, y1, y-y1)

conn.close()

------------------------------
result:
14 10000000000000004 -1
15 100000000000000000 3
16 1000000000000000000 3
17 10000000000000000000 3
18 100000000000000000000 3
19 1000000000000000000000 3
20 10000000000000000000000 3
21 99999999999999991611392 8388611
14 10000000000000004 -1
15 100000000000000000 3
16 1000000000000000000 3
17 10000000000000000000 3
18 100000000000000000000 3
19 1000000000000000000000 3
20 10000000000000000000000 3
21 99999999999999991611392 8388611

Summary: when the integer is larger than 1E17, it starts to lose accuracy after it is stored in sqlite.

Thanks.

Derek Wang

_______________________________________________
sqlite-users mailing list
[hidden email]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
_______________________________________________
sqlite-users mailing list
[hidden email]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users