Quantcast

Patch for consideration: auto_vacuum slack.

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
7 messages Options
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Patch for consideration: auto_vacuum slack.

Scott Hess
A developer was asking me questions about auto_vacuum I/O characteristics,
because they were worried about "churn", where a page is moved to fill a
freelist gap, then soon enough a new page is allocated anyhow, so the move
wasn't really necessary.  This made me wonder if auto_vacuum recognized
that in some cases SQLITE_FCNTL_CHUNK_SIZE would make the move pointless
(because it wouldn't even save any space).

Below is a patch which implements [PRAGMA auto_vacuum_slack_pages = N].
This setting allows client code to signal that auto_vacuum can leave pages
on the freelist until releasing them would allow a db size change.  I think
this would reduce the number of auto_vacuum moves for many databases.  The
FCNTL and PRAGMA could obviously be integrated into one setting, I haven't
done that in this patch.  Also, the test in autoVacuumCommit() could
certainly be more cleanly combined with the existing code, I left it
distinct to make the logic easier to see.  I'd be happy to adjust the code
to be cleaner and write tests, mostly I didn't want to invest time in that
without floating a prototype to see if it would be an interesting addition.

Thanks,
scott


--- Patch follows ---
Index: src/btree.c
==================================================================
--- src/btree.c
+++ src/btree.c
@@ -2810,10 +2810,50 @@
   sqlite3BtreeLeave(p);
   return rc;
 #endif
 }

+/*
+** Change the 'auto-vacuum-slack-pages' property of the database. If auto
vacuum
+** is enabled, this is the number of chunks of slack to allow before
+** automatically running an incremental vacuum.
+*/
+int sqlite3BtreeSetAutoVacuumSlackPages(Btree *p, int autoVacuumSlack){
+#ifdef SQLITE_OMIT_AUTOVACUUM
+  return SQLITE_READONLY;
+#else
+  BtShared *pBt = p->pBt;
+  int rc = SQLITE_OK;
+  u8 cc = (u8)autoVacuumSlack;
+  if( autoVacuumSlack>cc ){
+    cc = 0xFF;
+  }
+
+  sqlite3BtreeEnter(p);
+  pBt->autoVacuumSlack = cc;
+  sqlite3BtreeLeave(p);
+  return rc;
+#endif
+}
+
+/*
+** Return the value of the 'auto-vacuum-slack-pages' property.
+*/
+int sqlite3BtreeGetAutoVacuumSlackPages(Btree *p){
+#ifdef SQLITE_OMIT_AUTOVACUUM
+  return 0;
+#else
+  int rc = 0;
+  sqlite3BtreeEnter(p);
+  if( p->pBt->autoVacuum!=0 ){
+    rc = p->pBt->autoVacuumSlack;
+  }
+  sqlite3BtreeLeave(p);
+  return rc;
+#endif
+}
+

 /*
 ** Get a reference to pPage1 of the database file.  This will
 ** also acquire a readlock on that file.
 **
@@ -3651,17 +3691,31 @@
 ** i.e. the database has been reorganized so that only the first *pnTrunc
 ** pages are in use.
 */
 static int autoVacuumCommit(BtShared *pBt){
   int rc = SQLITE_OK;
+  int bShouldVacuum = pBt->autoVacuum && !pBt->incrVacuum;
   Pager *pPager = pBt->pPager;
   VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager); )

   assert( sqlite3_mutex_held(pBt->mutex) );
   invalidateAllOverflowCache(pBt);
   assert(pBt->autoVacuum);
-  if( !pBt->incrVacuum ){
+  if( bShouldVacuum && pBt->autoVacuumSlack ){
+    Pgno nOrig;        /* Database size before freeing */
+    Pgno nFree;        /* Number of pages on the freelist initially */
+
+    nOrig = btreePagecount(pBt);
+    nFree = get4byte(&pBt->pPage1->aData[36]);
+    bShouldVacuum =
+        (nOrig-nFree)/pBt->autoVacuumSlack < nOrig/pBt->autoVacuumSlack;
+    /* TODO: When integrating this test with the following code, contrive
to
+    ** trim to the integral chunk boundary, rather than trimming the
entire free
+    ** list.
+    */
+  }
+  if( bShouldVacuum ){
     Pgno nFin;         /* Number of pages in database after autovacuuming
*/
     Pgno nFree;        /* Number of pages on the freelist initially */
     Pgno iFree;        /* The next page to be freed */
     Pgno nOrig;        /* Database size before freeing */


Index: src/btree.h
==================================================================
--- src/btree.h
+++ src/btree.h
@@ -76,10 +76,12 @@
 int sqlite3BtreeSecureDelete(Btree*,int);
 int sqlite3BtreeGetOptimalReserve(Btree*);
 int sqlite3BtreeGetReserveNoMutex(Btree *p);
 int sqlite3BtreeSetAutoVacuum(Btree *, int);
 int sqlite3BtreeGetAutoVacuum(Btree *);
+int sqlite3BtreeSetAutoVacuumSlackPages(Btree *, int);
+int sqlite3BtreeGetAutoVacuumSlackPages(Btree *);
 int sqlite3BtreeBeginTrans(Btree*,int);
 int sqlite3BtreeCommitPhaseOne(Btree*, const char *zMaster);
 int sqlite3BtreeCommitPhaseTwo(Btree*, int);
 int sqlite3BtreeCommit(Btree*);
 int sqlite3BtreeRollback(Btree*,int,int);

Index: src/btreeInt.h
==================================================================
--- src/btreeInt.h
+++ src/btreeInt.h
@@ -410,10 +410,11 @@
   BtCursor *pCursor;    /* A list of all open cursors */
   MemPage *pPage1;      /* First page of the database */
   u8 openFlags;         /* Flags to sqlite3BtreeOpen() */
 #ifndef SQLITE_OMIT_AUTOVACUUM
   u8 autoVacuum;        /* True if auto-vacuum is enabled */
+  u8 autoVacuumSlack;   /* Optional pages of slack for auto-vacuum */
   u8 incrVacuum;        /* True if incr-vacuum is enabled */
   u8 bDoTruncate;       /* True to truncate db on commit */
 #endif
   u8 inTransaction;     /* Transaction state */
   u8 max1bytePayload;   /* Maximum first byte of cell for a 1-byte payload
*/

Index: src/pragma.c
==================================================================
--- src/pragma.c
+++ src/pragma.c
@@ -734,10 +734,32 @@
     sqlite3VdbeJumpHere(v, addr);
     break;
   }
 #endif

+  /*
+  **  PRAGMA [schema.]auto_vacuum_slack_pages(N)
+  **
+  ** Control chunk size of auto-vacuum.
+  */
+#ifndef SQLITE_OMIT_AUTOVACUUM
+  case PragTyp_AUTO_VACUUM_SLACK_PAGES: {
+    Btree *pBt = pDb->pBt;
+    assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
+    assert( pBt!=0 );
+    if( !zRight ){
+      returnSingleInt(v, sqlite3BtreeGetAutoVacuumSlackPages(pBt));
+    }else{
+      int nPages = 8;
+      if( sqlite3GetInt32(zRight, &nPages) ){
+        sqlite3BtreeSetAutoVacuumSlackPages(pBt, nPages);
+      }
+    }
+    break;
+  }
+#endif
+
 #ifndef SQLITE_OMIT_PAGER_PRAGMAS
   /*
   **  PRAGMA [schema.]cache_size
   **  PRAGMA [schema.]cache_size=N
   **

Index: src/pragma.h
==================================================================
--- src/pragma.h
+++ src/pragma.h
@@ -46,10 +46,11 @@
 #define PragTyp_HEXKEY                        38
 #define PragTyp_KEY                           39
 #define PragTyp_REKEY                         40
 #define PragTyp_LOCK_STATUS                   41
 #define PragTyp_PARSER_TRACE                  42
+#define PragTyp_AUTO_VACUUM_SLACK_PAGES       43

 /* Property flags associated with various pragma. */
 #define PragFlg_NeedSchema 0x01 /* Force schema load before running */
 #define PragFlg_NoColumns  0x02 /* OP_ResultRow called with zero columns */
 #define PragFlg_NoColumns1 0x04 /* zero columns if RHS argument is present
*/
@@ -142,10 +143,16 @@
  {/* zName:     */ "auto_vacuum",
   /* ePragTyp:  */ PragTyp_AUTO_VACUUM,
   /* ePragFlg:  */
PragFlg_NeedSchema|PragFlg_Result0|PragFlg_SchemaReq|PragFlg_NoColumns1,
   /* ColNames:  */ 0, 0,
   /* iArg:      */ 0 },
+#endif
+#if !defined(SQLITE_OMIT_AUTOVACUUM)
+  { /* zName:     */ "auto_vacuum_slack_pages",
+    /* ePragTyp:  */ PragTyp_AUTO_VACUUM_SLACK_PAGES,
+    /* ePragFlag: */ 0,
+    /* iArg:      */ 0 },
 #endif
 #if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
 #if !defined(SQLITE_OMIT_AUTOMATIC_INDEX)
  {/* zName:     */ "automatic_index",
   /* ePragTyp:  */ PragTyp_FLAG,
_______________________________________________
sqlite-users mailing list
[hidden email]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Patch for consideration: auto_vacuum slack.

Richard Hipp-3
On 2/13/17, Scott Hess <[hidden email]> wrote:
>
> Below is a patch which implements [PRAGMA auto_vacuum_slack_pages = N].
> This setting allows client code to signal that auto_vacuum can leave pages
> on the freelist until releasing them would allow a db size change.

This makes me want to ask:  Is anybody still using auto_vacuum?  And
if they are, should they be?

Auto-vacuum was invented way back in 2004, for Motorola, who at the
time was the worlds leading manufacturer of mobile phones.  This was
during the heyday of flip-phones, before iPhone or Android.  The
devices had very little flash memory - total storage capacity was
measured in megabytes rather than gigabytes.  If storage ran low,
Motorola wanted to be able to VACUUM the SQLite databases to make them
smaller.  The problem there is VACUUM requires nearly 2x the size of
the original database in temp storage, so if you are already low on
space, VACUUM probably won't work.  The solution was auto-vacuum,
which keeps the databases at near their minimum size at all times, at
the cost of some extra database fragmentation, more I/O, and reduced
performance.

Fast foward 13 years (how long is that in internet-years?) and the
situation has changed.  Does anybody really care anymore that a
database file might have a few dozen pages on its freelist?  Or if
they do care, does anybody lack the temp space sufficient to run a
real VACUUM?  My impression is that these days people just want the
database to run fast and with a minimum of I/O and are not overly
concerned with a few extra freelist pages, which means that
auto-vacuum should remain turned off.

Scott:  The motivation for your patch seem to be to get auto-vacuum to
run a little faster.  But if performance is your goal, why not just
turn auto-vacuum off?  Or, failing that, set it to INCREMENTAL and
then run "PRAGMA incremental_vacuum" when "PRAGMA freelist_count"
reaches some threshold?
--
D. Richard Hipp
[hidden email]
_______________________________________________
sqlite-users mailing list
[hidden email]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Patch for consideration: auto_vacuum slack.

Donald Shepherd
I use auto-vacuum in my application storage for work.  This was introduced
in about 2013.

The motivation was more political than anything though, as convincing some
as to the introduction requiring jumping through some pretty arbitrary
hoops.  Enabling auto-vacuum was one of those, to mitigate concerns that a
piece of software unused in the immediate development centre would chew up
resources needed by other more business critical processes.

On Tue, 14 Feb 2017 at 07:14 Richard Hipp <[hidden email]> wrote:

> On 2/13/17, Scott Hess <[hidden email]> wrote:
> >
> > Below is a patch which implements [PRAGMA auto_vacuum_slack_pages = N].
> > This setting allows client code to signal that auto_vacuum can leave
> pages
> > on the freelist until releasing them would allow a db size change.
>
> This makes me want to ask:  Is anybody still using auto_vacuum?  And
> if they are, should they be?
>
> Auto-vacuum was invented way back in 2004, for Motorola, who at the
> time was the worlds leading manufacturer of mobile phones.  This was
> during the heyday of flip-phones, before iPhone or Android.  The
> devices had very little flash memory - total storage capacity was
> measured in megabytes rather than gigabytes.  If storage ran low,
> Motorola wanted to be able to VACUUM the SQLite databases to make them
> smaller.  The problem there is VACUUM requires nearly 2x the size of
> the original database in temp storage, so if you are already low on
> space, VACUUM probably won't work.  The solution was auto-vacuum,
> which keeps the databases at near their minimum size at all times, at
> the cost of some extra database fragmentation, more I/O, and reduced
> performance.
>
> Fast foward 13 years (how long is that in internet-years?) and the
> situation has changed.  Does anybody really care anymore that a
> database file might have a few dozen pages on its freelist?  Or if
> they do care, does anybody lack the temp space sufficient to run a
> real VACUUM?  My impression is that these days people just want the
> database to run fast and with a minimum of I/O and are not overly
> concerned with a few extra freelist pages, which means that
> auto-vacuum should remain turned off.
>
> Scott:  The motivation for your patch seem to be to get auto-vacuum to
> run a little faster.  But if performance is your goal, why not just
> turn auto-vacuum off?  Or, failing that, set it to INCREMENTAL and
> then run "PRAGMA incremental_vacuum" when "PRAGMA freelist_count"
> reaches some threshold?
> --
> D. Richard Hipp
> [hidden email]
> _______________________________________________
> sqlite-users mailing list
> [hidden email]
> http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
>
_______________________________________________
sqlite-users mailing list
[hidden email]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Patch for consideration: auto_vacuum slack.

Stephen Chrzanowski
In reply to this post by Richard Hipp-3
I use it, not due to drive space, but because I sometimes check my DB into
a source code repo.


On Mon, Feb 13, 2017 at 3:13 PM, Richard Hipp <[hidden email]> wrote:

> On 2/13/17, Scott Hess <[hidden email]> wrote:
> >
> > Below is a patch which implements [PRAGMA auto_vacuum_slack_pages = N].
> > This setting allows client code to signal that auto_vacuum can leave
> pages
> > on the freelist until releasing them would allow a db size change.
>
> This makes me want to ask:  Is anybody still using auto_vacuum?  And
> if they are, should they be?
>
>
_______________________________________________
sqlite-users mailing list
[hidden email]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Patch for consideration: auto_vacuum slack.

Scott Hess
In reply to this post by Richard Hipp-3
On Mon, Feb 13, 2017 at 12:13 PM, Richard Hipp <[hidden email]> wrote:
>
> Scott:  The motivation for your patch seem to be to get auto-vacuum to
> run a little faster.  But if performance is your goal, why not just
> turn auto-vacuum off?  Or, failing that, set it to INCREMENTAL and
> then run "PRAGMA incremental_vacuum" when "PRAGMA freelist_count"
> reaches some threshold?
>

My first implementation did this manually using incremental-vacuum in a
layer above SQLite, because I already had a convenient function which
detected when updates had happened.  For explicit transactions, this is
pretty reasonable, albeit inelegant depending on whether you provide a
Commit() function or have to detect a COMMIT statement using string
operations.  But for auto-commit statements you can't get your incremental
vacuum into the commit scope.  [AFAICT, the commit and update hooks do not
allow running anything against the database.]  This version is certainly
doable if my suggested patch is considered beyond the pale.

I then reimplemented as more of an auto-incremental-vacuum, basically the
same code as I posted except that it keyed off the incremental-vacuum
flag.  It was pretty clean, but I did find myself thinking that it wouldn't
work well if you wrote code assuming the periodic cleaning was happening
automatically, but someone had neglected to setup the slack-sizing pragma.
In that case it would just stop collecting garbage, and your code would
never do it explicitly.  With the auto-vacuum version, it would just fall
back to auto-vacuum-every-time.  [I'm not sure which is actually worse, in
the end.]

Mostly, my motivation was that it felt like there was a gap between the
SQLITE_FCNTL_CHUNK_SIZE feature and auto_vacuum/incremental_vacuum, and
when I looked the gap was pretty clean to fill.  Having it key off of the
actual "Should I vacuum free pages" decision seemed more reasonable than
writing external code which makes educated guesses about what's going on.

I'm not really looking at "Should I rewrite all of the auto-vacuum
databases I can find to use manual vacuum".  I think a change like this
would make auto-vacuum databases behave a bit more like non-auto-vacuum
databases in terms of reuse of free space.

---

WRT your list of reasons for "Why even bother", another consideration to
add is that fragmentation on SSDs may not be the big problem it is on hard
drives.  If you have your page sizes reasonably aligned with the units of
the underlying filesystem and hardware, it may not matter much whether a
particular page is next to other pages which are logically adjacent in the
btree.  Of course, there's a lot of complexity in there, like whether the
OS continues to do read-ahead for SSDs.

That said, AFAICT, there's no code in place to make page placement
decisions based on locality, so I would expect that a
non-auto/incremental-vacuum database being actively updated would also
generate fragmentation.

---

I'm not going to get into the "Why not just use VACUUM".  My experience is
that this is a tough issue with databases in general, and developers often
have troubles wrapping their heads around it.  In my case, I'm thinking of
how to mitigate some concerns developers had with existing code, not how
they might write greenfield code going forward.  [As if they'd even ask my
advice before writing code and shipping it.  They only ask me things after
they've already shipped a problem :-).]

-scott
_______________________________________________
sqlite-users mailing list
[hidden email]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Patch for consideration: auto_vacuum slack.

Jens Alfke-2
In reply to this post by Richard Hipp-3

> On Feb 13, 2017, at 12:13 PM, Richard Hipp <[hidden email]> wrote:
>
> Does anybody really care anymore that a
> database file might have a few dozen pages on its freelist?  Or if
> they do care, does anybody lack the temp space sufficient to run a
> real VACUUM?

The issue of vacuuming has confused me for a long time. It’s unclear from the docs how necessary it is, or when it should be run, and there’s a lot of conflicting advice online.

A small amount of wasted space isn’t a problem, but without vacuuming the database file _will never shrink_. ("When auto-vacuum is disabled and data is deleted data from a database, the database file remains the same size.”[1]) This could lead to large amounts of waste. (We have had developers complain to us that our library doesn’t reduce its disk usage after they delete data through its API.) It also means that if a user deliberately tries to free up storage by deleting app data, they won’t see any improvement, which can be frustrating.

Running a full VACUUM is problematic for a mobile app because
* It can take quite a long time if the database is large or if I/O bandwidth is low.
* It requires potentially a lot of storage space, at a time when it’s likely that free space is low.
* IIRC it blocks access to the database file by all handles while it’s running, so it can’t be done while the database is otherwise in use, only when the app is quitting or being backgrounded.
* If the process is killed during this activity [as is entirely possible on iOS, because the OS will terminate processes that are unresponsive or that keep doing stuff too long after they’ve been backgrounded] it will leave large amounts of wasted storage behind until it starts again, and all the incremental progress of the vacuum is lost.

The strategy I’m using in my current codebase is to enable incremental auto_vacuum, and then before closing the database check the free page count. If it’s too high, I run an incremental_vacuum. This was suggested by someone’s blog post that I neglected to bookmark. So far it seems to work well, but this codebase is still in development and hasn’t been through a lot of stress testing.

—Jens

[1]: http://www.sqlite.org/pragma.html#pragma_auto_vacuum
_______________________________________________
sqlite-users mailing list
[hidden email]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Patch for consideration: auto_vacuum slack.

Chris Brody
In reply to this post by Richard Hipp-3
On Mon, Feb 13, 2017 at 9:13 PM, Richard Hipp <[hidden email]> wrote:
> [...]
> This makes me want to ask:  Is anybody still using auto_vacuum?  And
> if they are, should they be?

I am thinking to change the commonly-used Cordova/PhoneGap sqlite
plugin [1] to enable auto-vacuum by default for the following reasons:
* Potential issues with running a full VACUUM on a mobile app, just
described by Jens Alfke;
* A number of users are beginning programmers who may not read enough
background documentation to understand the importance of periodic
vacuuming.

The sqlite plugin provides an API very similar to the DRAFT Web SQL
API [2]. While this is now deprecated it does provide the flexibility
needed by many users. Considering that [2] does not mention any need
for the web programmer to do periodic vacuuming, I suspect a good Web
SQL implementation should have auto-vacuum enabled by default. Or am I
mistaken somehow?

I would personally vote for this improvement to be included but
perhaps with a compile-time option to leave it out?

[1] https://github.com/litehelpers/Cordova-sqlite-storage
[2] https://www.w3.org/TR/webdatabase/
_______________________________________________
sqlite-users mailing list
[hidden email]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
Loading...