monotone

monotone Mtn Source Tree

Root/sqlite/os_unix.c

1/*
2** 2004 May 22
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11******************************************************************************
12**
13** This file contains code that is specific to Unix systems.
14*/
15#include "sqliteInt.h"
16#include "os.h"
17#if OS_UNIX /* This file is used on unix only */
18
19/* #define SQLITE_ENABLE_LOCKING_STYLE 0 */
20
/*
** Ask the C library for large-file (>2GB) support on Posix systems that
** provide it.  Where the OS has no large file support these feature
** macros are expected to be no-ops.
**
** Large file support can be turned off with -DSQLITE_DISABLE_LFS on the
** compiler command line.  That is necessary when compiling on a recent
** machine (ex: RedHat 7.2) for use on an older one (ex: RedHat 6.0):
** without the switch LFS is enabled, but LFS does not exist in the
** RedHat 6.0 kernel, so the code will not work there.  For maximum
** binary portability, omit LFS.
*/
#if !defined(SQLITE_DISABLE_LFS)
# define _LARGE_FILE 1
# if !defined(_FILE_OFFSET_BITS)
#   define _FILE_OFFSET_BITS 64
# endif
# define _LARGEFILE_SOURCE 1
#endif
41
42/*
43** standard include files.
44*/
45#include <sys/types.h>
46#include <sys/stat.h>
47#include <fcntl.h>
48#include <unistd.h>
49#include <time.h>
50#include <sys/time.h>
51#include <errno.h>
52#ifdef SQLITE_ENABLE_LOCKING_STYLE
53#include <sys/ioctl.h>
54#include <sys/param.h>
55#include <sys/mount.h>
56#endif /* SQLITE_ENABLE_LOCKING_STYLE */
57
/*
** Thread-safety defaults to on.  When it is on, bring in the pthreads
** header and define SQLITE_UNIX_THREADS so that later code in this file
** can test for threading support.
*/
#if !defined(THREADSAFE)
# define THREADSAFE 1
#endif
#if THREADSAFE
# include <pthread.h>
# define SQLITE_UNIX_THREADS 1
#endif
69
/*
** Permission bits handed to open() when a new file is created.  A build
** may override the value by predefining the macro.
*/
#if !defined(SQLITE_DEFAULT_FILE_PERMISSIONS)
# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
#endif
76
77
78
79/*
80** The unixFile structure is subclass of OsFile specific for the unix
81** protability layer.
82*/
83typedef struct unixFile unixFile;
84struct unixFile {
85 IoMethod const *pMethod; /* Always the first entry */
86 struct openCnt *pOpen; /* Info about all open fd's on this inode */
87 struct lockInfo *pLock; /* Info about locks on this inode */
88#ifdef SQLITE_ENABLE_LOCKING_STYLE
89 void *lockingContext; /* Locking style specific state */
90#endif /* SQLITE_ENABLE_LOCKING_STYLE */
91 int h; /* The file descriptor */
92 unsigned char locktype; /* The type of lock held on this fd */
93 unsigned char isOpen; /* True if needs to be closed */
94 unsigned char fullSync; /* Use F_FULLSYNC if available */
95 int dirfd; /* File descriptor for the directory */
96 i64 offset; /* Seek offset */
97#ifdef SQLITE_UNIX_THREADS
98 pthread_t tid; /* The thread that "owns" this OsFile */
99#endif
100};
101
/*
** Testing hook: lets some OS-layer functions be overridden so that an
** OS crash can be simulated, in order to verify that commits are atomic
** even when the OS crashes.
**
** In SQLITE_CRASH_TEST builds, CRASH_TEST_OVERRIDE(X,A,B,C) makes the
** enclosing function return X(A,B,C) whenever sqlite3CrashTestEnable is
** non-zero.  In every other build the macro expands to nothing.
*/
#if defined(SQLITE_CRASH_TEST)
  extern int sqlite3CrashTestEnable;
  extern int sqlite3CrashOpenReadWrite(const char*, OsFile**, int*);
  extern int sqlite3CrashOpenExclusive(const char*, OsFile**, int);
  extern int sqlite3CrashOpenReadOnly(const char*, OsFile**, int);
# define CRASH_TEST_OVERRIDE(X,A,B,C) \
    if(sqlite3CrashTestEnable){ return X(A,B,C); }
#else
# define CRASH_TEST_OVERRIDE(X,A,B,C)  /* no-op */
#endif
117
118
119/*
120** Include code that is common to all os_*.c files
121*/
122#include "os_common.h"
123
124/*
125** Do not include any of the File I/O interface procedures if the
126** SQLITE_OMIT_DISKIO macro is defined (indicating that the database
127** will be in-memory only)
128*/
129#ifndef SQLITE_OMIT_DISKIO
130
131
/*
** Provide zero-valued stand-ins for open() flags that are missing from
** some systems.  O_LARGEFILE is additionally forced to zero whenever
** large file support has been disabled with SQLITE_DISABLE_LFS.
*/
#if defined(SQLITE_DISABLE_LFS)
# undef O_LARGEFILE
#endif
#if !defined(O_LARGEFILE)
# define O_LARGEFILE 0
#endif
#if !defined(O_NOFOLLOW)
# define O_NOFOLLOW 0
#endif
#if !defined(O_BINARY)
# define O_BINARY 0
#endif
148
/*
** The DJGPP compiler environment looks mostly like Unix but lacks the
** fcntl() system call.  Map fcntl() onto an expression that always
** reports success; as a consequence no locking takes place under DJGPP
** (which is DOS, after all).
*/
#if defined(__DJGPP__)
# define fcntl(A,B,C) 0
#endif
158
/*
** threadid expands to the id of the calling thread in threaded builds
** and to 0 otherwise.  It is used for testing and debugging only.
*/
#if defined(SQLITE_UNIX_THREADS)
# define threadid pthread_self()
#else
# define threadid 0
#endif
168
/*
** Set or check the OsFile.tid field.  The field is set when an OsFile is
** first opened, and every later use verifies that the same thread is
** still operating on the OsFile.  Some operating systems do not allow
** locks to be overridden by other threads; on those systems sqlite3*
** database handles cannot be moved from one thread to another, and this
** logic makes sure a user does not try to do so by mistake.
**
** Version 3.3.1 (2006-01-15):  OsFiles can be moved from one thread to
** another as long as the system supports threads overriding each others
** locks (which is now the most common behavior) or if no locks are held.
** The OsFile.pLock field must be recomputed in that case because its key
** includes the thread-id.  See the transferOwnership() function below
** for additional information.
**
** SET_THREADID(X) records the calling thread in X->tid.
** CHECK_THREADID(X) is true when threads cannot override each others
** locks and the calling thread is not the one recorded in X->tid.
** Without thread support the first expands to nothing and the second
** to 0.
*/
#ifdef SQLITE_UNIX_THREADS
# define SET_THREADID(X)   (X)->tid = pthread_self()
# define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \
                            !pthread_equal((X)->tid, pthread_self()))
#else
# define SET_THREADID(X)
# define CHECK_THREADID(X) 0
#endif
193
194/*
195** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
196** section 6.5.2.2 lines 483 through 490 specify that when a process
197** sets or clears a lock, that operation overrides any prior locks set
198** by the same process. It does not explicitly say so, but this implies
199** that it overrides locks set by the same process using a different
200** file descriptor. Consider this test case:
201**
202** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
203** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
204**
205** Suppose ./file1 and ./file2 are really the same file (because
206** one is a hard or symbolic link to the other) then if you set
207** an exclusive lock on fd1, then try to get an exclusive lock
208** on fd2, it works. I would have expected the second lock to
209** fail since there was already a lock on the file due to fd1.
210** But not so. Since both locks came from the same process, the
211** second overrides the first, even though they were on different
212** file descriptors opened on different file names.
213**
214** Bummer. If you ask me, this is broken. Badly broken. It means
215** that we cannot use POSIX locks to synchronize file access among
216** competing threads of the same process. POSIX locks will work fine
217** to synchronize access for threads in separate processes, but not
218** threads within the same process.
219**
220** To work around the problem, SQLite has to manage file locks internally
221** on its own. Whenever a new database is opened, we have to find the
222** specific inode of the database file (the inode is determined by the
223** st_dev and st_ino fields of the stat structure that fstat() fills in)
224** and check for locks already existing on that inode. When locks are
225** created or removed, we have to look at our own internal record of the
226** locks to see if another thread has previously set a lock on that same
227** inode.
228**
229** The OsFile structure for POSIX is no longer just an integer file
230** descriptor. It is now a structure that holds the integer file
231** descriptor and a pointer to a structure that describes the internal
232** locks on the corresponding inode. There is one locking structure
233** per inode, so if the same inode is opened twice, both OsFile structures
234** point to the same locking structure. The locking structure keeps
235** a reference count (so we will know when to delete it) and a "cnt"
236** field that tells us its internal lock status. cnt==0 means the
237** file is unlocked. cnt==-1 means the file has an exclusive lock.
238** cnt>0 means there are cnt shared locks on the file.
239**
240** Any attempt to lock or unlock a file first checks the locking
241** structure. The fcntl() system call is only invoked to set a
242** POSIX lock if the internal lock structure transitions between
243** a locked and an unlocked state.
244**
245** 2004-Jan-11:
246** More recent discoveries about POSIX advisory locks. (The more
247** I discover, the more I realize that POSIX advisory locks are
248** an abomination.)
249**
250** If you close a file descriptor that points to a file that has locks,
251** all locks on that file that are owned by the current process are
252** released. To work around this problem, each OsFile structure contains
253** a pointer to an openCnt structure. There is one openCnt structure
254** per open inode, which means that multiple OsFiles can point to a single
255** openCnt. When an attempt is made to close an OsFile, if there are
256** other OsFiles open on the same inode that are holding locks, the call
257** to close() the file descriptor is deferred until all of the locks clear.
258** The openCnt structure keeps a list of file descriptors that need to
259** be closed and that list is walked (and cleared) when the last lock
260** clears.
261**
262** First, under Linux threads, because each thread has a separate
263** process ID, lock operations in one thread do not override locks
264** to the same file in other threads. Linux threads behave like
265** separate processes in this respect. But, if you close a file
266** descriptor in linux threads, all locks are cleared, even locks
267** on other threads and even though the other threads have different
268** process IDs. Linux threads is inconsistent in this respect.
269** (I'm beginning to think that linux threads is an abomination too.)
270** The consequence of this all is that the hash table for the lockInfo
271** structure has to include the process id as part of its key because
272** locks in different threads are treated as distinct. But the
273** openCnt structure should not include the process id in its
274** key because close() clears lock on all threads, not just the current
275** thread. Were it not for this goofiness in linux threads, we could
276** combine the lockInfo and openCnt structures into a single structure.
277**
278** 2004-Jun-28:
279** On some versions of linux, threads can override each others locks.
280** On others not. Sometimes you can change the behavior on the same
281** system by setting the LD_ASSUME_KERNEL environment variable. The
282** POSIX standard is silent as to which behavior is correct, as far
283** as I can tell, so other versions of unix might show the same
284** inconsistency. There is little doubt in my mind that posix
285** advisory locks and linux threads are profoundly broken.
286**
287** To work around the inconsistencies, we have to test at runtime
288** whether or not threads can override each others locks. This test
289** is run once, the first time any lock is attempted. A static
290** variable is set to record the results of this test for future
291** use.
292*/
293
/*
** Key used to look up the lockInfo structure belonging to an inode.
**
** If threads cannot override each others locks, lockKey.tid is set to
** the thread ID.  If they can, tid is always zero.  The tid field is
** omitted entirely when compiling without threading support.
*/
struct lockKey {
  dev_t dev;        /* Device number */
  ino_t ino;        /* Inode number */
#ifdef SQLITE_UNIX_THREADS
  pthread_t tid;    /* Thread ID or zero if threads can override each other */
#endif
};
310
311/*
312** An instance of the following structure is allocated for each open
313** inode on each thread with a different process ID. (Threads have
314** different process IDs on linux, but not on most other unixes.)
315**
316** A single inode can have multiple file descriptors, so each OsFile
317** structure contains a pointer to an instance of this object and this
318** object keeps a count of the number of OsFiles pointing to it.
319*/
320struct lockInfo {
321 struct lockKey key; /* The lookup key */
322 int cnt; /* Number of SHARED locks held */
323 int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
324 int nRef; /* Number of pointers to this structure */
325};
326
/*
** Key used to look up the openCnt structure belonging to an inode.  It
** is the same as lockKey except that the thread ID is left out.
*/
struct openKey {
  dev_t dev;    /* Device number */
  ino_t ino;    /* Inode number */
};
336
337/*
338** An instance of the following structure is allocated for each open
339** inode. This structure keeps track of the number of locks on that
340** inode. If a close is attempted against an inode that is holding
341** locks, the close is deferred until all locks clear by adding the
342** file descriptor to be closed to the pending list.
343*/
344struct openCnt {
345 struct openKey key; /* The lookup key */
346 int nRef; /* Number of pointers to this structure */
347 int nLock; /* Number of outstanding locks */
348 int nPending; /* Number of pending close() operations */
349 int *aPending; /* Malloced space holding fd's awaiting a close() */
350};
351
352/*
353** These hash tables map inodes and file descriptors (really, lockKey and
354** openKey structures) into lockInfo and openCnt structures. Access to
355** these hash tables must be protected by a mutex.
356*/
357static Hash lockHash = {SQLITE_HASH_BINARY, 0, 0, 0,
358 sqlite3ThreadSafeMalloc, sqlite3ThreadSafeFree, 0, 0};
359static Hash openHash = {SQLITE_HASH_BINARY, 0, 0, 0,
360 sqlite3ThreadSafeMalloc, sqlite3ThreadSafeFree, 0, 0};
361
#ifdef SQLITE_ENABLE_LOCKING_STYLE
/*
** The locking styles correspond to the file locking capabilities that
** different file systems support.
**
**   POSIX        fully supports shared and exclusive byte-range locks
**   AFP          only supports exclusive byte-range locks
**   FLOCK        only supports a single file-global exclusive lock
**   DOTLOCK      not a true locking style: a special file named like the
**                database file with a '.lock' extension is used, which
**                works on file systems that offer no reliable locking
**   NO           no locking is attempted; currently only used for
**                read-only file systems
**   UNSUPPORTED  no locking is attempted; only used for file systems
**                that are known to be unsupported
*/
typedef enum {
  posixLockingStyle = 0,    /* standard posix-advisory locks */
  afpLockingStyle,          /* use afp locks */
  flockLockingStyle,        /* use flock() */
  dotlockLockingStyle,      /* use <file>.lock files */
  noLockingStyle,           /* useful for read-only file system */
  unsupportedLockingStyle   /* indicates unsupported file system */
} sqlite3LockingStyle;
#endif /* SQLITE_ENABLE_LOCKING_STYLE */
387
388#ifdef SQLITE_UNIX_THREADS
/*
** Records whether or not threads can override each others locks.
**
**    0:  No.  Threads cannot override each others locks.
**    1:  Yes.  Threads can override each others locks.
**   -1:  We don't know yet.
**
** On some systems the answer is known at compile-time, and there the
** SQLITE_THREAD_OVERRIDE_LOCK macro will be set appropriately.  On the
** remaining systems SQLITE_THREAD_OVERRIDE_LOCK is undefined and the
** answer has to be determined at runtime.
**
** The variable normally has file scope only.  During testing it is made
** global so that test code can change its value and verify that the
** right thing happens in either case.
*/
#if !defined(SQLITE_THREAD_OVERRIDE_LOCK)
# define SQLITE_THREAD_OVERRIDE_LOCK -1
#endif
#ifndef SQLITE_TEST
static int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
#else
int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
#endif
415
/*
** Argument block that testThreadLockingBehavior() hands to each of its
** test threads; the thread stores the outcome in the result field.
*/
struct threadTestData {
  int fd;               /* File to be locked */
  struct flock lock;    /* The locking operation */
  int result;           /* Result of the locking operation */
};
425
#ifdef SQLITE_LOCK_TRACE
/*
** Print out information about all locking operations.
**
** This routine is used for troubleshooting locks on multithreaded
** platforms.  Enable it by compiling with -DSQLITE_LOCK_TRACE.  The
** code is normally turned off.
**
** The routine performs fcntl(fd, op, p), prints a trace line and returns
** the fcntl() result.  When an F_SETLK request for a read or write lock
** fails, an F_GETLK query is issued as well and its answer is printed.
** errno from the original fcntl() call is preserved across the tracing.
**
** After this function, "fcntl" is redefined to "lockTrace" so that all
** later fcntl() calls in the file go through the tracer.
*/
static int lockTrace(int fd, int op, struct flock *p){
  const char *zOpName;
  const char *zType;
  int s;
  int savedErrno;
  if( op==F_GETLK ){
    zOpName = "GETLK";
  }else if( op==F_SETLK ){
    zOpName = "SETLK";
  }else{
    s = fcntl(fd, op, p);
    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
    return s;
  }
  if( p->l_type==F_RDLCK ){
    zType = "RDLCK";
  }else if( p->l_type==F_WRLCK ){
    zType = "WRLCK";
  }else if( p->l_type==F_UNLCK ){
    zType = "UNLCK";
  }else{
    assert( 0 );
    /* Keep zType defined when assert() is compiled out */
    zType = "UNKNOWN";
  }
  assert( p->l_whence==SEEK_SET );
  s = fcntl(fd, op, p);
  savedErrno = errno;
  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
     (int)p->l_pid, s);
  if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
    struct flock l2;
    l2 = *p;
    fcntl(fd, F_GETLK, &l2);
    if( l2.l_type==F_RDLCK ){
      zType = "RDLCK";
    }else if( l2.l_type==F_WRLCK ){
      zType = "WRLCK";
    }else if( l2.l_type==F_UNLCK ){
      zType = "UNLCK";
    }else{
      assert( 0 );
      zType = "UNKNOWN";
    }
    sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
       zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
  }
  errno = savedErrno;
  return s;
}
#define fcntl lockTrace
#endif /* SQLITE_LOCK_TRACE */
484
485/*
486** The testThreadLockingBehavior() routine launches two separate
487** threads on this routine. This routine attempts to lock a file
488** descriptor then returns. The success or failure of that attempt
489** allows the testThreadLockingBehavior() procedure to determine
490** whether or not threads can override each others locks.
491*/
492static void *threadLockingTest(void *pArg){
493 struct threadTestData *pData = (struct threadTestData*)pArg;
494 pData->result = fcntl(pData->fd, F_SETLK, &pData->lock);
495 return pArg;
496}
497
498/*
499** This procedure attempts to determine whether or not threads
500** can override each others locks then sets the
501** threadsOverrideEachOthersLocks variable appropriately.
502*/
503static void testThreadLockingBehavior(int fd_orig){
504 int fd;
505 struct threadTestData d[2];
506 pthread_t t[2];
507
508 fd = dup(fd_orig);
509 if( fd<0 ) return;
510 memset(d, 0, sizeof(d));
511 d[0].fd = fd;
512 d[0].lock.l_type = F_RDLCK;
513 d[0].lock.l_len = 1;
514 d[0].lock.l_start = 0;
515 d[0].lock.l_whence = SEEK_SET;
516 d[1] = d[0];
517 d[1].lock.l_type = F_WRLCK;
518 pthread_create(&t[0], 0, threadLockingTest, &d[0]);
519 pthread_create(&t[1], 0, threadLockingTest, &d[1]);
520 pthread_join(t[0], 0);
521 pthread_join(t[1], 0);
522 close(fd);
523 threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;
524}
525#endif /* SQLITE_UNIX_THREADS */
526
527/*
528** Release a lockInfo structure previously allocated by findLockInfo().
529*/
530static void releaseLockInfo(struct lockInfo *pLock){
531 assert( sqlite3OsInMutex(1) );
532 if (pLock == NULL)
533 return;
534 pLock->nRef--;
535 if( pLock->nRef==0 ){
536 sqlite3HashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
537 sqlite3ThreadSafeFree(pLock);
538 }
539}
540
541/*
542** Release a openCnt structure previously allocated by findLockInfo().
543*/
544static void releaseOpenCnt(struct openCnt *pOpen){
545 assert( sqlite3OsInMutex(1) );
546 if (pOpen == NULL)
547 return;
548 pOpen->nRef--;
549 if( pOpen->nRef==0 ){
550 sqlite3HashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
551 free(pOpen->aPending);
552 sqlite3ThreadSafeFree(pOpen);
553 }
554}
555
556#ifdef SQLITE_ENABLE_LOCKING_STYLE
557/*
558** Tests a byte-range locking query to see if byte range locks are
559** supported, if not we fall back to dotlockLockingStyle.
560*/
561static sqlite3LockingStyle sqlite3TestLockingStyle(const char *filePath,
562 int fd) {
563 /* test byte-range lock using fcntl */
564 struct flock lockInfo;
565
566 lockInfo.l_len = 1;
567 lockInfo.l_start = 0;
568 lockInfo.l_whence = SEEK_SET;
569 lockInfo.l_type = F_RDLCK;
570
571 if (fcntl(fd, F_GETLK, &lockInfo) != -1) {
572 return posixLockingStyle;
573 }
574
575 /* testing for flock can give false positives. So if if the above test
576 ** fails, then we fall back to using dot-lock style locking.
577 */
578 return dotlockLockingStyle;
579}
580
581/*
582** Examines the f_fstypename entry in the statfs structure as returned by
583** stat() for the file system hosting the database file, assigns the
584** appropriate locking style based on it's value. These values and
585** assignments are based on Darwin/OSX behavior and have not been tested on
586** other systems.
587*/
588static sqlite3LockingStyle sqlite3DetectLockingStyle(const char *filePath,
589 int fd) {
590
591#ifdef SQLITE_FIXED_LOCKING_STYLE
592 return (sqlite3LockingStyle)SQLITE_FIXED_LOCKING_STYLE;
593#else
594 struct statfs fsInfo;
595
596 if (statfs(filePath, &fsInfo) == -1)
597 return sqlite3TestLockingStyle(filePath, fd);
598
599 if (fsInfo.f_flags & MNT_RDONLY)
600 return noLockingStyle;
601
602 if( (!strcmp(fsInfo.f_fstypename, "hfs")) ||
603 (!strcmp(fsInfo.f_fstypename, "ufs")) )
604return posixLockingStyle;
605
606 if(!strcmp(fsInfo.f_fstypename, "afpfs"))
607 return afpLockingStyle;
608
609 if(!strcmp(fsInfo.f_fstypename, "nfs"))
610 return sqlite3TestLockingStyle(filePath, fd);
611
612 if(!strcmp(fsInfo.f_fstypename, "smbfs"))
613 return flockLockingStyle;
614
615 if(!strcmp(fsInfo.f_fstypename, "msdos"))
616 return dotlockLockingStyle;
617
618 if(!strcmp(fsInfo.f_fstypename, "webdav"))
619 return unsupportedLockingStyle;
620
621 return sqlite3TestLockingStyle(filePath, fd);
622#endif /* SQLITE_FIXED_LOCKING_STYLE */
623}
624
625#endif /* SQLITE_ENABLE_LOCKING_STYLE */
626
627/*
628** Given a file descriptor, locate lockInfo and openCnt structures that
629** describes that file descriptor. Create new ones if necessary. The
630** return values might be uninitialized if an error occurs.
631**
632** Return the number of errors.
633*/
634static int findLockInfo(
635 int fd, /* The file descriptor used in the key */
636 struct lockInfo **ppLock, /* Return the lockInfo structure here */
637 struct openCnt **ppOpen /* Return the openCnt structure here */
638){
639 int rc;
640 struct lockKey key1;
641 struct openKey key2;
642 struct stat statbuf;
643 struct lockInfo *pLock;
644 struct openCnt *pOpen;
645 rc = fstat(fd, &statbuf);
646 if( rc!=0 ) return 1;
647
648 assert( sqlite3OsInMutex(1) );
649 memset(&key1, 0, sizeof(key1));
650 key1.dev = statbuf.st_dev;
651 key1.ino = statbuf.st_ino;
652#ifdef SQLITE_UNIX_THREADS
653 if( threadsOverrideEachOthersLocks<0 ){
654 testThreadLockingBehavior(fd);
655 }
656 key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
657#endif
658 memset(&key2, 0, sizeof(key2));
659 key2.dev = statbuf.st_dev;
660 key2.ino = statbuf.st_ino;
661 pLock = (struct lockInfo*)sqlite3HashFind(&lockHash, &key1, sizeof(key1));
662 if( pLock==0 ){
663 struct lockInfo *pOld;
664 pLock = sqlite3ThreadSafeMalloc( sizeof(*pLock) );
665 if( pLock==0 ){
666 rc = 1;
667 goto exit_findlockinfo;
668 }
669 pLock->key = key1;
670 pLock->nRef = 1;
671 pLock->cnt = 0;
672 pLock->locktype = 0;
673 pOld = sqlite3HashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
674 if( pOld!=0 ){
675 assert( pOld==pLock );
676 sqlite3ThreadSafeFree(pLock);
677 rc = 1;
678 goto exit_findlockinfo;
679 }
680 }else{
681 pLock->nRef++;
682 }
683 *ppLock = pLock;
684 if( ppOpen!=0 ){
685 pOpen = (struct openCnt*)sqlite3HashFind(&openHash, &key2, sizeof(key2));
686 if( pOpen==0 ){
687 struct openCnt *pOld;
688 pOpen = sqlite3ThreadSafeMalloc( sizeof(*pOpen) );
689 if( pOpen==0 ){
690 releaseLockInfo(pLock);
691 rc = 1;
692 goto exit_findlockinfo;
693 }
694 pOpen->key = key2;
695 pOpen->nRef = 1;
696 pOpen->nLock = 0;
697 pOpen->nPending = 0;
698 pOpen->aPending = 0;
699 pOld = sqlite3HashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
700 if( pOld!=0 ){
701 assert( pOld==pOpen );
702 sqlite3ThreadSafeFree(pOpen);
703 releaseLockInfo(pLock);
704 rc = 1;
705 goto exit_findlockinfo;
706 }
707 }else{
708 pOpen->nRef++;
709 }
710 *ppOpen = pOpen;
711 }
712
713exit_findlockinfo:
714 return rc;
715}
716
#ifdef SQLITE_DEBUG
/*
** Helper function for printing out trace information from debugging
** binaries.  Returns the string representation of the supplied integer
** lock-type, or "ERROR" if the value is not a known lock-type.
*/
static const char *locktypeName(int locktype){
  const char *zName = "ERROR";
  switch( locktype ){
    case NO_LOCK:         zName = "NONE";       break;
    case SHARED_LOCK:     zName = "SHARED";     break;
    case RESERVED_LOCK:   zName = "RESERVED";   break;
    case PENDING_LOCK:    zName = "PENDING";    break;
    case EXCLUSIVE_LOCK:  zName = "EXCLUSIVE";  break;
  }
  return zName;
}
#endif
734
/*
** If we are currently in a different thread than the thread that the
** unixFile argument belongs to, then transfer ownership of the unixFile
** over to the current thread.
**
** A unixFile is only owned by a thread on systems where one thread is
** unable to override locks created by a different thread.  RedHat9 is
** an example of such a system.
**
** Ownership transfer is only allowed if the unixFile is currently
** unlocked.  If the unixFile is locked and the ownership is wrong, then
** SQLITE_MISUSE is returned.  SQLITE_OK is returned if everything works.
** If the lockInfo for the new owner cannot be obtained, the non-zero
** result of findLockInfo() is returned and pFile->pLock is left NULL
** rather than pointing at the structure that was just released.
*/
#ifdef SQLITE_UNIX_THREADS
static int transferOwnership(unixFile *pFile){
  int rc;
  pthread_t hSelf;
  if( threadsOverrideEachOthersLocks ){
    /* Ownership transfers not needed on this system */
    return SQLITE_OK;
  }
  hSelf = pthread_self();
  if( pthread_equal(pFile->tid, hSelf) ){
    /* We are still in the same thread */
    OSTRACE1("No-transfer, same thread\n");
    return SQLITE_OK;
  }
  if( pFile->locktype!=NO_LOCK ){
    /* We cannot change ownership while we are holding a lock! */
    return SQLITE_MISUSE;
  }
  OSTRACE4("Transfer ownership of %d from %d to %d\n",
            pFile->h, pFile->tid, hSelf);
  pFile->tid = hSelf;
  if( pFile->pLock==NULL ){
    return SQLITE_OK;
  }
  /* The lockInfo key includes the thread id, so look it up again */
  releaseLockInfo(pFile->pLock);
  pFile->pLock = NULL;
  rc = findLockInfo(pFile->h, &pFile->pLock, 0);
  if( rc!=0 ){
    /* Do not keep (or trace through) a pointer that may be invalid */
    pFile->pLock = NULL;
    return rc;
  }
  OSTRACE5("LOCK    %d is now %s(%s,%d)\n", pFile->h,
         locktypeName(pFile->locktype),
         locktypeName(pFile->pLock->locktype), pFile->pLock->cnt);
  return rc;
}
#else
  /* On single-threaded builds, ownership transfer is a no-op */
# define transferOwnership(X) SQLITE_OK
#endif
784
785/*
786** Delete the named file
787*/
788int sqlite3UnixDelete(const char *zFilename){
789 SimulateIOError(return SQLITE_IOERR_DELETE);
790 unlink(zFilename);
791 return SQLITE_OK;
792}
793
/*
** Return TRUE (1) if the named file exists and FALSE (0) otherwise.
** Existence is decided by an access() query for F_OK.
*/
int sqlite3UnixFileExists(const char *zFilename){
  return access(zFilename, F_OK)==0;
}
800
801/* Forward declaration */
802static int allocateUnixFile(
803 int h, /* File descriptor of the open file */
804 OsFile **pId, /* Write the real file descriptor here */
805 const char *zFilename, /* Name of the file being opened */
806 int delFlag /* If true, make sure the file deletes on close */
807);
808
809/*
810** Attempt to open a file for both reading and writing. If that
811** fails, try opening it read-only. If the file does not exist,
812** try to create it.
813**
814** On success, a handle for the open file is written to *id
815** and *pReadonly is set to 0 if the file was opened for reading and
816** writing or 1 if the file was opened read-only. The function returns
817** SQLITE_OK.
818**
819** On failure, the function returns SQLITE_CANTOPEN and leaves
820** *id and *pReadonly unchanged.
821*/
822int sqlite3UnixOpenReadWrite(
823 const char *zFilename,
824 OsFile **pId,
825 int *pReadonly
826){
827 int h;
828
829 CRASH_TEST_OVERRIDE(sqlite3CrashOpenReadWrite, zFilename, pId, pReadonly);
830 assert( 0==*pId );
831 h = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY,
832 SQLITE_DEFAULT_FILE_PERMISSIONS);
833 if( h<0 ){
834#ifdef EISDIR
835 if( errno==EISDIR ){
836 return SQLITE_CANTOPEN;
837 }
838#endif
839 h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
840 if( h<0 ){
841 return SQLITE_CANTOPEN;
842 }
843 *pReadonly = 1;
844 }else{
845 *pReadonly = 0;
846 }
847 return allocateUnixFile(h, pId, zFilename, 0);
848}
849
850
851/*
852** Attempt to open a new file for exclusive access by this process.
853** The file will be opened for both reading and writing. To avoid
854** a potential security problem, we do not allow the file to have
855** previously existed. Nor do we allow the file to be a symbolic
856** link.
857**
858** If delFlag is true, then make arrangements to automatically delete
859** the file when it is closed.
860**
861** On success, write the file handle into *id and return SQLITE_OK.
862**
863** On failure, return SQLITE_CANTOPEN.
864*/
865int sqlite3UnixOpenExclusive(const char *zFilename, OsFile **pId, int delFlag){
866 int h;
867
868 CRASH_TEST_OVERRIDE(sqlite3CrashOpenExclusive, zFilename, pId, delFlag);
869 assert( 0==*pId );
870 h = open(zFilename,
871 O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY,
872 delFlag ? 0600 : SQLITE_DEFAULT_FILE_PERMISSIONS);
873 if( h<0 ){
874 return SQLITE_CANTOPEN;
875 }
876 return allocateUnixFile(h, pId, zFilename, delFlag);
877}
878
879/*
880** Attempt to open a new file for read-only access.
881**
882** On success, write the file handle into *id and return SQLITE_OK.
883**
884** On failure, return SQLITE_CANTOPEN.
885*/
886int sqlite3UnixOpenReadOnly(const char *zFilename, OsFile **pId){
887 int h;
888
889 CRASH_TEST_OVERRIDE(sqlite3CrashOpenReadOnly, zFilename, pId, 0);
890 assert( 0==*pId );
891 h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
892 if( h<0 ){
893 return SQLITE_CANTOPEN;
894 }
895 return allocateUnixFile(h, pId, zFilename, 0);
896}
897
898/*
899** Attempt to open a file descriptor for the directory that contains a
900** file. This file descriptor can be used to fsync() the directory
901** in order to make sure the creation of a new file is actually written
902** to disk.
903**
904** This routine is only meaningful for Unix. It is a no-op under
905** windows since windows does not support hard links.
906**
907** If FULL_FSYNC is enabled, this function is not longer useful,
908** a FULL_FSYNC sync applies to all pending disk operations.
909**
910** On success, a handle for a previously open file at *id is
911** updated with the new directory file descriptor and SQLITE_OK is
912** returned.
913**
914** On failure, the function returns SQLITE_CANTOPEN and leaves
915** *id unchanged.
916*/
917static int unixOpenDirectory(
918 OsFile *id,
919 const char *zDirname
920){
921 unixFile *pFile = (unixFile*)id;
922 assert( pFile!=0 );
923 SET_THREADID(pFile);
924 assert( pFile->dirfd<0 );
925 pFile->dirfd = open(zDirname, O_RDONLY|O_BINARY, 0);
926 if( pFile->dirfd<0 ){
927 return SQLITE_CANTOPEN;
928 }
929 OSTRACE3("OPENDIR %-3d %s\n", pFile->dirfd, zDirname);
930 return SQLITE_OK;
931}
932
933/*
934** Create a temporary file name in zBuf. zBuf must be big enough to
935** hold at least SQLITE_TEMPNAME_SIZE characters.
936*/
937int sqlite3UnixTempFileName(char *zBuf){
938 static const char *azDirs[] = {
939 0,
940 "/var/tmp",
941 "/usr/tmp",
942 "/tmp",
943 ".",
944 };
945 static const unsigned char zChars[] =
946 "abcdefghijklmnopqrstuvwxyz"
947 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
948 "0123456789";
949 int i, j;
950 struct stat buf;
951 const char *zDir = ".";
952 azDirs[0] = sqlite3_temp_directory;
953 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
954 if( azDirs[i]==0 ) continue;
955 if( stat(azDirs[i], &buf) ) continue;
956 if( !S_ISDIR(buf.st_mode) ) continue;
957 if( access(azDirs[i], 07) ) continue;
958 zDir = azDirs[i];
959 break;
960 }
961 do{
962 sqlite3_snprintf(SQLITE_TEMPNAME_SIZE, zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
963 j = strlen(zBuf);
964 sqlite3Randomness(15, &zBuf[j]);
965 for(i=0; i<15; i++, j++){
966 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
967 }
968 zBuf[j] = 0;
969 }while( access(zBuf,0)==0 );
970 return SQLITE_OK;
971}
972
/*
** Return 1 if zBuf names an existing directory for which
** access(zBuf, 07) succeeds, and 0 otherwise.  A NULL or empty
** zBuf yields 0.
**
** When SQLITE_OMIT_PAGER_PRAGMAS is defined no checks are made and
** the routine always returns 1.
*/
int sqlite3UnixIsDirWritable(char *zBuf){
#ifndef SQLITE_OMIT_PAGER_PRAGMAS
  struct stat sStat;
  int isWritableDir =
       zBuf!=0
    && zBuf[0]!=0
    && stat(zBuf, &sStat)==0
    && S_ISDIR(sStat.st_mode)
    && access(zBuf, 07)==0;
  if( !isWritableDir ){
    return 0;
  }
#endif /* SQLITE_OMIT_PAGER_PRAGMAS */
  return 1;
}
988
989/*
990** Seek to the offset in id->offset then read cnt bytes into pBuf.
991** Return the number of bytes actually read. Update the offset.
992*/
993static int seekAndRead(unixFile *id, void *pBuf, int cnt){
994 int got;
995 i64 newOffset;
996 TIMER_START;
997#if defined(USE_PREAD)
998 got = pread(id->h, pBuf, cnt, id->offset);
999 SimulateIOError( got = -1 );
1000#elif defined(USE_PREAD64)
1001 got = pread64(id->h, pBuf, cnt, id->offset);
1002 SimulateIOError( got = -1 );
1003#else
1004 newOffset = lseek(id->h, id->offset, SEEK_SET);
1005 SimulateIOError( newOffset-- );
1006 if( newOffset!=id->offset ){
1007 return -1;
1008 }
1009 got = read(id->h, pBuf, cnt);
1010#endif
1011 TIMER_END;
1012 OSTRACE5("READ %-3d %5d %7lld %d\n", id->h, got, id->offset, TIMER_ELAPSED);
1013 if( got>0 ){
1014 id->offset += got;
1015 }
1016 return got;
1017}
1018
1019/*
1020** Read data from a file into a buffer. Return SQLITE_OK if all
1021** bytes were read successfully and SQLITE_IOERR if anything goes
1022** wrong.
1023*/
1024static int unixRead(OsFile *id, void *pBuf, int amt){
1025 int got;
1026 assert( id );
1027 got = seekAndRead((unixFile*)id, pBuf, amt);
1028 if( got==amt ){
1029 return SQLITE_OK;
1030 }else if( got<0 ){
1031 return SQLITE_IOERR_READ;
1032 }else{
1033 memset(&((char*)pBuf)[got], 0, amt-got);
1034 return SQLITE_IOERR_SHORT_READ;
1035 }
1036}
1037
1038/*
1039** Seek to the offset in id->offset then read cnt bytes into pBuf.
1040** Return the number of bytes actually read. Update the offset.
1041*/
1042static int seekAndWrite(unixFile *id, const void *pBuf, int cnt){
1043 int got;
1044 i64 newOffset;
1045 TIMER_START;
1046#if defined(USE_PREAD)
1047 got = pwrite(id->h, pBuf, cnt, id->offset);
1048#elif defined(USE_PREAD64)
1049 got = pwrite64(id->h, pBuf, cnt, id->offset);
1050#else
1051 newOffset = lseek(id->h, id->offset, SEEK_SET);
1052 if( newOffset!=id->offset ){
1053 return -1;
1054 }
1055 got = write(id->h, pBuf, cnt);
1056#endif
1057 TIMER_END;
1058 OSTRACE5("WRITE %-3d %5d %7lld %d\n", id->h, got, id->offset, TIMER_ELAPSED);
1059 if( got>0 ){
1060 id->offset += got;
1061 }
1062 return got;
1063}
1064
1065
1066/*
1067** Write data from a buffer into a file. Return SQLITE_OK on success
1068** or some other error code on failure.
1069*/
1070static int unixWrite(OsFile *id, const void *pBuf, int amt){
1071 int wrote = 0;
1072 assert( id );
1073 assert( amt>0 );
1074 while( amt>0 && (wrote = seekAndWrite((unixFile*)id, pBuf, amt))>0 ){
1075 amt -= wrote;
1076 pBuf = &((char*)pBuf)[wrote];
1077 }
1078 SimulateIOError(( wrote=(-1), amt=1 ));
1079 SimulateDiskfullError(( wrote=0, amt=1 ));
1080 if( amt>0 ){
1081 if( wrote<0 ){
1082 return SQLITE_IOERR_WRITE;
1083 }else{
1084 return SQLITE_FULL;
1085 }
1086 }
1087 return SQLITE_OK;
1088}
1089
1090/*
1091** Move the read/write pointer in a file.
1092*/
1093static int unixSeek(OsFile *id, i64 offset){
1094 assert( id );
1095#ifdef SQLITE_TEST
1096 if( offset ) SimulateDiskfullError(return SQLITE_FULL);
1097#endif
1098 ((unixFile*)id)->offset = offset;
1099 return SQLITE_OK;
1100}
1101
1102#ifdef SQLITE_TEST
1103/*
1104** Count the number of fullsyncs and normal syncs. This is used to test
1105** that syncs and fullsyncs are occurring at the right times.
**
** full_fsync() increments sqlite3_sync_count on every call and
** sqlite3_fullsync_count on calls whose fullSync argument is non-zero.
** Both counters exist only in SQLITE_TEST builds.
1106*/
1107int sqlite3_sync_count = 0;
1108int sqlite3_fullsync_count = 0;
1109#endif
1110
1111/*
1112** Use the fdatasync() API only if the HAVE_FDATASYNC macro is defined.
1113** Otherwise use fsync() in its place.
**
** The substitution is done with a macro, so every later use of the
** name fdatasync in this file becomes fsync when HAVE_FDATASYNC is
** not defined.
1114*/
1115#ifndef HAVE_FDATASYNC
1116# define fdatasync fsync
1117#endif
1118
1119/*
1120** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
1121** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently
1122** only available on Mac OS X. But that could change.
**
** HAVE_FULLFSYNC is always defined after this point, so it can be
** used both in #if directives and in ordinary C expressions.
1123*/
1124#ifdef F_FULLFSYNC
1125# define HAVE_FULLFSYNC 1
1126#else
1127# define HAVE_FULLFSYNC 0
1128#endif
1129
1130
/*
** The fsync() system call does not work as advertised on many
** unix systems.  The following procedure is an attempt to make
** it work better.
**
** Behaviour by build configuration:
**
**   SQLITE_NO_SYNC defined   No system call is made and SQLITE_OK is
**                            returned.  This is useful for running the
**                            test suite quickly.  You are strongly
**                            advised *not* to deploy with SQLITE_NO_SYNC
**                            enabled, since an OS crash or power failure
**                            will then likely corrupt the database file.
**
**   HAVE_FULLFSYNC non-zero  If fullSync is true, fcntl(F_FULLFSYNC) is
**                            tried first.  If it fails, or if fullSync is
**                            false, fsync() is used.  dataOnly is ignored.
**
**   otherwise                fdatasync() is used if dataOnly is true,
**                            fsync() if not.  fullSync is ignored.
**
** The return value is the result of the last system call made (zero on
** success).
**
** In SQLITE_TEST builds each call is counted in sqlite3_sync_count, and
** in sqlite3_fullsync_count as well when fullSync is true, so that tests
** can verify this procedure is called with the correct arguments.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
  int rc;

#ifdef SQLITE_TEST
  sqlite3_sync_count++;
  if( fullSync ){
    sqlite3_fullsync_count++;
  }
#endif

#ifdef SQLITE_NO_SYNC
  /* Syncing is compiled out entirely. */
  rc = SQLITE_OK;
#else

#if HAVE_FULLFSYNC
  /* A non-zero rc here means "still need a plain fsync()". */
  rc = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;

  /* If the FULLFSYNC failed, fall back to attempting an fsync().
  ** It shouldn't be possible for fullfsync to fail on the local
  ** file system (on OSX), so failure indicates that FULLFSYNC
  ** isn't supported for this file system.  So, attempt an fsync
  ** and (for now) ignore the overhead of a superfluous fcntl call.
  ** It'd be better to detect fullfsync support once and avoid
  ** the fcntl call every time sync is called.
  */
  if( rc ){
    rc = fsync(fd);
  }

#else
  rc = dataOnly ? fdatasync(fd) : fsync(fd);
#endif /* HAVE_FULLFSYNC */
#endif /* defined(SQLITE_NO_SYNC) */

  return rc;
}
1188
1189/*
1190** Make sure all writes to a particular file are committed to disk.
1191**
1192** If dataOnly==0 then both the file itself and its metadata (file
1193** size, access time, etc) are synced. If dataOnly!=0 then only the
1194** file data is synced.
1195**
1196** Under Unix, also make sure that the directory entry for the file
1197** has been created by fsync-ing the directory that contains the file.
1198** If we do not do this and we encounter a power failure, the directory
1199** entry for the journal might not exist after we reboot. The next
1200** SQLite to access the file will not know that the journal exists (because
1201** the directory entry for the journal was never created) and the transaction
1202** will not roll back - possibly leading to database corruption.
1203*/
1204static int unixSync(OsFile *id, int dataOnly){
1205 int rc;
1206 unixFile *pFile = (unixFile*)id;
1207 assert( pFile );
1208 OSTRACE2("SYNC %-3d\n", pFile->h);
1209 rc = full_fsync(pFile->h, pFile->fullSync, dataOnly);
1210 SimulateIOError( rc=1 );
1211 if( rc ){
1212 return SQLITE_IOERR_FSYNC;
1213 }
1214 if( pFile->dirfd>=0 ){
1215 OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
1216 HAVE_FULLFSYNC, pFile->fullSync);
1217#ifndef SQLITE_DISABLE_DIRSYNC
1218 /* The directory sync is only attempted if full_fsync is
1219 ** turned off or unavailable. If a full_fsync occurred above,
1220 ** then the directory sync is superfluous.
1221 */
1222 if( (!HAVE_FULLFSYNC || !pFile->fullSync) && full_fsync(pFile->dirfd,0,0) ){
1223 /*
1224 ** We have received multiple reports of fsync() returning
1225 ** errors when applied to directories on certain file systems.
1226 ** A failed directory sync is not a big deal. So it seems
1227 ** better to ignore the error. Ticket #1657
1228 */
1229 /* return SQLITE_IOERR; */
1230 }
1231#endif
1232 close(pFile->dirfd); /* Only need to sync once, so close the directory */
1233 pFile->dirfd = -1; /* when we are done. */
1234 }
1235 return SQLITE_OK;
1236}
1237
1238/*
1239** Sync the directory zDirname. This is a no-op on operating systems other
1240** than UNIX.
1241**
1242** This is used to make sure the master journal file has truely been deleted
1243** before making changes to individual journals on a multi-database commit.
1244** The F_FULLFSYNC option is not needed here.
1245*/
1246int sqlite3UnixSyncDirectory(const char *zDirname){
1247#ifdef SQLITE_DISABLE_DIRSYNC
1248 return SQLITE_OK;
1249#else
1250 int fd;
1251 int r;
1252 fd = open(zDirname, O_RDONLY|O_BINARY, 0);
1253 OSTRACE3("DIRSYNC %-3d (%s)\n", fd, zDirname);
1254 if( fd<0 ){
1255 return SQLITE_CANTOPEN;
1256 }
1257 r = fsync(fd);
1258 close(fd);
1259 SimulateIOError( r=1 );
1260 if( r ){
1261 return SQLITE_IOERR_DIR_FSYNC;
1262 }else{
1263 return SQLITE_OK;
1264 }
1265#endif
1266}
1267
1268/*
1269** Truncate an open file to a specified size
1270*/
1271static int unixTruncate(OsFile *id, i64 nByte){
1272 int rc;
1273 assert( id );
1274 rc = ftruncate(((unixFile*)id)->h, nByte);
1275 SimulateIOError( rc=1 );
1276 if( rc ){
1277 return SQLITE_IOERR_TRUNCATE;
1278 }else{
1279 return SQLITE_OK;
1280 }
1281}
1282
1283/*
1284** Determine the current size of a file in bytes
1285*/
1286static int unixFileSize(OsFile *id, i64 *pSize){
1287 int rc;
1288 struct stat buf;
1289 assert( id );
1290 rc = fstat(((unixFile*)id)->h, &buf);
1291 SimulateIOError( rc=1 );
1292 if( rc!=0 ){
1293 return SQLITE_IOERR_FSTAT;
1294 }
1295 *pSize = buf.st_size;
1296 return SQLITE_OK;
1297}
1298
1299/*
1300** This routine checks if there is a RESERVED lock held on the specified
1301** file by this or any other process. If such a lock is held, return
1302** non-zero. If the file is unlocked or holds only SHARED locks, then
1303** return zero.
1304*/
1305static int unixCheckReservedLock(OsFile *id){
1306 int r = 0;
1307 unixFile *pFile = (unixFile*)id;
1308
1309 assert( pFile );
1310 sqlite3OsEnterMutex(); /* Because pFile->pLock is shared across threads */
1311
1312 /* Check if a thread in this process holds such a lock */
1313 if( pFile->pLock->locktype>SHARED_LOCK ){
1314 r = 1;
1315 }
1316
1317 /* Otherwise see if some other process holds it.
1318 */
1319 if( !r ){
1320 struct flock lock;
1321 lock.l_whence = SEEK_SET;
1322 lock.l_start = RESERVED_BYTE;
1323 lock.l_len = 1;
1324 lock.l_type = F_WRLCK;
1325 fcntl(pFile->h, F_GETLK, &lock);
1326 if( lock.l_type!=F_UNLCK ){
1327 r = 1;
1328 }
1329 }
1330
1331 sqlite3OsLeaveMutex();
1332 OSTRACE3("TEST WR-LOCK %d %d\n", pFile->h, r);
1333
1334 return r;
1335}
1336
1337/*
1338** Lock the file with the lock specified by parameter locktype - one
1339** of the following:
1340**
1341** (1) SHARED_LOCK
1342** (2) RESERVED_LOCK
1343** (3) PENDING_LOCK
1344** (4) EXCLUSIVE_LOCK
1345**
1346** Sometimes when requesting one lock state, additional lock states
1347** are inserted in between. The locking might fail on one of the later
1348** transitions leaving the lock state different from what it started but
1349** still short of its goal. The following chart shows the allowed
1350** transitions and the inserted intermediate states:
1351**
1352** UNLOCKED -> SHARED
1353** SHARED -> RESERVED
1354** SHARED -> (PENDING) -> EXCLUSIVE
1355** RESERVED -> (PENDING) -> EXCLUSIVE
1356** PENDING -> EXCLUSIVE
1357**
1358** This routine will only increase a lock. Use the sqlite3OsUnlock()
1359** routine to lower a locking level.
1360*/
1361static int unixLock(OsFile *id, int locktype){
1362 /* The following describes the implementation of the various locks and
1363 ** lock transitions in terms of the POSIX advisory shared and exclusive
1364 ** lock primitives (called read-locks and write-locks below, to avoid
1365 ** confusion with SQLite lock names). The algorithms are complicated
1366 ** slightly in order to be compatible with windows systems simultaneously
1367 ** accessing the same database file, in case that is ever required.
1368 **
1369 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
1370 ** byte', each single bytes at well known offsets, and the 'shared byte
1371 ** range', a range of 510 bytes at a well known offset.
1372 **
1373 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
1374 ** byte'. If this is successful, a random byte from the 'shared byte
1375 ** range' is read-locked and the lock on the 'pending byte' released.
1376 **
1377 ** A process may only obtain a RESERVED lock after it has a SHARED lock.
1378 ** A RESERVED lock is implemented by grabbing a write-lock on the
1379 ** 'reserved byte'.
1380 **
1381 ** A process may only obtain a PENDING lock after it has obtained a
1382 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
1383 ** on the 'pending byte'. This ensures that no new SHARED locks can be
1384 ** obtained, but existing SHARED locks are allowed to persist. A process
1385 ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
1386 ** This property is used by the algorithm for rolling back a journal file
1387 ** after a crash.
1388 **
1389 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
1390 ** implemented by obtaining a write-lock on the entire 'shared byte
1391 ** range'. Since all other locks require a read-lock on one of the bytes
1392 ** within this range, this ensures that no other locks are held on the
1393 ** database.
1394 **
1395 ** The reason a single byte cannot be used instead of the 'shared byte
1396 ** range' is that some versions of windows do not support read-locks. By
1397 ** locking a random byte from a range, concurrent SHARED locks may exist
1398 ** even if the locking primitive used is always a write-lock.
1399 */
1400 int rc = SQLITE_OK;
1401 unixFile *pFile = (unixFile*)id;
1402 struct lockInfo *pLock = pFile->pLock;
1403 struct flock lock;
1404 int s;
1405
1406 assert( pFile );
1407 OSTRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", pFile->h,
1408 locktypeName(locktype), locktypeName(pFile->locktype),
1409 locktypeName(pLock->locktype), pLock->cnt , getpid());
1410
1411 /* If there is already a lock of this type or more restrictive on the
1412 ** OsFile, do nothing. Don't use the end_lock: exit path, as
1413 ** sqlite3OsEnterMutex() hasn't been called yet.
1414 */
1415 if( pFile->locktype>=locktype ){
1416 OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
1417 locktypeName(locktype));
1418 return SQLITE_OK;
1419 }
1420
1421 /* Make sure the locking sequence is correct
1422 */
1423 assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
1424 assert( locktype!=PENDING_LOCK );
1425 assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
1426
1427 /* This mutex is needed because pFile->pLock is shared across threads
1428 */
1429 sqlite3OsEnterMutex();
1430
1431 /* Make sure the current thread owns the pFile.
1432 */
1433 rc = transferOwnership(pFile);
1434 if( rc!=SQLITE_OK ){
1435 sqlite3OsLeaveMutex();
1436 return rc;
1437 }
1438 pLock = pFile->pLock;
1439
1440 /* If some thread using this PID has a lock via a different OsFile*
1441 ** handle that precludes the requested lock, return BUSY.
1442 */
1443 if( (pFile->locktype!=pLock->locktype &&
1444 (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
1445 ){
1446 rc = SQLITE_BUSY;
1447 goto end_lock;
1448 }
1449
1450 /* If a SHARED lock is requested, and some thread using this PID already
1451 ** has a SHARED or RESERVED lock, then increment reference counts and
1452 ** return SQLITE_OK.
1453 */
1454 if( locktype==SHARED_LOCK &&
1455 (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
1456 assert( locktype==SHARED_LOCK );
1457 assert( pFile->locktype==0 );
1458 assert( pLock->cnt>0 );
1459 pFile->locktype = SHARED_LOCK;
1460 pLock->cnt++;
1461 pFile->pOpen->nLock++;
1462 goto end_lock;
1463 }
1464
1465 lock.l_len = 1L;
1466
1467 lock.l_whence = SEEK_SET;
1468
1469 /* A PENDING lock is needed before acquiring a SHARED lock and before
1470 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1471 ** be released.
1472 */
1473 if( locktype==SHARED_LOCK
1474 || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
1475 ){
1476 lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
1477 lock.l_start = PENDING_BYTE;
1478 s = fcntl(pFile->h, F_SETLK, &lock);
1479 if( s==(-1) ){
1480 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1481 goto end_lock;
1482 }
1483 }
1484
1485
1486 /* If control gets to this point, then actually go ahead and make
1487 ** operating system calls for the specified lock.
1488 */
1489 if( locktype==SHARED_LOCK ){
1490 assert( pLock->cnt==0 );
1491 assert( pLock->locktype==0 );
1492
1493 /* Now get the read-lock */
1494 lock.l_start = SHARED_FIRST;
1495 lock.l_len = SHARED_SIZE;
1496 s = fcntl(pFile->h, F_SETLK, &lock);
1497
1498 /* Drop the temporary PENDING lock */
1499 lock.l_start = PENDING_BYTE;
1500 lock.l_len = 1L;
1501 lock.l_type = F_UNLCK;
1502 if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
1503 rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1504 goto end_lock;
1505 }
1506 if( s==(-1) ){
1507 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1508 }else{
1509 pFile->locktype = SHARED_LOCK;
1510 pFile->pOpen->nLock++;
1511 pLock->cnt = 1;
1512 }
1513 }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
1514 /* We are trying for an exclusive lock but another thread in this
1515 ** same process is still holding a shared lock. */
1516 rc = SQLITE_BUSY;
1517 }else{
1518 /* The request was for a RESERVED or EXCLUSIVE lock. It is
1519 ** assumed that there is a SHARED or greater lock on the file
1520 ** already.
1521 */
1522 assert( 0!=pFile->locktype );
1523 lock.l_type = F_WRLCK;
1524 switch( locktype ){
1525 case RESERVED_LOCK:
1526 lock.l_start = RESERVED_BYTE;
1527 break;
1528 case EXCLUSIVE_LOCK:
1529 lock.l_start = SHARED_FIRST;
1530 lock.l_len = SHARED_SIZE;
1531 break;
1532 default:
1533 assert(0);
1534 }
1535 s = fcntl(pFile->h, F_SETLK, &lock);
1536 if( s==(-1) ){
1537 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1538 }
1539 }
1540
1541 if( rc==SQLITE_OK ){
1542 pFile->locktype = locktype;
1543 pLock->locktype = locktype;
1544 }else if( locktype==EXCLUSIVE_LOCK ){
1545 pFile->locktype = PENDING_LOCK;
1546 pLock->locktype = PENDING_LOCK;
1547 }
1548
1549end_lock:
1550 sqlite3OsLeaveMutex();
1551 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
1552 rc==SQLITE_OK ? "ok" : "failed");
1553 return rc;
1554}
1555
1556/*
1557** Lower the locking level on file descriptor pFile to locktype. locktype
1558** must be either NO_LOCK or SHARED_LOCK.
1559**
1560** If the locking level of the file descriptor is already at or below
1561** the requested locking level, this routine is a no-op.
1562*/
1563static int unixUnlock(OsFile *id, int locktype){
1564 struct lockInfo *pLock;
1565 struct flock lock;
1566 int rc = SQLITE_OK;
1567 unixFile *pFile = (unixFile*)id;
1568
1569 assert( pFile );
1570 OSTRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
1571 pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
1572
1573 assert( locktype<=SHARED_LOCK );
1574 if( pFile->locktype<=locktype ){
1575 return SQLITE_OK;
1576 }
1577 if( CHECK_THREADID(pFile) ){
1578 return SQLITE_MISUSE;
1579 }
1580 sqlite3OsEnterMutex();
1581 pLock = pFile->pLock;
1582 assert( pLock->cnt!=0 );
1583 if( pFile->locktype>SHARED_LOCK ){
1584 assert( pLock->locktype==pFile->locktype );
1585 if( locktype==SHARED_LOCK ){
1586 lock.l_type = F_RDLCK;
1587 lock.l_whence = SEEK_SET;
1588 lock.l_start = SHARED_FIRST;
1589 lock.l_len = SHARED_SIZE;
1590 if( fcntl(pFile->h, F_SETLK, &lock)==(-1) ){
1591 /* This should never happen */
1592 rc = SQLITE_IOERR_RDLOCK;
1593 }
1594 }
1595 lock.l_type = F_UNLCK;
1596 lock.l_whence = SEEK_SET;
1597 lock.l_start = PENDING_BYTE;
1598 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
1599 if( fcntl(pFile->h, F_SETLK, &lock)!=(-1) ){
1600 pLock->locktype = SHARED_LOCK;
1601 }else{
1602 rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1603 }
1604 }
1605 if( locktype==NO_LOCK ){
1606 struct openCnt *pOpen;
1607
1608 /* Decrement the shared lock counter. Release the lock using an
1609 ** OS call only when all threads in this same process have released
1610 ** the lock.
1611 */
1612 pLock->cnt--;
1613 if( pLock->cnt==0 ){
1614 lock.l_type = F_UNLCK;
1615 lock.l_whence = SEEK_SET;
1616 lock.l_start = lock.l_len = 0L;
1617 if( fcntl(pFile->h, F_SETLK, &lock)!=(-1) ){
1618 pLock->locktype = NO_LOCK;
1619 }else{
1620 rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1621 }
1622 }
1623
1624 /* Decrement the count of locks against this same file. When the
1625 ** count reaches zero, close any other file descriptors whose close
1626 ** was deferred because of outstanding locks.
1627 */
1628 pOpen = pFile->pOpen;
1629 pOpen->nLock--;
1630 assert( pOpen->nLock>=0 );
1631 if( pOpen->nLock==0 && pOpen->nPending>0 ){
1632 int i;
1633 for(i=0; i<pOpen->nPending; i++){
1634 close(pOpen->aPending[i]);
1635 }
1636 free(pOpen->aPending);
1637 pOpen->nPending = 0;
1638 pOpen->aPending = 0;
1639 }
1640 }
1641 sqlite3OsLeaveMutex();
1642 pFile->locktype = locktype;
1643 return rc;
1644}
1645
1646/*
1647** Close a file.
1648*/
1649static int unixClose(OsFile **pId){
1650 unixFile *id = (unixFile*)*pId;
1651
1652 if( !id ) return SQLITE_OK;
1653 unixUnlock(*pId, NO_LOCK);
1654 if( id->dirfd>=0 ) close(id->dirfd);
1655 id->dirfd = -1;
1656 sqlite3OsEnterMutex();
1657
1658 if( id->pOpen->nLock ){
1659 /* If there are outstanding locks, do not actually close the file just
1660 ** yet because that would clear those locks. Instead, add the file
1661 ** descriptor to pOpen->aPending. It will be automatically closed when
1662 ** the last lock is cleared.
1663 */
1664 int *aNew;
1665 struct openCnt *pOpen = id->pOpen;
1666 aNew = realloc( pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
1667 if( aNew==0 ){
1668 /* If a malloc fails, just leak the file descriptor */
1669 }else{
1670 pOpen->aPending = aNew;
1671 pOpen->aPending[pOpen->nPending] = id->h;
1672 pOpen->nPending++;
1673 }
1674 }else{
1675 /* There are no outstanding locks so we can close the file immediately */
1676 close(id->h);
1677 }
1678 releaseLockInfo(id->pLock);
1679 releaseOpenCnt(id->pOpen);
1680
1681 sqlite3OsLeaveMutex();
1682 id->isOpen = 0;
1683 OSTRACE2("CLOSE %-3d\n", id->h);
1684 OpenCounter(-1);
1685 sqlite3ThreadSafeFree(id);
1686 *pId = 0;
1687 return SQLITE_OK;
1688}
1689
1690
1691#ifdef SQLITE_ENABLE_LOCKING_STYLE
1692#pragma mark AFP Support
1693
/*
** Per-file state used by the AFP locking routines.
**
**   sharedLockByte   Offset, relative to SHARED_FIRST, of the single byte
**                    this handle locks to represent its SHARED lock.  It
**                    is chosen by afpUnixLock().
**   filePath         Path that is passed to _AFPFSSetLock() for every
**                    lock and unlock request on this file.
*/
typedef struct afpLockingContext {
  unsigned long long sharedLockByte;
  char *filePath;
} afpLockingContext;
1702
/*
** Parameter block handed to fsctl() by _AFPFSSetLock() to lock or
** unlock a byte range.
*/
struct ByteRangeLockPB2
{
  unsigned long long offset;        /* offset to first byte to lock */
  unsigned long long length;        /* nbr of bytes to lock */
  unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
  unsigned char unLockFlag;         /* 1 = unlock, 0 = lock */
  unsigned char startEndFlag;       /* 1=rel to end of fork, 0=rel to start */
  int fd;                           /* file desc to assoc this lock with */
};

/*
** Request code passed to fsctl() together with a ByteRangeLockPB2.
** The macro name and the _IOWR() invocation must be separated by
** whitespace; otherwise the preprocessor sees a (malformed)
** function-like macro and afpfsByteRangeLock2FSCTL is never defined.
*/
#define afpfsByteRangeLock2FSCTL  _IOWR('z', 23, struct ByteRangeLockPB2)
1714
1715/* return 0 on success, 1 on failure. To match the behavior of the
1716 normal posix file locking (used in unixLock for example), we should
1717 provide 'richer' return codes - specifically to differentiate between
1718 'file busy' and 'file system error' results */
1719static int _AFPFSSetLock(const char *path, int fd, unsigned long long offset,
1720 unsigned long long length, int setLockFlag)
1721{
1722 struct ByteRangeLockPB2pb;
1723 int err;
1724
1725 pb.unLockFlag = setLockFlag ? 0 : 1;
1726 pb.startEndFlag = 0;
1727 pb.offset = offset;
1728 pb.length = length;
1729 pb.fd = fd;
1730 OSTRACE5("AFPLOCK setting lock %s for %d in range %llx:%llx\n",
1731 (setLockFlag?"ON":"OFF"), fd, offset, length);
1732 err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
1733 if ( err==-1 ) {
1734 OSTRACE4("AFPLOCK failed to fsctl() '%s' %d %s\n", path, errno,
1735 strerror(errno));
1736 return 1; /* error */
1737 } else {
1738 return 0;
1739 }
1740}
1741
1742/*
1743 ** This routine checks if there is a RESERVED lock held on the specified
1744 ** file by this or any other process. If such a lock is held, return
1745 ** non-zero. If the file is unlocked or holds only SHARED locks, then
1746 ** return zero.
1747 */
1748static int afpUnixCheckReservedLock(OsFile *id){
1749 int r = 0;
1750 unixFile *pFile = (unixFile*)id;
1751
1752 assert( pFile );
1753 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
1754
1755 /* Check if a thread in this process holds such a lock */
1756 if( pFile->locktype>SHARED_LOCK ){
1757 r = 1;
1758 }
1759
1760 /* Otherwise see if some other process holds it.
1761 */
1762 if ( !r ) {
1763 /* lock the byte */
1764 int failed = _AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1,1);
1765 if (failed) {
1766 /* if we failed to get the lock then someone else must have it */
1767 r = 1;
1768 } else {
1769 /* if we succeeded in taking the reserved lock, unlock it to restore
1770 ** the original state */
1771 _AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1, 0);
1772 }
1773 }
1774 OSTRACE3("TEST WR-LOCK %d %d\n", pFile->h, r);
1775
1776 return r;
1777}
1778
1779/* AFP-style locking following the behavior of unixLock, see the unixLock
1780** function comments for details of lock management. */
1781static int afpUnixLock(OsFile *id, int locktype)
1782{
1783 int rc = SQLITE_OK;
1784 unixFile *pFile = (unixFile*)id;
1785 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
1786 int gotPendingLock = 0;
1787
1788 assert( pFile );
1789 OSTRACE5("LOCK %d %s was %s pid=%d\n", pFile->h,
1790 locktypeName(locktype), locktypeName(pFile->locktype), getpid());
1791 /* If there is already a lock of this type or more restrictive on the
1792 ** OsFile, do nothing. Don't use the afp_end_lock: exit path, as
1793 ** sqlite3OsEnterMutex() hasn't been called yet.
1794 */
1795 if( pFile->locktype>=locktype ){
1796 OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
1797 locktypeName(locktype));
1798 return SQLITE_OK;
1799 }
1800
1801 /* Make sure the locking sequence is correct
1802 */
1803 assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
1804 assert( locktype!=PENDING_LOCK );
1805 assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
1806
1807 /* This mutex is needed because pFile->pLock is shared across threads
1808 */
1809 sqlite3OsEnterMutex();
1810
1811 /* Make sure the current thread owns the pFile.
1812 */
1813 rc = transferOwnership(pFile);
1814 if( rc!=SQLITE_OK ){
1815 sqlite3OsLeaveMutex();
1816 return rc;
1817 }
1818
1819 /* A PENDING lock is needed before acquiring a SHARED lock and before
1820 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1821 ** be released.
1822 */
1823 if( locktype==SHARED_LOCK
1824 || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
1825 ){
1826 int failed = _AFPFSSetLock(context->filePath, pFile->h,
1827 PENDING_BYTE, 1, 1);
1828 if (failed) {
1829 rc = SQLITE_BUSY;
1830 goto afp_end_lock;
1831 }
1832 }
1833
1834 /* If control gets to this point, then actually go ahead and make
1835 ** operating system calls for the specified lock.
1836 */
1837 if( locktype==SHARED_LOCK ){
1838 int lk, failed;
1839 int tries = 0;
1840
1841 /* Now get the read-lock */
1842 /* note that the quality of the randomness doesn't matter that much */
1843 lk = random();
1844 context->sharedLockByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1);
1845 failed = _AFPFSSetLock(context->filePath, pFile->h,
1846 SHARED_FIRST+context->sharedLockByte, 1, 1);
1847
1848 /* Drop the temporary PENDING lock */
1849 if (_AFPFSSetLock(context->filePath, pFile->h, PENDING_BYTE, 1, 0)) {
1850 rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1851 goto afp_end_lock;
1852 }
1853
1854 if( failed ){
1855 rc = SQLITE_BUSY;
1856 } else {
1857 pFile->locktype = SHARED_LOCK;
1858 }
1859 }else{
1860 /* The request was for a RESERVED or EXCLUSIVE lock. It is
1861 ** assumed that there is a SHARED or greater lock on the file
1862 ** already.
1863 */
1864 int failed = 0;
1865 assert( 0!=pFile->locktype );
1866 if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) {
1867 /* Acquire a RESERVED lock */
1868 failed = _AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1,1);
1869 }
1870 if (!failed && locktype == EXCLUSIVE_LOCK) {
1871 /* Acquire an EXCLUSIVE lock */
1872
1873 /* Remove the shared lock before trying the range. we'll need to
1874 ** reestablish the shared lock if we can't get the afpUnixUnlock
1875 */
1876 if (!_AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST +
1877 context->sharedLockByte, 1, 0)) {
1878 /* now attemmpt to get the exclusive lock range */
1879 failed = _AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST,
1880 SHARED_SIZE, 1);
1881 if (failed && _AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST +
1882 context->sharedLockByte, 1, 1)) {
1883 rc = SQLITE_IOERR_RDLOCK; /* this should never happen */
1884 }
1885 } else {
1886 /* */
1887 rc = SQLITE_IOERR_UNLOCK; /* this should never happen */
1888 }
1889 }
1890 if( failed && rc == SQLITE_OK){
1891 rc = SQLITE_BUSY;
1892 }
1893 }
1894
1895 if( rc==SQLITE_OK ){
1896 pFile->locktype = locktype;
1897 }else if( locktype==EXCLUSIVE_LOCK ){
1898 pFile->locktype = PENDING_LOCK;
1899 }
1900
1901afp_end_lock:
1902 sqlite3OsLeaveMutex();
1903 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
1904 rc==SQLITE_OK ? "ok" : "failed");
1905 return rc;
1906}
1907
1908/*
1909 ** Lower the locking level on file descriptor pFile to locktype. locktype
1910 ** must be either NO_LOCK or SHARED_LOCK.
1911 **
1912 ** If the locking level of the file descriptor is already at or below
1913 ** the requested locking level, this routine is a no-op.
1914 */
1915static int afpUnixUnlock(OsFile *id, int locktype) {
1916 struct flock lock;
1917 int rc = SQLITE_OK;
1918 unixFile *pFile = (unixFile*)id;
1919 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
1920
1921 assert( pFile );
1922 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
1923 pFile->locktype, getpid());
1924
1925 assert( locktype<=SHARED_LOCK );
1926 if( pFile->locktype<=locktype ){
1927 return SQLITE_OK;
1928 }
1929 if( CHECK_THREADID(pFile) ){
1930 return SQLITE_MISUSE;
1931 }
1932 sqlite3OsEnterMutex();
1933 if( pFile->locktype>SHARED_LOCK ){
1934 if( locktype==SHARED_LOCK ){
1935 int failed = 0;
1936
1937 /* unlock the exclusive range - then re-establish the shared lock */
1938 if (pFile->locktype==EXCLUSIVE_LOCK) {
1939 failed = _AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST,
1940 SHARED_SIZE, 0);
1941 if (!failed) {
1942 /* successfully removed the exclusive lock */
1943 if (_AFPFSSetLock(context->filePath, pFile->h, SHARED_FIRST+
1944 context->sharedLockByte, 1, 1)) {
1945 /* failed to re-establish our shared lock */
1946 rc = SQLITE_IOERR_RDLOCK; /* This should never happen */
1947 }
1948 } else {
1949 /* This should never happen - failed to unlock the exclusive range */
1950 rc = SQLITE_IOERR_UNLOCK;
1951 }
1952 }
1953 }
1954 if (rc == SQLITE_OK && pFile->locktype>=PENDING_LOCK) {
1955 if (_AFPFSSetLock(context->filePath, pFile->h, PENDING_BYTE, 1, 0)){
1956 /* failed to release the pending lock */
1957 rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1958 }
1959 }
1960 if (rc == SQLITE_OK && pFile->locktype>=RESERVED_LOCK) {
1961 if (_AFPFSSetLock(context->filePath, pFile->h, RESERVED_BYTE, 1, 0)) {
1962 /* failed to release the reserved lock */
1963 rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1964 }
1965 }
1966 }
1967 if( locktype==NO_LOCK ){
1968 int failed = _AFPFSSetLock(context->filePath, pFile->h,
1969 SHARED_FIRST + context->sharedLockByte, 1, 0);
1970 if (failed) {
1971 rc = SQLITE_IOERR_UNLOCK; /* This should never happen */
1972 }
1973 }
1974 if (rc == SQLITE_OK)
1975 pFile->locktype = locktype;
1976 sqlite3OsLeaveMutex();
1977 return rc;
1978}
1979
1980/*
1981 ** Close a file & cleanup AFP specific locking context
1982 */
1983static int afpUnixClose(OsFile **pId) {
1984 unixFile *id = (unixFile*)*pId;
1985
1986 if( !id ) return SQLITE_OK;
1987 afpUnixUnlock(*pId, NO_LOCK);
1988 /* free the AFP locking structure */
1989 if (id->lockingContext != NULL) {
1990 if (((afpLockingContext *)id->lockingContext)->filePath != NULL)
1991 sqlite3ThreadSafeFree(((afpLockingContext*)id->lockingContext)->filePath);
1992 sqlite3ThreadSafeFree(id->lockingContext);
1993 }
1994
1995 if( id->dirfd>=0 ) close(id->dirfd);
1996 id->dirfd = -1;
1997 close(id->h);
1998 id->isOpen = 0;
1999 OSTRACE2("CLOSE %-3d\n", id->h);
2000 OpenCounter(-1);
2001 sqlite3ThreadSafeFree(id);
2002 *pId = 0;
2003 return SQLITE_OK;
2004}
2005
2006
2007#pragma mark flock() style locking
2008
2009/*
2010 ** The flockLockingContext is not used
2011 */
2012typedef void flockLockingContext;
2013
2014static int flockUnixCheckReservedLock(OsFile *id) {
2015 unixFile *pFile = (unixFile*)id;
2016
2017 if (pFile->locktype == RESERVED_LOCK) {
2018 return 1; /* already have a reserved lock */
2019 } else {
2020 /* attempt to get the lock */
2021 int rc = flock(pFile->h, LOCK_EX | LOCK_NB);
2022 if (!rc) {
2023 /* got the lock, unlock it */
2024 flock(pFile->h, LOCK_UN);
2025 return 0; /* no one has it reserved */
2026 }
2027 return 1; /* someone else might have it reserved */
2028 }
2029}
2030
2031static int flockUnixLock(OsFile *id, int locktype) {
2032 unixFile *pFile = (unixFile*)id;
2033
2034 /* if we already have a lock, it is exclusive.
2035 ** Just adjust level and punt on outta here. */
2036 if (pFile->locktype > NO_LOCK) {
2037 pFile->locktype = locktype;
2038 return SQLITE_OK;
2039 }
2040
2041 /* grab an exclusive lock */
2042 int rc = flock(pFile->h, LOCK_EX | LOCK_NB);
2043 if (rc) {
2044 /* didn't get, must be busy */
2045 return SQLITE_BUSY;
2046 } else {
2047 /* got it, set the type and return ok */
2048 pFile->locktype = locktype;
2049 return SQLITE_OK;
2050 }
2051}
2052
2053static int flockUnixUnlock(OsFile *id, int locktype) {
2054 unixFile *pFile = (unixFile*)id;
2055
2056 assert( locktype<=SHARED_LOCK );
2057
2058 /* no-op if possible */
2059 if( pFile->locktype==locktype ){
2060 return SQLITE_OK;
2061 }
2062
2063 /* shared can just be set because we always have an exclusive */
2064 if (locktype==SHARED_LOCK) {
2065 pFile->locktype = locktype;
2066 return SQLITE_OK;
2067 }
2068
2069 /* no, really, unlock. */
2070 int rc = flock(pFile->h, LOCK_UN);
2071 if (rc)
2072 return SQLITE_IOERR_UNLOCK;
2073 else {
2074 pFile->locktype = NO_LOCK;
2075 return SQLITE_OK;
2076 }
2077}
2078
2079/*
2080 ** Close a file.
2081 */
2082static int flockUnixClose(OsFile **pId) {
2083 unixFile *id = (unixFile*)*pId;
2084
2085 if( !id ) return SQLITE_OK;
2086 flockUnixUnlock(*pId, NO_LOCK);
2087
2088 if( id->dirfd>=0 ) close(id->dirfd);
2089 id->dirfd = -1;
2090 sqlite3OsEnterMutex();
2091
2092 close(id->h);
2093 sqlite3OsLeaveMutex();
2094 id->isOpen = 0;
2095 OSTRACE2("CLOSE %-3d\n", id->h);
2096 OpenCounter(-1);
2097 sqlite3ThreadSafeFree(id);
2098 *pId = 0;
2099 return SQLITE_OK;
2100}
2101
2102#pragma mark Old-School .lock file based locking
2103
/*
** State specific to dotlock (.lock file) style locking: the path of the
** lock file that stands in for the lock.
*/
typedef struct dotlockLockingContext {
  char *lockPath;   /* Name of the ".lock" file */
} dotlockLockingContext;
2112
2113
2114static int dotlockUnixCheckReservedLock(OsFile *id) {
2115 unixFile *pFile = (unixFile*)id;
2116 dotlockLockingContext *context =
2117 (dotlockLockingContext *) pFile->lockingContext;
2118
2119 if (pFile->locktype == RESERVED_LOCK) {
2120 return 1; /* already have a reserved lock */
2121 } else {
2122 struct stat statBuf;
2123 if (lstat(context->lockPath,&statBuf) == 0)
2124 /* file exists, someone else has the lock */
2125 return 1;
2126 else
2127 /* file does not exist, we could have it if we want it */
2128 return 0;
2129 }
2130}
2131
2132static int dotlockUnixLock(OsFile *id, int locktype) {
2133 unixFile *pFile = (unixFile*)id;
2134 dotlockLockingContext *context =
2135 (dotlockLockingContext *) pFile->lockingContext;
2136
2137 /* if we already have a lock, it is exclusive.
2138 ** Just adjust level and punt on outta here. */
2139 if (pFile->locktype > NO_LOCK) {
2140 pFile->locktype = locktype;
2141
2142 /* Always update the timestamp on the old file */
2143 utimes(context->lockPath,NULL);
2144 return SQLITE_OK;
2145 }
2146
2147 /* check to see if lock file already exists */
2148 struct stat statBuf;
2149 if (lstat(context->lockPath,&statBuf) == 0){
2150 return SQLITE_BUSY; /* it does, busy */
2151 }
2152
2153 /* grab an exclusive lock */
2154 int fd = open(context->lockPath,O_RDONLY|O_CREAT|O_EXCL,0600);
2155 if (fd < 0) {
2156 /* failed to open/create the file, someone else may have stolen the lock */
2157 return SQLITE_BUSY;
2158 }
2159 close(fd);
2160
2161 /* got it, set the type and return ok */
2162 pFile->locktype = locktype;
2163 return SQLITE_OK;
2164}
2165
2166static int dotlockUnixUnlock(OsFile *id, int locktype) {
2167 unixFile *pFile = (unixFile*)id;
2168 dotlockLockingContext *context =
2169 (dotlockLockingContext *) pFile->lockingContext;
2170
2171 assert( locktype<=SHARED_LOCK );
2172
2173 /* no-op if possible */
2174 if( pFile->locktype==locktype ){
2175 return SQLITE_OK;
2176 }
2177
2178 /* shared can just be set because we always have an exclusive */
2179 if (locktype==SHARED_LOCK) {
2180 pFile->locktype = locktype;
2181 return SQLITE_OK;
2182 }
2183
2184 /* no, really, unlock. */
2185 unlink(context->lockPath);
2186 pFile->locktype = NO_LOCK;
2187 return SQLITE_OK;
2188}
2189
2190/*
2191 ** Close a file.
2192 */
2193static int dotlockUnixClose(OsFile **pId) {
2194 unixFile *id = (unixFile*)*pId;
2195
2196 if( !id ) return SQLITE_OK;
2197 dotlockUnixUnlock(*pId, NO_LOCK);
2198 /* free the dotlock locking structure */
2199 if (id->lockingContext != NULL) {
2200 if (((dotlockLockingContext *)id->lockingContext)->lockPath != NULL)
2201 sqlite3ThreadSafeFree( ( (dotlockLockingContext *)
2202 id->lockingContext)->lockPath);
2203 sqlite3ThreadSafeFree(id->lockingContext);
2204 }
2205
2206 if( id->dirfd>=0 ) close(id->dirfd);
2207 id->dirfd = -1;
2208 sqlite3OsEnterMutex();
2209
2210 close(id->h);
2211
2212 sqlite3OsLeaveMutex();
2213 id->isOpen = 0;
2214 OSTRACE2("CLOSE %-3d\n", id->h);
2215 OpenCounter(-1);
2216 sqlite3ThreadSafeFree(id);
2217 *pId = 0;
2218 return SQLITE_OK;
2219}
2220
2221
2222#pragma mark No locking
2223
2224/*
2225 ** The nolockLockingContext is void
2226 */
2227typedef void nolockLockingContext;
2228
2229static int nolockUnixCheckReservedLock(OsFile *id) {
2230 return 0;
2231}
2232
2233static int nolockUnixLock(OsFile *id, int locktype) {
2234 return SQLITE_OK;
2235}
2236
2237static int nolockUnixUnlock(OsFile *id, int locktype) {
2238 return SQLITE_OK;
2239}
2240
2241/*
2242 ** Close a file.
2243 */
2244static int nolockUnixClose(OsFile **pId) {
2245 unixFile *id = (unixFile*)*pId;
2246
2247 if( !id ) return SQLITE_OK;
2248 if( id->dirfd>=0 ) close(id->dirfd);
2249 id->dirfd = -1;
2250 sqlite3OsEnterMutex();
2251
2252 close(id->h);
2253
2254 sqlite3OsLeaveMutex();
2255 id->isOpen = 0;
2256 OSTRACE2("CLOSE %-3d\n", id->h);
2257 OpenCounter(-1);
2258 sqlite3ThreadSafeFree(id);
2259 *pId = 0;
2260 return SQLITE_OK;
2261}
2262
2263#endif /* SQLITE_ENABLE_LOCKING_STYLE */
2264
/*
** Turn a relative pathname into a full pathname.  Return a pointer
** to the full pathname stored in space obtained from sqliteMalloc().
** The calling function is responsible for freeing this space once it
** is no longer needed.
**
** A name that already starts with '/' is simply copied.  Otherwise the
** current working directory is prepended.  NULL is returned if the
** scratch buffer cannot be allocated or if getcwd() fails (for example
** because the working directory no longer exists or its name does not
** fit in the 5000 byte buffer).
*/
char *sqlite3UnixFullPathname(const char *zRelative){
  char *zFull = 0;
  if( zRelative[0]=='/' ){
    sqlite3SetString(&zFull, zRelative, (char*)0);
  }else{
    char *zCwd;
    char *zBuf = sqliteMalloc(5000);
    if( zBuf==0 ){
      return 0;
    }
    zBuf[0] = 0;
    zCwd = getcwd(zBuf, 5000);
    if( zCwd==0 ){
      /* sqlite3SetString() takes a NULL-terminated argument list, so a
      ** NULL from getcwd() must not be passed as its first string:
      ** it would look like the end of the list. */
      sqliteFree(zBuf);
      return 0;
    }
    sqlite3SetString(&zFull, zCwd, "/", zRelative, (char*)0);
    sqliteFree(zBuf);
  }

#if 0
  /*
  ** Remove "/./" path elements and convert "/A/./" path elements
  ** to just "/".
  */
  if( zFull ){
    int i, j;
    for(i=j=0; zFull[i]; i++){
      if( zFull[i]=='/' ){
        if( zFull[i+1]=='/' ) continue;
        if( zFull[i+1]=='.' && zFull[i+2]=='/' ){
          i += 1;
          continue;
        }
        if( zFull[i+1]=='.' && zFull[i+2]=='.' && zFull[i+3]=='/' ){
          while( j>0 && zFull[j-1]!='/' ){ j--; }
          i += 3;
          continue;
        }
      }
      zFull[j++] = zFull[i];
    }
    zFull[j] = 0;
  }
#endif

  return zFull;
}
2314
2315/*
2316** Change the value of the fullsync flag in the given file descriptor.
2317*/
2318static void unixSetFullSync(OsFile *id, int v){
2319 ((unixFile*)id)->fullSync = v;
2320}
2321
2322/*
2323** Return the underlying file handle for an OsFile
2324*/
2325static int unixFileHandle(OsFile *id){
2326 return ((unixFile*)id)->h;
2327}
2328
2329/*
2330** Return an integer that indices the type of lock currently held
2331** by this handle. (Used for testing and analysis only.)
2332*/
2333static int unixLockState(OsFile *id){
2334 return ((unixFile*)id)->locktype;
2335}
2336
2337/*
2338** Return the sector size in bytes of the underlying block device for
2339** the specified file. This is almost always 512 bytes, but may be
2340** larger for some devices.
2341**
2342** SQLite code assumes this function cannot fail. It also assumes that
2343** if two files are created in the same file-system directory (i.e.
2344** a database and it's journal file) that the sector size will be the
2345** same for both.
2346*/
2347static int unixSectorSize(OsFile *id){
2348 return SQLITE_DEFAULT_SECTOR_SIZE;
2349}
2350
2351/*
2352** This vector defines all the methods that can operate on an OsFile
2353** for unix.
2354*/
2355static const IoMethod sqlite3UnixIoMethod = {
2356 unixClose,
2357 unixOpenDirectory,
2358 unixRead,
2359 unixWrite,
2360 unixSeek,
2361 unixTruncate,
2362 unixSync,
2363 unixSetFullSync,
2364 unixFileHandle,
2365 unixFileSize,
2366 unixLock,
2367 unixUnlock,
2368 unixLockState,
2369 unixCheckReservedLock,
2370 unixSectorSize,
2371};
2372
2373#ifdef SQLITE_ENABLE_LOCKING_STYLE
2374/*
2375 ** This vector defines all the methods that can operate on an OsFile
2376 ** for unix with AFP style file locking.
2377 */
2378static const IoMethod sqlite3AFPLockingUnixIoMethod = {
2379 afpUnixClose,
2380 unixOpenDirectory,
2381 unixRead,
2382 unixWrite,
2383 unixSeek,
2384 unixTruncate,
2385 unixSync,
2386 unixSetFullSync,
2387 unixFileHandle,
2388 unixFileSize,
2389 afpUnixLock,
2390 afpUnixUnlock,
2391 unixLockState,
2392 afpUnixCheckReservedLock,
2393 unixSectorSize,
2394};
2395
2396/*
2397 ** This vector defines all the methods that can operate on an OsFile
2398 ** for unix with flock() style file locking.
2399 */
2400static const IoMethod sqlite3FlockLockingUnixIoMethod = {
2401 flockUnixClose,
2402 unixOpenDirectory,
2403 unixRead,
2404 unixWrite,
2405 unixSeek,
2406 unixTruncate,
2407 unixSync,
2408 unixSetFullSync,
2409 unixFileHandle,
2410 unixFileSize,
2411 flockUnixLock,
2412 flockUnixUnlock,
2413 unixLockState,
2414 flockUnixCheckReservedLock,
2415 unixSectorSize,
2416};
2417
2418/*
2419 ** This vector defines all the methods that can operate on an OsFile
2420 ** for unix with dotlock style file locking.
2421 */
2422static const IoMethod sqlite3DotlockLockingUnixIoMethod = {
2423 dotlockUnixClose,
2424 unixOpenDirectory,
2425 unixRead,
2426 unixWrite,
2427 unixSeek,
2428 unixTruncate,
2429 unixSync,
2430 unixSetFullSync,
2431 unixFileHandle,
2432 unixFileSize,
2433 dotlockUnixLock,
2434 dotlockUnixUnlock,
2435 unixLockState,
2436 dotlockUnixCheckReservedLock,
2437 unixSectorSize,
2438};
2439
2440/*
2441 ** This vector defines all the methods that can operate on an OsFile
2442 ** for unix with dotlock style file locking.
2443 */
2444static const IoMethod sqlite3NolockLockingUnixIoMethod = {
2445 nolockUnixClose,
2446 unixOpenDirectory,
2447 unixRead,
2448 unixWrite,
2449 unixSeek,
2450 unixTruncate,
2451 unixSync,
2452 unixSetFullSync,
2453 unixFileHandle,
2454 unixFileSize,
2455 nolockUnixLock,
2456 nolockUnixUnlock,
2457 unixLockState,
2458 nolockUnixCheckReservedLock,
2459 unixSectorSize,
2460};
2461
2462#endif /* SQLITE_ENABLE_LOCKING_STYLE */
2463
2464/*
2465** Allocate memory for a new unixFile and initialize that unixFile.
2466** Write a pointer to the new unixFile into *pId.
2467** If we run out of memory, close the file and return an error.
2468*/
2469#ifdef SQLITE_ENABLE_LOCKING_STYLE
2470/*
2471 ** When locking extensions are enabled, the filepath and locking style
2472 ** are needed to determine the unixFile pMethod to use for locking operations.
2473 ** The locking-style specific lockingContext data structure is created
2474 ** and assigned here also.
2475 */
2476static int allocateUnixFile(
2477 int h, /* Open file descriptor of file being opened */
2478 OsFile **pId, /* Write completed initialization here */
2479 const char *zFilename, /* Name of the file being opened */
2480 int delFlag /* Delete-on-or-before-close flag */
2481){
2482 sqlite3LockingStyle lockingStyle;
2483 unixFile *pNew;
2484 unixFile f;
2485 int rc;
2486
2487 memset(&f, 0, sizeof(f));
2488 lockingStyle = sqlite3DetectLockingStyle(zFilename, h);
2489 if ( lockingStyle == posixLockingStyle ) {
2490 sqlite3OsEnterMutex();
2491 rc = findLockInfo(h, &f.pLock, &f.pOpen);
2492 sqlite3OsLeaveMutex();
2493 if( rc ){
2494 close(h);
2495 unlink(zFilename);
2496 return SQLITE_NOMEM;
2497 }
2498 } else {
2499 /* pLock and pOpen are only used for posix advisory locking */
2500 f.pLock = NULL;
2501 f.pOpen = NULL;
2502 }
2503 if( delFlag ){
2504 unlink(zFilename);
2505 }
2506 f.dirfd = -1;
2507 f.h = h;
2508 SET_THREADID(&f);
2509 pNew = sqlite3ThreadSafeMalloc( sizeof(unixFile) );
2510 if( pNew==0 ){
2511 close(h);
2512 sqlite3OsEnterMutex();
2513 releaseLockInfo(f.pLock);
2514 releaseOpenCnt(f.pOpen);
2515 sqlite3OsLeaveMutex();
2516 *pId = 0;
2517 return SQLITE_NOMEM;
2518 }else{
2519 *pNew = f;
2520 switch(lockingStyle) {
2521 case afpLockingStyle: {
2522 /* afp locking uses the file path so it needs to be included in
2523 ** the afpLockingContext */
2524 int nFilename;
2525 pNew->pMethod = &sqlite3AFPLockingUnixIoMethod;
2526 pNew->lockingContext =
2527 sqlite3ThreadSafeMalloc(sizeof(afpLockingContext));
2528 nFilename = strlen(zFilename)+1;
2529 ((afpLockingContext *)pNew->lockingContext)->filePath =
2530 sqlite3ThreadSafeMalloc(nFilename);
2531 memcpy(((afpLockingContext *)pNew->lockingContext)->filePath,
2532 zFilename, nFilename);
2533 srandomdev();
2534 break;
2535 }
2536 case flockLockingStyle:
2537 /* flock locking doesn't need additional lockingContext information */
2538 pNew->pMethod = &sqlite3FlockLockingUnixIoMethod;
2539 break;
2540 case dotlockLockingStyle: {
2541 /* dotlock locking uses the file path so it needs to be included in
2542 ** the dotlockLockingContext */
2543 int nFilename;
2544 pNew->pMethod = &sqlite3DotlockLockingUnixIoMethod;
2545 pNew->lockingContext = sqlite3ThreadSafeMalloc(
2546 sizeof(dotlockLockingContext));
2547 nFilename = strlen(zFilename) + 6;
2548 ((dotlockLockingContext *)pNew->lockingContext)->lockPath =
2549 sqlite3ThreadSafeMalloc( nFilename );
2550 sqlite3_snprintf(nFilename,
2551 ((dotlockLockingContext *)pNew->lockingContext)->lockPath,
2552 "%s.lock", zFilename);
2553 break;
2554 }
2555 case posixLockingStyle:
2556 /* posix locking doesn't need additional lockingContext information */
2557 pNew->pMethod = &sqlite3UnixIoMethod;
2558 break;
2559 case noLockingStyle:
2560 case unsupportedLockingStyle:
2561 default:
2562 pNew->pMethod = &sqlite3NolockLockingUnixIoMethod;
2563 }
2564 *pId = (OsFile*)pNew;
2565 OpenCounter(+1);
2566 return SQLITE_OK;
2567 }
2568}
2569#else /* SQLITE_ENABLE_LOCKING_STYLE */
2570static int allocateUnixFile(
2571 int h, /* Open file descriptor on file being opened */
2572 OsFile **pId, /* Write the resul unixFile structure here */
2573 const char *zFilename, /* Name of the file being opened */
2574 int delFlag /* If true, delete the file on or before closing */
2575){
2576 unixFile *pNew;
2577 unixFile f;
2578 int rc;
2579
2580 memset(&f, 0, sizeof(f));
2581 sqlite3OsEnterMutex();
2582 rc = findLockInfo(h, &f.pLock, &f.pOpen);
2583 sqlite3OsLeaveMutex();
2584 if( delFlag ){
2585 unlink(zFilename);
2586 }
2587 if( rc ){
2588 close(h);
2589 return SQLITE_NOMEM;
2590 }
2591 OSTRACE3("OPEN %-3d %s\n", h, zFilename);
2592 f.dirfd = -1;
2593 f.h = h;
2594 SET_THREADID(&f);
2595 pNew = sqlite3ThreadSafeMalloc( sizeof(unixFile) );
2596 if( pNew==0 ){
2597 close(h);
2598 sqlite3OsEnterMutex();
2599 releaseLockInfo(f.pLock);
2600 releaseOpenCnt(f.pOpen);
2601 sqlite3OsLeaveMutex();
2602 *pId = 0;
2603 return SQLITE_NOMEM;
2604 }else{
2605 *pNew = f;
2606 pNew->pMethod = &sqlite3UnixIoMethod;
2607 *pId = (OsFile*)pNew;
2608 OpenCounter(+1);
2609 return SQLITE_OK;
2610 }
2611}
2612#endif /* SQLITE_ENABLE_LOCKING_STYLE */
2613
2614#endif /* SQLITE_OMIT_DISKIO */
2615/***************************************************************************
2616** Everything above deals with file I/O. Everything that follows deals
** with other miscellaneous aspects of the operating system interface
2618****************************************************************************/
2619
2620
2621#ifndef SQLITE_OMIT_LOAD_EXTENSION
2622/*
2623** Interfaces for opening a shared library, finding entry points
2624** within the shared library, and closing the shared library.
2625*/
2626#include <dlfcn.h>
/* Open the shared library zFilename; returns whatever dlopen() returns. */
void *sqlite3UnixDlopen(const char *zFilename){
  const int openFlags = RTLD_NOW | RTLD_GLOBAL;
  void *pLib = dlopen(zFilename, openFlags);
  return pLib;
}
/* Look up zSymbol in the library pHandle; returns whatever dlsym() returns. */
void *sqlite3UnixDlsym(void *pHandle, const char *zSymbol){
  void *pEntry = dlsym(pHandle, zSymbol);
  return pEntry;
}
/* Close the library pHandle; returns whatever dlclose() returns. */
int sqlite3UnixDlclose(void *pHandle){
  int rc = dlclose(pHandle);
  return rc;
}
2636#endif /* SQLITE_OMIT_LOAD_EXTENSION */
2637
2638/*
2639** Get information to seed the random number generator. The seed
2640** is written into the buffer zBuf[256]. The calling function must
2641** supply a sufficiently large buffer.
2642*/
2643int sqlite3UnixRandomSeed(char *zBuf){
2644 /* We have to initialize zBuf to prevent valgrind from reporting
2645 ** errors. The reports issued by valgrind are incorrect - we would
2646 ** prefer that the randomness be increased by making use of the
2647 ** uninitialized space in zBuf - but valgrind errors tend to worry
2648 ** some users. Rather than argue, it seems easier just to initialize
2649 ** the whole array and silence valgrind, even if that means less randomness
2650 ** in the random seed.
2651 **
2652 ** When testing, initializing zBuf[] to zero is all we do. That means
2653 ** that we always use the same random number sequence. This makes the
2654 ** tests repeatable.
2655 */
2656 memset(zBuf, 0, 256);
2657#if !defined(SQLITE_TEST)
2658 {
2659 int pid, fd;
2660 fd = open("/dev/urandom", O_RDONLY);
2661 if( fd<0 ){
2662 time_t t;
2663 time(&t);
2664 memcpy(zBuf, &t, sizeof(t));
2665 pid = getpid();
2666 memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
2667 }else{
2668 read(fd, zBuf, 256);
2669 close(fd);
2670 }
2671 }
2672#endif
2673 return SQLITE_OK;
2674}
2675
/*
** Sleep for a little while.  The argument is the number of milliseconds
** we want to sleep; the return value is the number of milliseconds the
** sleep was requested for.  Without usleep() the request is rounded up
** to a whole number of seconds.
*/
int sqlite3UnixSleep(int ms){
#if defined(HAVE_USLEEP) && HAVE_USLEEP
  usleep(ms*1000);
  return ms;
#else
  int nSec = (ms+999)/1000;   /* round up to whole seconds */
  sleep(nSec);
  return 1000*nSec;
#endif
}
2689
2690/*
2691** Static variables used for thread synchronization.
2692**
2693** inMutex the nesting depth of the recursive mutex. The thread
2694** holding mutexMain can read this variable at any time.
**              But it must hold mutexAux to change this variable.  Other
2696** threads must hold mutexAux to read the variable and can
2697** never write.
2698**
2699** mutexOwner The thread id of the thread holding mutexMain. Same
2700** access rules as for inMutex.
2701**
2702** mutexOwnerValid True if the value in mutexOwner is valid. The same
2703** access rules apply as for inMutex.
2704**
2705** mutexMain The main mutex. Hold this mutex in order to get exclusive
2706** access to SQLite data structures.
2707**
2708** mutexAux An auxiliary mutex needed to access variables defined above.
2709**
2710** Mutexes are always acquired in this order: mutexMain mutexAux. It
2711** is not necessary to acquire mutexMain in order to get mutexAux - just
2712** do not attempt to acquire them in the reverse order: mutexAux mutexMain.
2713** Either get the mutexes with mutexMain first or get mutexAux only.
2714**
2715** When running on a platform where the three variables inMutex, mutexOwner,
2716** and mutexOwnerValid can be set atomically, the mutexAux is not required.
2717** On many systems, all three are 32-bit integers and writing to a 32-bit
2718** integer is atomic. I think. But there are no guarantees. So it seems
2719** safer to protect them using mutexAux.
2720*/
#ifdef SQLITE_UNIX_THREADS
/* The main mutex */
static pthread_mutex_t mutexMain = PTHREAD_MUTEX_INITIALIZER;
/* Aux mutex */
static pthread_mutex_t mutexAux = PTHREAD_MUTEX_INITIALIZER;
/* Thread holding mutexMain */
static pthread_t mutexOwner;
/* True if mutexOwner is valid */
static int mutexOwnerValid = 0;
#endif
/* Nesting depth of the recursive mutex */
static int inMutex = 0;
2728
2729/*
2730** The following pair of routine implement mutual exclusion for
2731** multi-threaded processes. Only a single thread is allowed to
2732** executed code that is surrounded by EnterMutex() and LeaveMutex().
2733**
2734** SQLite uses only a single Mutex. There is not much critical
2735** code and what little there is executes quickly and without blocking.
2736**
2737** As of version 3.3.2, this mutex must be recursive.
2738*/
2739void sqlite3UnixEnterMutex(){
2740#ifdef SQLITE_UNIX_THREADS
2741 pthread_mutex_lock(&mutexAux);
2742 if( !mutexOwnerValid || !pthread_equal(mutexOwner, pthread_self()) ){
2743 pthread_mutex_unlock(&mutexAux);
2744 pthread_mutex_lock(&mutexMain);
2745 assert( inMutex==0 );
2746 assert( !mutexOwnerValid );
2747 pthread_mutex_lock(&mutexAux);
2748 mutexOwner = pthread_self();
2749 mutexOwnerValid = 1;
2750 }
2751 inMutex++;
2752 pthread_mutex_unlock(&mutexAux);
2753#else
2754 inMutex++;
2755#endif
2756}
2757void sqlite3UnixLeaveMutex(){
2758 assert( inMutex>0 );
2759#ifdef SQLITE_UNIX_THREADS
2760 pthread_mutex_lock(&mutexAux);
2761 inMutex--;
2762 assert( pthread_equal(mutexOwner, pthread_self()) );
2763 if( inMutex==0 ){
2764 assert( mutexOwnerValid );
2765 mutexOwnerValid = 0;
2766 pthread_mutex_unlock(&mutexMain);
2767 }
2768 pthread_mutex_unlock(&mutexAux);
2769#else
2770 inMutex--;
2771#endif
2772}
2773
2774/*
2775** Return TRUE if the mutex is currently held.
2776**
2777** If the thisThrd parameter is true, return true only if the
2778** calling thread holds the mutex. If the parameter is false, return
2779** true if any thread holds the mutex.
2780*/
2781int sqlite3UnixInMutex(int thisThrd){
2782#ifdef SQLITE_UNIX_THREADS
2783 int rc;
2784 pthread_mutex_lock(&mutexAux);
2785 rc = inMutex>0 && (thisThrd==0 || pthread_equal(mutexOwner,pthread_self()));
2786 pthread_mutex_unlock(&mutexAux);
2787 return rc;
2788#else
2789 return inMutex>0;
2790#endif
2791}
2792
/*
** Remember the number of thread-specific-data blocks allocated.
** Use this to verify that we are not leaking thread-specific-data.
** Ticket #1601
**
** TSD_COUNTER(N) adds N to sqlite3_tsd_count in test builds and does
** nothing otherwise.  Every variant expands to exactly one statement
** (to be followed by a semicolon at the call site), so the macro is
** safe to use as the body of an unbraced if/else.
*/
#ifdef SQLITE_TEST
int sqlite3_tsd_count = 0;
# ifdef SQLITE_UNIX_THREADS
    static pthread_mutex_t tsd_counter_mutex = PTHREAD_MUTEX_INITIALIZER;
#   define TSD_COUNTER(N) \
      do{ \
        pthread_mutex_lock(&tsd_counter_mutex); \
        sqlite3_tsd_count += (N); \
        pthread_mutex_unlock(&tsd_counter_mutex); \
      }while(0)
# else
#   define TSD_COUNTER(N)  do{ sqlite3_tsd_count += (N); }while(0)
# endif
#else
# define TSD_COUNTER(N)  ((void)0)  /* no-op */
#endif
2812
2813/*
2814** If called with allocateFlag>0, then return a pointer to thread
2815** specific data for the current thread. Allocate and zero the
2816** thread-specific data if it does not already exist.
2817**
2818** If called with allocateFlag==0, then check the current thread
2819** specific data. Return it if it exists. If it does not exist,
2820** then return NULL.
2821**
2822** If called with allocateFlag<0, check to see if the thread specific
2823** data is allocated and is all zero. If it is then deallocate it.
2824** Return a pointer to the thread specific data or NULL if it is
2825** unallocated or gets deallocated.
2826*/
2827ThreadData *sqlite3UnixThreadSpecificData(int allocateFlag){
2828 static const ThreadData zeroData = {0}; /* Initializer to silence warnings
2829 ** from broken compilers */
2830#ifdef SQLITE_UNIX_THREADS
2831 static pthread_key_t key;
2832 static int keyInit = 0;
2833 ThreadData *pTsd;
2834
2835 if( !keyInit ){
2836 sqlite3OsEnterMutex();
2837 if( !keyInit ){
2838 int rc;
2839 rc = pthread_key_create(&key, 0);
2840 if( rc ){
2841 sqlite3OsLeaveMutex();
2842 return 0;
2843 }
2844 keyInit = 1;
2845 }
2846 sqlite3OsLeaveMutex();
2847 }
2848
2849 pTsd = pthread_getspecific(key);
2850 if( allocateFlag>0 ){
2851 if( pTsd==0 ){
2852 if( !sqlite3TestMallocFail() ){
2853 pTsd = sqlite3OsMalloc(sizeof(zeroData));
2854 }
2855#ifdef SQLITE_MEMDEBUG
2856 sqlite3_isFail = 0;
2857#endif
2858 if( pTsd ){
2859 *pTsd = zeroData;
2860 pthread_setspecific(key, pTsd);
2861 TSD_COUNTER(+1);
2862 }
2863 }
2864 }else if( pTsd!=0 && allocateFlag<0
2865 && memcmp(pTsd, &zeroData, sizeof(ThreadData))==0 ){
2866 sqlite3OsFree(pTsd);
2867 pthread_setspecific(key, 0);
2868 TSD_COUNTER(-1);
2869 pTsd = 0;
2870 }
2871 return pTsd;
2872#else
2873 static ThreadData *pTsd = 0;
2874 if( allocateFlag>0 ){
2875 if( pTsd==0 ){
2876 if( !sqlite3TestMallocFail() ){
2877 pTsd = sqlite3OsMalloc( sizeof(zeroData) );
2878 }
2879#ifdef SQLITE_MEMDEBUG
2880 sqlite3_isFail = 0;
2881#endif
2882 if( pTsd ){
2883 *pTsd = zeroData;
2884 TSD_COUNTER(+1);
2885 }
2886 }
2887 }else if( pTsd!=0 && allocateFlag<0
2888 && memcmp(pTsd, &zeroData, sizeof(ThreadData))==0 ){
2889 sqlite3OsFree(pTsd);
2890 TSD_COUNTER(-1);
2891 pTsd = 0;
2892 }
2893 return pTsd;
2894#endif
2895}
2896
/*
** The following variable, if set to a non-zero value, becomes the result
** returned from sqlite3OsCurrentTime().  This is used for testing.
*/
#ifdef SQLITE_TEST
int sqlite3_current_time = 0;
#endif

/*
** Find the current time (in Universal Coordinated Time).  Write the
** current time and date as a Julian Day number into *prNow.
** This implementation does not check the clock calls for failure and
** always returns 0.
*/
int sqlite3UnixCurrentTime(double *prNow){
  double rJulian;   /* Julian Day number to report */
#ifdef NO_GETTOD
  time_t t;
  time(&t);
  rJulian = t/86400.0 + 2440587.5;
#else
  struct timeval sNow;
  gettimeofday(&sNow, 0);
  rJulian = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
#endif
#ifdef SQLITE_TEST
  /* A non-zero test override replaces the real clock reading. */
  if( sqlite3_current_time ){
    rJulian = sqlite3_current_time/86400.0 + 2440587.5;
  }
#endif
  *prNow = rJulian;
  return 0;
}
2927
2928#endif /* OS_UNIX */

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status