// Diskv (disk-vee) is a simple, persistent, key-value store. // It stores all data flatly on the filesystem. package diskv import ( "bytes" "errors" "fmt" "io" "io/ioutil" "os" "path/filepath" "strings" "sync" "syscall" ) const ( defaultBasePath = "diskv" defaultFilePerm os.FileMode = 0666 defaultPathPerm os.FileMode = 0777 ) var ( defaultTransform = func(s string) []string { return []string{} } errCanceled = errors.New("canceled") errEmptyKey = errors.New("empty key") errBadKey = errors.New("bad key") errImportDirectory = errors.New("can't import a directory") ) // TransformFunction transforms a key into a slice of strings, with each // element in the slice representing a directory in the file path where the // key's entry will eventually be stored. // // For example, if TransformFunc transforms "abcdef" to ["ab", "cde", "f"], // the final location of the data file will be /ab/cde/f/abcdef type TransformFunction func(s string) []string // Options define a set of properties that dictate Diskv behavior. // All values are optional. type Options struct { BasePath string Transform TransformFunction CacheSizeMax uint64 // bytes PathPerm os.FileMode FilePerm os.FileMode // If TempDir is set, it will enable filesystem atomic writes by // writing temporary files to that location before being moved // to BasePath. // Note that TempDir MUST be on the same device/partition as // BasePath. TempDir string Index Index IndexLess LessFunction Compression Compression } // Diskv implements the Diskv interface. You shouldn't construct Diskv // structures directly; instead, use the New constructor. type Diskv struct { Options mu sync.RWMutex cache map[string][]byte cacheSize uint64 } // New returns an initialized Diskv structure, ready to use. // If the path identified by baseDir already contains data, // it will be accessible, but not yet cached. func New(o Options) *Diskv { if o.BasePath == "" { o.BasePath = defaultBasePath } if o.Transform == nil { o.Transform = defaultTransform } if o.PathPerm == 0 { o.PathPerm = defaultPathPerm } if o.FilePerm == 0 { o.FilePerm = defaultFilePerm } d := &Diskv{ Options: o, cache: map[string][]byte{}, cacheSize: 0, } if d.Index != nil && d.IndexLess != nil { d.Index.Initialize(d.IndexLess, d.Keys(nil)) } return d } // Write synchronously writes the key-value pair to disk, making it immediately // available for reads. Write relies on the filesystem to perform an eventual // sync to physical media. If you need stronger guarantees, see WriteStream. func (d *Diskv) Write(key string, val []byte) error { return d.WriteStream(key, bytes.NewBuffer(val), false) } // WriteStream writes the data represented by the io.Reader to the disk, under // the provided key. If sync is true, WriteStream performs an explicit sync on // the file as soon as it's written. // // bytes.Buffer provides io.Reader semantics for basic data types. func (d *Diskv) WriteStream(key string, r io.Reader, sync bool) error { if len(key) <= 0 { return errEmptyKey } d.mu.Lock() defer d.mu.Unlock() return d.writeStreamWithLock(key, r, sync) } // createKeyFileWithLock either creates the key file directly, or // creates a temporary file in TempDir if it is set. func (d *Diskv) createKeyFileWithLock(key string) (*os.File, error) { if d.TempDir != "" { if err := os.MkdirAll(d.TempDir, d.PathPerm); err != nil { return nil, fmt.Errorf("temp mkdir: %s", err) } f, err := ioutil.TempFile(d.TempDir, "") if err != nil { return nil, fmt.Errorf("temp file: %s", err) } if err := f.Chmod(d.FilePerm); err != nil { f.Close() // error deliberately ignored os.Remove(f.Name()) // error deliberately ignored return nil, fmt.Errorf("chmod: %s", err) } return f, nil } mode := os.O_WRONLY | os.O_CREATE | os.O_TRUNC // overwrite if exists f, err := os.OpenFile(d.completeFilename(key), mode, d.FilePerm) if err != nil { return nil, fmt.Errorf("open file: %s", err) } return f, nil } // writeStream does no input validation checking. func (d *Diskv) writeStreamWithLock(key string, r io.Reader, sync bool) error { if err := d.ensurePathWithLock(key); err != nil { return fmt.Errorf("ensure path: %s", err) } f, err := d.createKeyFileWithLock(key) if err != nil { return fmt.Errorf("create key file: %s", err) } wc := io.WriteCloser(&nopWriteCloser{f}) if d.Compression != nil { wc, err = d.Compression.Writer(f) if err != nil { f.Close() // error deliberately ignored os.Remove(f.Name()) // error deliberately ignored return fmt.Errorf("compression writer: %s", err) } } if _, err := io.Copy(wc, r); err != nil { f.Close() // error deliberately ignored os.Remove(f.Name()) // error deliberately ignored return fmt.Errorf("i/o copy: %s", err) } if err := wc.Close(); err != nil { f.Close() // error deliberately ignored os.Remove(f.Name()) // error deliberately ignored return fmt.Errorf("compression close: %s", err) } if sync { if err := f.Sync(); err != nil { f.Close() // error deliberately ignored os.Remove(f.Name()) // error deliberately ignored return fmt.Errorf("file sync: %s", err) } } if err := f.Close(); err != nil { return fmt.Errorf("file close: %s", err) } if f.Name() != d.completeFilename(key) { if err := os.Rename(f.Name(), d.completeFilename(key)); err != nil { os.Remove(f.Name()) // error deliberately ignored return fmt.Errorf("rename: %s", err) } } if d.Index != nil { d.Index.Insert(key) } d.bustCacheWithLock(key) // cache only on read return nil } // Import imports the source file into diskv under the destination key. If the // destination key already exists, it's overwritten. If move is true, the // source file is removed after a successful import. func (d *Diskv) Import(srcFilename, dstKey string, move bool) (err error) { if dstKey == "" { return errEmptyKey } if fi, err := os.Stat(srcFilename); err != nil { return err } else if fi.IsDir() { return errImportDirectory } d.mu.Lock() defer d.mu.Unlock() if err := d.ensurePathWithLock(dstKey); err != nil { return fmt.Errorf("ensure path: %s", err) } if move { if err := syscall.Rename(srcFilename, d.completeFilename(dstKey)); err == nil { d.bustCacheWithLock(dstKey) return nil } else if err != syscall.EXDEV { // If it failed due to being on a different device, fall back to copying return err } } f, err := os.Open(srcFilename) if err != nil { return err } defer f.Close() err = d.writeStreamWithLock(dstKey, f, false) if err == nil && move { err = os.Remove(srcFilename) } return err } // Read reads the key and returns the value. // If the key is available in the cache, Read won't touch the disk. // If the key is not in the cache, Read will have the side-effect of // lazily caching the value. func (d *Diskv) Read(key string) ([]byte, error) { rc, err := d.ReadStream(key, false) if err != nil { return []byte{}, err } defer rc.Close() return ioutil.ReadAll(rc) } // ReadStream reads the key and returns the value (data) as an io.ReadCloser. // If the value is cached from a previous read, and direct is false, // ReadStream will use the cached value. Otherwise, it will return a handle to // the file on disk, and cache the data on read. // // If direct is true, ReadStream will lazily delete any cached value for the // key, and return a direct handle to the file on disk. // // If compression is enabled, ReadStream taps into the io.Reader stream prior // to decompression, and caches the compressed data. func (d *Diskv) ReadStream(key string, direct bool) (io.ReadCloser, error) { d.mu.RLock() defer d.mu.RUnlock() if val, ok := d.cache[key]; ok { if !direct { buf := bytes.NewBuffer(val) if d.Compression != nil { return d.Compression.Reader(buf) } return ioutil.NopCloser(buf), nil } go func() { d.mu.Lock() defer d.mu.Unlock() d.uncacheWithLock(key, uint64(len(val))) }() } return d.readWithRLock(key) } // read ignores the cache, and returns an io.ReadCloser representing the // decompressed data for the given key, streamed from the disk. Clients should // acquire a read lock on the Diskv and check the cache themselves before // calling read. func (d *Diskv) readWithRLock(key string) (io.ReadCloser, error) { filename := d.completeFilename(key) fi, err := os.Stat(filename) if err != nil { return nil, err } if fi.IsDir() { return nil, os.ErrNotExist } f, err := os.Open(filename) if err != nil { return nil, err } var r io.Reader if d.CacheSizeMax > 0 { r = newSiphon(f, d, key) } else { r = &closingReader{f} } var rc = io.ReadCloser(ioutil.NopCloser(r)) if d.Compression != nil { rc, err = d.Compression.Reader(r) if err != nil { return nil, err } } return rc, nil } // closingReader provides a Reader that automatically closes the // embedded ReadCloser when it reaches EOF type closingReader struct { rc io.ReadCloser } func (cr closingReader) Read(p []byte) (int, error) { n, err := cr.rc.Read(p) if err == io.EOF { if closeErr := cr.rc.Close(); closeErr != nil { return n, closeErr // close must succeed for Read to succeed } } return n, err } // siphon is like a TeeReader: it copies all data read through it to an // internal buffer, and moves that buffer to the cache at EOF. type siphon struct { f *os.File d *Diskv key string buf *bytes.Buffer } // newSiphon constructs a siphoning reader that represents the passed file. // When a successful series of reads ends in an EOF, the siphon will write // the buffered data to Diskv's cache under the given key. func newSiphon(f *os.File, d *Diskv, key string) io.Reader { return &siphon{ f: f, d: d, key: key, buf: &bytes.Buffer{}, } } // Read implements the io.Reader interface for siphon. func (s *siphon) Read(p []byte) (int, error) { n, err := s.f.Read(p) if err == nil { return s.buf.Write(p[0:n]) // Write must succeed for Read to succeed } if err == io.EOF { s.d.cacheWithoutLock(s.key, s.buf.Bytes()) // cache may fail if closeErr := s.f.Close(); closeErr != nil { return n, closeErr // close must succeed for Read to succeed } return n, err } return n, err } // Erase synchronously erases the given key from the disk and the cache. func (d *Diskv) Erase(key string) error { d.mu.Lock() defer d.mu.Unlock() d.bustCacheWithLock(key) // erase from index if d.Index != nil { d.Index.Delete(key) } // erase from disk filename := d.completeFilename(key) if s, err := os.Stat(filename); err == nil { if s.IsDir() { return errBadKey } if err = os.Remove(filename); err != nil { return err } } else { // Return err as-is so caller can do os.IsNotExist(err). return err } // clean up and return d.pruneDirsWithLock(key) return nil } // EraseAll will delete all of the data from the store, both in the cache and on // the disk. Note that EraseAll doesn't distinguish diskv-related data from non- // diskv-related data. Care should be taken to always specify a diskv base // directory that is exclusively for diskv data. func (d *Diskv) EraseAll() error { d.mu.Lock() defer d.mu.Unlock() d.cache = make(map[string][]byte) d.cacheSize = 0 if d.TempDir != "" { os.RemoveAll(d.TempDir) // errors ignored } return os.RemoveAll(d.BasePath) } // Has returns true if the given key exists. func (d *Diskv) Has(key string) bool { d.mu.Lock() defer d.mu.Unlock() if _, ok := d.cache[key]; ok { return true } filename := d.completeFilename(key) s, err := os.Stat(filename) if err != nil { return false } if s.IsDir() { return false } return true } // Keys returns a channel that will yield every key accessible by the store, // in undefined order. If a cancel channel is provided, closing it will // terminate and close the keys channel. func (d *Diskv) Keys(cancel <-chan struct{}) <-chan string { return d.KeysPrefix("", cancel) } // KeysPrefix returns a channel that will yield every key accessible by the // store with the given prefix, in undefined order. If a cancel channel is // provided, closing it will terminate and close the keys channel. If the // provided prefix is the empty string, all keys will be yielded. func (d *Diskv) KeysPrefix(prefix string, cancel <-chan struct{}) <-chan string { var prepath string if prefix == "" { prepath = d.BasePath } else { prepath = d.pathFor(prefix) } c := make(chan string) go func() { filepath.Walk(prepath, walker(c, prefix, cancel)) close(c) }() return c } // walker returns a function which satisfies the filepath.WalkFunc interface. // It sends every non-directory file entry down the channel c. func walker(c chan<- string, prefix string, cancel <-chan struct{}) filepath.WalkFunc { return func(path string, info os.FileInfo, err error) error { if err != nil { return err } if info.IsDir() || !strings.HasPrefix(info.Name(), prefix) { return nil // "pass" } select { case c <- info.Name(): case <-cancel: return errCanceled } return nil } } // pathFor returns the absolute path for location on the filesystem where the // data for the given key will be stored. func (d *Diskv) pathFor(key string) string { return filepath.Join(d.BasePath, filepath.Join(d.Transform(key)...)) } // ensurePathWithLock is a helper function that generates all necessary // directories on the filesystem for the given key. func (d *Diskv) ensurePathWithLock(key string) error { return os.MkdirAll(d.pathFor(key), d.PathPerm) } // completeFilename returns the absolute path to the file for the given key. func (d *Diskv) completeFilename(key string) string { return filepath.Join(d.pathFor(key), key) } // cacheWithLock attempts to cache the given key-value pair in the store's // cache. It can fail if the value is larger than the cache's maximum size. func (d *Diskv) cacheWithLock(key string, val []byte) error { valueSize := uint64(len(val)) if err := d.ensureCacheSpaceWithLock(valueSize); err != nil { return fmt.Errorf("%s; not caching", err) } // be very strict about memory guarantees if (d.cacheSize + valueSize) > d.CacheSizeMax { panic(fmt.Sprintf("failed to make room for value (%d/%d)", valueSize, d.CacheSizeMax)) } d.cache[key] = val d.cacheSize += valueSize return nil } // cacheWithoutLock acquires the store's (write) mutex and calls cacheWithLock. func (d *Diskv) cacheWithoutLock(key string, val []byte) error { d.mu.Lock() defer d.mu.Unlock() return d.cacheWithLock(key, val) } func (d *Diskv) bustCacheWithLock(key string) { if val, ok := d.cache[key]; ok { d.uncacheWithLock(key, uint64(len(val))) } } func (d *Diskv) uncacheWithLock(key string, sz uint64) { d.cacheSize -= sz delete(d.cache, key) } // pruneDirsWithLock deletes empty directories in the path walk leading to the // key k. Typically this function is called after an Erase is made. func (d *Diskv) pruneDirsWithLock(key string) error { pathlist := d.Transform(key) for i := range pathlist { dir := filepath.Join(d.BasePath, filepath.Join(pathlist[:len(pathlist)-i]...)) // thanks to Steven Blenkinsop for this snippet switch fi, err := os.Stat(dir); true { case err != nil: return err case !fi.IsDir(): panic(fmt.Sprintf("corrupt dirstate at %s", dir)) } nlinks, err := filepath.Glob(filepath.Join(dir, "*")) if err != nil { return err } else if len(nlinks) > 0 { return nil // has subdirs -- do not prune } if err = os.Remove(dir); err != nil { return err } } return nil } // ensureCacheSpaceWithLock deletes entries from the cache in arbitrary order // until the cache has at least valueSize bytes available. func (d *Diskv) ensureCacheSpaceWithLock(valueSize uint64) error { if valueSize > d.CacheSizeMax { return fmt.Errorf("value size (%d bytes) too large for cache (%d bytes)", valueSize, d.CacheSizeMax) } safe := func() bool { return (d.cacheSize + valueSize) <= d.CacheSizeMax } for key, val := range d.cache { if safe() { break } d.uncacheWithLock(key, uint64(len(val))) } if !safe() { panic(fmt.Sprintf("%d bytes still won't fit in the cache! (max %d bytes)", valueSize, d.CacheSizeMax)) } return nil } // nopWriteCloser wraps an io.Writer and provides a no-op Close method to // satisfy the io.WriteCloser interface. type nopWriteCloser struct { io.Writer } func (wc *nopWriteCloser) Write(p []byte) (int, error) { return wc.Writer.Write(p) } func (wc *nopWriteCloser) Close() error { return nil }