From e34c133ec6053025124416a3861f9f4c4f7fd772 Mon Sep 17 00:00:00 2001 From: Dees_Troy Date: Wed, 6 Feb 2013 19:13:00 +0000 Subject: Add write buffer for tar writes update fuse to 2.9.2 catch return from unlink so that we don't print error messages when things work Change-Id: I1115039a0fa5d9d73f78ef1abd79755d7ffd9d96 --- fuse/Android.mk | 1 + fuse/buffer.c | 318 ++++++++ fuse/cuse_lowlevel.c | 8 +- fuse/fuse.c | 1449 ++++++++++++++++++++++++++++------- fuse/fuse_i.h | 31 +- fuse/fuse_kern_chan.c | 2 +- fuse/fuse_loop.c | 11 +- fuse/fuse_loop_mt.c | 76 +- fuse/fuse_lowlevel.c | 1380 +++++++++++++++++++++++++++++---- fuse/fuse_misc.h | 8 +- fuse/fuse_mt.c | 12 +- fuse/fuse_opt.c | 25 +- fuse/fuse_session.c | 28 + fuse/helper.c | 43 +- fuse/include/fuse.h | 143 +++- fuse/include/fuse_common.h | 217 +++++- fuse/include/fuse_compat.h | 4 +- fuse/include/fuse_kernel.h | 104 ++- fuse/include/fuse_lowlevel.h | 288 ++++++- fuse/include/fuse_lowlevel_compat.h | 4 +- fuse/include/fuse_opt.h | 8 +- fuse/mount.c | 66 +- fuse/mount_util.c | 125 ++- fuse/mount_util.h | 1 + fuse/ulockmgr.c | 4 + 25 files changed, 3741 insertions(+), 615 deletions(-) create mode 100644 fuse/buffer.c (limited to 'fuse') diff --git a/fuse/Android.mk b/fuse/Android.mk index 08559594b..59cd9b3ae 100644 --- a/fuse/Android.mk +++ b/fuse/Android.mk @@ -17,6 +17,7 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) LOCAL_SRC_FILES := \ + buffer.c \ cuse_lowlevel.c \ fuse.c \ fuse_kern_chan.c \ diff --git a/fuse/buffer.c b/fuse/buffer.c new file mode 100644 index 000000000..053e396bc --- /dev/null +++ b/fuse/buffer.c @@ -0,0 +1,318 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2010 Miklos Szeredi + + This program can be distributed under the terms of the GNU LGPLv2. + See the file COPYING.LIB +*/ + +#define _GNU_SOURCE + +#include "config.h" +#include "fuse_i.h" +#include "fuse_lowlevel.h" +#include +#include +#include +#include + +size_t fuse_buf_size(const struct fuse_bufvec *bufv) +{ + size_t i; + size_t size = 0; + + for (i = 0; i < bufv->count; i++) { + if (bufv->buf[i].size == SIZE_MAX) + size = SIZE_MAX; + else + size += bufv->buf[i].size; + } + + return size; +} + +static size_t min_size(size_t s1, size_t s2) +{ + return s1 < s2 ? s1 : s2; +} + +static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, + size_t len) +{ + ssize_t res = 0; + size_t copied = 0; + + while (len) { + if (dst->flags & FUSE_BUF_FD_SEEK) { + res = pwrite(dst->fd, src->mem + src_off, len, + dst->pos + dst_off); + } else { + res = write(dst->fd, src->mem + src_off, len); + } + if (res == -1) { + if (!copied) + return -errno; + break; + } + if (res == 0) + break; + + copied += res; + if (!(dst->flags & FUSE_BUF_FD_RETRY)) + break; + + src_off += res; + dst_off += res; + len -= res; + } + + return copied; +} + +static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, + size_t len) +{ + ssize_t res = 0; + size_t copied = 0; + + while (len) { + if (src->flags & FUSE_BUF_FD_SEEK) { + res = pread(src->fd, dst->mem + dst_off, len, + src->pos + src_off); + } else { + res = read(src->fd, dst->mem + dst_off, len); + } + if (res == -1) { + if (!copied) + return -errno; + break; + } + if (res == 0) + break; + + copied += res; + if (!(src->flags & FUSE_BUF_FD_RETRY)) + break; + + dst_off += res; + src_off += res; + len -= res; + } + + return copied; +} + +static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, + size_t len) +{ + char buf[4096]; + struct fuse_buf tmp = { + .size = sizeof(buf), + .flags = 0, + }; + ssize_t res; + size_t copied = 0; + + tmp.mem = buf; + + while (len) { + size_t this_len = min_size(tmp.size, len); + size_t read_len; + + res = fuse_buf_read(&tmp, 0, src, src_off, this_len); + if (res < 0) { + if (!copied) + return res; + break; + } + if (res == 0) + break; + + read_len = res; + res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); + if (res < 0) { + if (!copied) + return res; + break; + } + if (res == 0) + break; + + copied += res; + + if (res < this_len) + break; + + dst_off += res; + src_off += res; + len -= res; + } + + return copied; +} + +#ifdef HAVE_SPLICE +static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, + size_t len, enum fuse_buf_copy_flags flags) +{ + int splice_flags = 0; + off64_t *srcpos = NULL; + off64_t *dstpos = NULL; + off64_t srcpos_val; + off64_t dstpos_val; + ssize_t res; + size_t copied = 0; + + if (flags & FUSE_BUF_SPLICE_MOVE) + splice_flags |= SPLICE_F_MOVE; + if (flags & FUSE_BUF_SPLICE_NONBLOCK) + splice_flags |= SPLICE_F_NONBLOCK; + + if (src->flags & FUSE_BUF_FD_SEEK) { + srcpos_val = src->pos + src_off; + srcpos = &srcpos_val; + } + if (dst->flags & FUSE_BUF_FD_SEEK) { + dstpos_val = dst->pos + dst_off; + dstpos = &dstpos_val; + } + + while (len) { + res = splice(src->fd, srcpos, dst->fd, dstpos, len, + splice_flags); + if (res == -1) { + if (copied) + break; + + if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) + return -errno; + + /* Maybe splice is not supported for this combination */ + return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, + len); + } + if (res == 0) + break; + + copied += res; + if (!(src->flags & FUSE_BUF_FD_RETRY) && + !(dst->flags & FUSE_BUF_FD_RETRY)) { + break; + } + + len -= res; + } + + return copied; +} +#else +static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, + size_t len, enum fuse_buf_copy_flags flags) +{ + (void) flags; + + return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); +} +#endif + + +static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, + size_t len, enum fuse_buf_copy_flags flags) +{ + int src_is_fd = src->flags & FUSE_BUF_IS_FD; + int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; + + if (!src_is_fd && !dst_is_fd) { + void *dstmem = dst->mem + dst_off; + void *srcmem = src->mem + src_off; + + if (dstmem != srcmem) { + if (dstmem + len <= srcmem || srcmem + len <= dstmem) + memcpy(dstmem, srcmem, len); + else + memmove(dstmem, srcmem, len); + } + + return len; + } else if (!src_is_fd) { + return fuse_buf_write(dst, dst_off, src, src_off, len); + } else if (!dst_is_fd) { + return fuse_buf_read(dst, dst_off, src, src_off, len); + } else if (flags & FUSE_BUF_NO_SPLICE) { + return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); + } else { + return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); + } +} + +static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) +{ + if (bufv->idx < bufv->count) + return &bufv->buf[bufv->idx]; + else + return NULL; +} + +static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) +{ + const struct fuse_buf *buf = fuse_bufvec_current(bufv); + + bufv->off += len; + assert(bufv->off <= buf->size); + if (bufv->off == buf->size) { + assert(bufv->idx < bufv->count); + bufv->idx++; + if (bufv->idx == bufv->count) + return 0; + bufv->off = 0; + } + return 1; +} + +ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, + enum fuse_buf_copy_flags flags) +{ + size_t copied = 0; + + if (dstv == srcv) + return fuse_buf_size(dstv); + + for (;;) { + const struct fuse_buf *src = fuse_bufvec_current(srcv); + const struct fuse_buf *dst = fuse_bufvec_current(dstv); + size_t src_len; + size_t dst_len; + size_t len; + ssize_t res; + + if (src == NULL || dst == NULL) + break; + + src_len = src->size - srcv->off; + dst_len = dst->size - dstv->off; + len = min_size(src_len, dst_len); + + res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); + if (res < 0) { + if (!copied) + return res; + break; + } + copied += res; + + if (!fuse_bufvec_advance(srcv, res) || + !fuse_bufvec_advance(dstv, res)) + break; + + if (res < len) + break; + } + + return copied; +} diff --git a/fuse/cuse_lowlevel.c b/fuse/cuse_lowlevel.c index 970df7fb7..be49ad473 100644 --- a/fuse/cuse_lowlevel.c +++ b/fuse/cuse_lowlevel.c @@ -214,14 +214,14 @@ void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) f->conn.want = 0; if (arg->major < 7) { - fprintf(stderr, "fuse: unsupported protocol version: %u.%u\n", + fprintf(stderr, "cuse: unsupported protocol version: %u.%u\n", arg->major, arg->minor); fuse_reply_err(req, EPROTO); return; } if (bufsize < FUSE_MIN_READ_BUFFER) { - fprintf(stderr, "fuse: warning: buffer size too small: %zu\n", + fprintf(stderr, "cuse: warning: buffer size too small: %zu\n", bufsize); bufsize = FUSE_MIN_READ_BUFFER; } @@ -306,9 +306,9 @@ struct fuse_session *cuse_lowlevel_setup(int argc, char *argv[], fd = open(devname, O_RDWR); if (fd == -1) { if (errno == ENODEV || errno == ENOENT) - fprintf(stderr, "fuse: device not found, try 'modprobe cuse' first\n"); + fprintf(stderr, "cuse: device not found, try 'modprobe cuse' first\n"); else - fprintf(stderr, "fuse: failed to open %s: %s\n", + fprintf(stderr, "cuse: failed to open %s: %s\n", devname, strerror(errno)); goto err_se; } diff --git a/fuse/fuse.c b/fuse/fuse.c index 98170cf9a..34b11d47a 100644 --- a/fuse/fuse.c +++ b/fuse/fuse.c @@ -10,6 +10,7 @@ /* For pthread_rwlock_t */ #define _GNU_SOURCE +#include "config.h" #include "fuse_i.h" #include "fuse_lowlevel.h" #include "fuse_opt.h" @@ -22,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -30,15 +32,25 @@ #include #include #include +#include #include #include #include +#include + +#define FUSE_NODE_SLAB 1 + +#ifndef MAP_ANONYMOUS +#undef FUSE_NODE_SLAB +#endif #define FUSE_DEFAULT_INTR_SIGNAL SIGUSR1 #define FUSE_UNKNOWN_INO 0xffffffff #define OFFSET_MAX 0x7fffffffffffffffLL +#define NODE_TABLE_MIN_SIZE 8192 + struct fuse_config { unsigned int uid; unsigned int gid; @@ -48,7 +60,8 @@ struct fuse_config { double attr_timeout; double ac_attr_timeout; int ac_attr_timeout_set; - int noforget; + int remember; + int nopath; int debug; int hard_remove; int use_ino; @@ -79,16 +92,52 @@ struct fusemod_so { }; struct lock_queue_element { - struct lock_queue_element *next; - pthread_cond_t cond; + struct lock_queue_element *next; + pthread_cond_t cond; + fuse_ino_t nodeid1; + const char *name1; + char **path1; + struct node **wnode1; + fuse_ino_t nodeid2; + const char *name2; + char **path2; + struct node **wnode2; + int err; + bool first_locked : 1; + bool second_locked : 1; + bool done : 1; +}; + +struct node_table { + struct node **array; + size_t use; + size_t size; + size_t split; +}; + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +struct list_head { + struct list_head *next; + struct list_head *prev; +}; + +struct node_slab { + struct list_head list; /* must be the first member */ + struct list_head freelist; + int used; }; struct fuse { struct fuse_session *se; - struct node **name_table; - size_t name_table_size; - struct node **id_table; - size_t id_table_size; + struct node_table name_table; + struct node_table id_table; + struct list_head lru_table; fuse_ino_t ctr; unsigned int generation; unsigned int hidectr; @@ -97,8 +146,12 @@ struct fuse { int intr_installed; struct fuse_fs *fs; int nullpath_ok; - int curr_ticket; + int utime_omit_ok; struct lock_queue_element *lockq; + int pagesize; + struct list_head partial_slabs; + struct list_head full_slabs; + pthread_t prune_thread; }; struct lock { @@ -127,7 +180,16 @@ struct node { unsigned int is_hidden : 1; unsigned int cache_valid : 1; int treelock; - int ticket; + char inline_name[32]; +}; + +#define TREELOCK_WRITE -1 +#define TREELOCK_WAIT_OFFSET INT_MIN + +struct node_lru { + struct node node; + struct list_head lru; + struct timespec forget_time; }; struct fuse_dh { @@ -258,12 +320,185 @@ static void fuse_put_module(struct fuse_module *m) pthread_mutex_unlock(&fuse_context_lock); } +static void init_list_head(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +static int list_empty(const struct list_head *head) +{ + return head->next == head; +} + +static void list_add(struct list_head *new, struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +static inline void list_add_head(struct list_head *new, struct list_head *head) +{ + list_add(new, head, head->next); +} + +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + list_add(new, head->prev, head); +} + +static inline void list_del(struct list_head *entry) +{ + struct list_head *prev = entry->prev; + struct list_head *next = entry->next; + + next->prev = prev; + prev->next = next; +} + +static inline int lru_enabled(struct fuse *f) +{ + return f->conf.remember > 0; +} + +static struct node_lru *node_lru(struct node *node) +{ + return (struct node_lru *) node; +} + +static size_t get_node_size(struct fuse *f) +{ + if (lru_enabled(f)) + return sizeof(struct node_lru); + else + return sizeof(struct node); +} + +#ifdef FUSE_NODE_SLAB +static struct node_slab *list_to_slab(struct list_head *head) +{ + return (struct node_slab *) head; +} + +static struct node_slab *node_to_slab(struct fuse *f, struct node *node) +{ + return (struct node_slab *) (((uintptr_t) node) & ~((uintptr_t) f->pagesize - 1)); +} + +static int alloc_slab(struct fuse *f) +{ + void *mem; + struct node_slab *slab; + char *start; + size_t num; + size_t i; + size_t node_size = get_node_size(f); + + mem = mmap(NULL, f->pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (mem == MAP_FAILED) + return -1; + + slab = mem; + init_list_head(&slab->freelist); + slab->used = 0; + num = (f->pagesize - sizeof(struct node_slab)) / node_size; + + start = (char *) mem + f->pagesize - num * node_size; + for (i = 0; i < num; i++) { + struct list_head *n; + + n = (struct list_head *) (start + i * node_size); + list_add_tail(n, &slab->freelist); + } + list_add_tail(&slab->list, &f->partial_slabs); + + return 0; +} + +static struct node *alloc_node(struct fuse *f) +{ + struct node_slab *slab; + struct list_head *node; + + if (list_empty(&f->partial_slabs)) { + int res = alloc_slab(f); + if (res != 0) + return NULL; + } + slab = list_to_slab(f->partial_slabs.next); + slab->used++; + node = slab->freelist.next; + list_del(node); + if (list_empty(&slab->freelist)) { + list_del(&slab->list); + list_add_tail(&slab->list, &f->full_slabs); + } + memset(node, 0, sizeof(struct node)); + + return (struct node *) node; +} + +static void free_slab(struct fuse *f, struct node_slab *slab) +{ + int res; + + list_del(&slab->list); + res = munmap(slab, f->pagesize); + if (res == -1) + fprintf(stderr, "fuse warning: munmap(%p) failed\n", slab); +} + +static void free_node_mem(struct fuse *f, struct node *node) +{ + struct node_slab *slab = node_to_slab(f, node); + struct list_head *n = (struct list_head *) node; + + slab->used--; + if (slab->used) { + if (list_empty(&slab->freelist)) { + list_del(&slab->list); + list_add_tail(&slab->list, &f->partial_slabs); + } + list_add_head(n, &slab->freelist); + } else { + free_slab(f, slab); + } +} +#else +static struct node *alloc_node(struct fuse *f) +{ + return (struct node *) calloc(1, get_node_size(f)); +} + +static void free_node_mem(struct fuse *f, struct node *node) +{ + (void) f; + free(node); +} +#endif + +static size_t id_hash(struct fuse *f, fuse_ino_t ino) +{ + uint64_t hash = ((uint32_t) ino * 2654435761U) % f->id_table.size; + uint64_t oldhash = hash % (f->id_table.size / 2); + + if (oldhash >= f->id_table.split) + return oldhash; + else + return hash; +} + static struct node *get_node_nocheck(struct fuse *f, fuse_ino_t nodeid) { - size_t hash = nodeid % f->id_table_size; + size_t hash = id_hash(f, nodeid); struct node *node; - for (node = f->id_table[hash]; node != NULL; node = node->id_next) + for (node = f->id_table.array[hash]; node != NULL; node = node->id_next) if (node->nodeid == nodeid) return node; @@ -281,59 +516,211 @@ static struct node *get_node(struct fuse *f, fuse_ino_t nodeid) return node; } -static void free_node(struct node *node) +static void curr_time(struct timespec *now); +static double diff_timespec(const struct timespec *t1, + const struct timespec *t2); + +static void remove_node_lru(struct node *node) { - free(node->name); - free(node); + struct node_lru *lnode = node_lru(node); + list_del(&lnode->lru); + init_list_head(&lnode->lru); +} + +static void set_forget_time(struct fuse *f, struct node *node) +{ + struct node_lru *lnode = node_lru(node); + + list_del(&lnode->lru); + list_add_tail(&lnode->lru, &f->lru_table); + curr_time(&lnode->forget_time); +} + +static void free_node(struct fuse *f, struct node *node) +{ + if (node->name != node->inline_name) + free(node->name); + free_node_mem(f, node); +} + +static void node_table_reduce(struct node_table *t) +{ + size_t newsize = t->size / 2; + void *newarray; + + if (newsize < NODE_TABLE_MIN_SIZE) + return; + + newarray = realloc(t->array, sizeof(struct node *) * newsize); + if (newarray != NULL) + t->array = newarray; + + t->size = newsize; + t->split = t->size / 2; +} + +static void remerge_id(struct fuse *f) +{ + struct node_table *t = &f->id_table; + int iter; + + if (t->split == 0) + node_table_reduce(t); + + for (iter = 8; t->split > 0 && iter; iter--) { + struct node **upper; + + t->split--; + upper = &t->array[t->split + t->size / 2]; + if (*upper) { + struct node **nodep; + + for (nodep = &t->array[t->split]; *nodep; + nodep = &(*nodep)->id_next); + + *nodep = *upper; + *upper = NULL; + break; + } + } } static void unhash_id(struct fuse *f, struct node *node) { - size_t hash = node->nodeid % f->id_table_size; - struct node **nodep = &f->id_table[hash]; + struct node **nodep = &f->id_table.array[id_hash(f, node->nodeid)]; for (; *nodep != NULL; nodep = &(*nodep)->id_next) if (*nodep == node) { *nodep = node->id_next; + f->id_table.use--; + + if(f->id_table.use < f->id_table.size / 4) + remerge_id(f); return; } } +static int node_table_resize(struct node_table *t) +{ + size_t newsize = t->size * 2; + void *newarray; + + newarray = realloc(t->array, sizeof(struct node *) * newsize); + if (newarray == NULL) + return -1; + + t->array = newarray; + memset(t->array + t->size, 0, t->size * sizeof(struct node *)); + t->size = newsize; + t->split = 0; + + return 0; +} + +static void rehash_id(struct fuse *f) +{ + struct node_table *t = &f->id_table; + struct node **nodep; + struct node **next; + size_t hash; + + if (t->split == t->size / 2) + return; + + hash = t->split; + t->split++; + for (nodep = &t->array[hash]; *nodep != NULL; nodep = next) { + struct node *node = *nodep; + size_t newhash = id_hash(f, node->nodeid); + + if (newhash != hash) { + next = nodep; + *nodep = node->id_next; + node->id_next = t->array[newhash]; + t->array[newhash] = node; + } else { + next = &node->id_next; + } + } + if (t->split == t->size / 2) + node_table_resize(t); +} + static void hash_id(struct fuse *f, struct node *node) { - size_t hash = node->nodeid % f->id_table_size; - node->id_next = f->id_table[hash]; - f->id_table[hash] = node; + size_t hash = id_hash(f, node->nodeid); + node->id_next = f->id_table.array[hash]; + f->id_table.array[hash] = node; + f->id_table.use++; + + if (f->id_table.use >= f->id_table.size / 2) + rehash_id(f); } -static unsigned int name_hash(struct fuse *f, fuse_ino_t parent, - const char *name) +static size_t name_hash(struct fuse *f, fuse_ino_t parent, + const char *name) { - unsigned int hash = *name; + uint64_t hash = parent; + uint64_t oldhash; - if (hash) - for (name += 1; *name != '\0'; name++) - hash = (hash << 5) - hash + *name; + for (; *name; name++) + hash = hash * 31 + (unsigned char) *name; - return (hash + parent) % f->name_table_size; + hash %= f->name_table.size; + oldhash = hash % (f->name_table.size / 2); + if (oldhash >= f->name_table.split) + return oldhash; + else + return hash; } static void unref_node(struct fuse *f, struct node *node); +static void remerge_name(struct fuse *f) +{ + struct node_table *t = &f->name_table; + int iter; + + if (t->split == 0) + node_table_reduce(t); + + for (iter = 8; t->split > 0 && iter; iter--) { + struct node **upper; + + t->split--; + upper = &t->array[t->split + t->size / 2]; + if (*upper) { + struct node **nodep; + + for (nodep = &t->array[t->split]; *nodep; + nodep = &(*nodep)->name_next); + + *nodep = *upper; + *upper = NULL; + break; + } + } +} + static void unhash_name(struct fuse *f, struct node *node) { if (node->name) { size_t hash = name_hash(f, node->parent->nodeid, node->name); - struct node **nodep = &f->name_table[hash]; + struct node **nodep = &f->name_table.array[hash]; for (; *nodep != NULL; nodep = &(*nodep)->name_next) if (*nodep == node) { *nodep = node->name_next; node->name_next = NULL; unref_node(f, node->parent); - free(node->name); + if (node->name != node->inline_name) + free(node->name); node->name = NULL; node->parent = NULL; + f->name_table.use--; + + if (f->name_table.use < f->name_table.size / 4) + remerge_name(f); return; } fprintf(stderr, @@ -343,19 +730,58 @@ static void unhash_name(struct fuse *f, struct node *node) } } +static void rehash_name(struct fuse *f) +{ + struct node_table *t = &f->name_table; + struct node **nodep; + struct node **next; + size_t hash; + + if (t->split == t->size / 2) + return; + + hash = t->split; + t->split++; + for (nodep = &t->array[hash]; *nodep != NULL; nodep = next) { + struct node *node = *nodep; + size_t newhash = name_hash(f, node->parent->nodeid, node->name); + + if (newhash != hash) { + next = nodep; + *nodep = node->name_next; + node->name_next = t->array[newhash]; + t->array[newhash] = node; + } else { + next = &node->name_next; + } + } + if (t->split == t->size / 2) + node_table_resize(t); +} + static int hash_name(struct fuse *f, struct node *node, fuse_ino_t parentid, const char *name) { size_t hash = name_hash(f, parentid, name); struct node *parent = get_node(f, parentid); - node->name = strdup(name); - if (node->name == NULL) - return -1; + if (strlen(name) < sizeof(node->inline_name)) { + strcpy(node->inline_name, name); + node->name = node->inline_name; + } else { + node->name = strdup(name); + if (node->name == NULL) + return -1; + } parent->refctr ++; node->parent = parent; - node->name_next = f->name_table[hash]; - f->name_table[hash] = node; + node->name_next = f->name_table.array[hash]; + f->name_table.array[hash] = node; + f->name_table.use++; + + if (f->name_table.use >= f->name_table.size / 2) + rehash_name(f); + return 0; } @@ -366,9 +792,11 @@ static void delete_node(struct fuse *f, struct node *node) (unsigned long long) node->nodeid); assert(node->treelock == 0); - assert(!node->name); + unhash_name(f, node); + if (lru_enabled(f)) + remove_node_lru(node); unhash_id(f, node); - free_node(node); + free_node(f, node); } static void unref_node(struct fuse *f, struct node *node) @@ -396,7 +824,7 @@ static struct node *lookup_node(struct fuse *f, fuse_ino_t parent, size_t hash = name_hash(f, parent, name); struct node *node; - for (node = f->name_table[hash]; node != NULL; node = node->name_next) + for (node = f->name_table.array[hash]; node != NULL; node = node->name_next) if (node->parent->nodeid == parent && strcmp(node->name, name) == 0) return node; @@ -404,6 +832,13 @@ static struct node *lookup_node(struct fuse *f, fuse_ino_t parent, return NULL; } +static void inc_nlookup(struct node *node) +{ + if (!node->nlookup) + node->refctr++; + node->nlookup++; +} + static struct node *find_node(struct fuse *f, fuse_ino_t parent, const char *name) { @@ -415,27 +850,29 @@ static struct node *find_node(struct fuse *f, fuse_ino_t parent, else node = lookup_node(f, parent, name); if (node == NULL) { - node = (struct node *) calloc(1, sizeof(struct node)); + node = alloc_node(f); if (node == NULL) goto out_err; - if (f->conf.noforget) - node->nlookup = 1; - node->refctr = 1; node->nodeid = next_id(f); node->generation = f->generation; - node->open_count = 0; - node->is_hidden = 0; - node->treelock = 0; - node->ticket = 0; + if (f->conf.remember) + inc_nlookup(node); + if (hash_name(f, node, parent, name) == -1) { - free(node); + free_node(f, node); node = NULL; goto out_err; } hash_id(f, node); + if (lru_enabled(f)) { + struct node_lru *lnode = node_lru(node); + init_list_head(&lnode->lru); + } + } else if (lru_enabled(f) && node->nlookup == 1) { + remove_node_lru(node); } - node->nlookup ++; + inc_nlookup(node); out_err: pthread_mutex_unlock(&f->lock); return node; @@ -475,28 +912,28 @@ static char *add_name(char **buf, unsigned *bufsize, char *s, const char *name) } static void unlock_path(struct fuse *f, fuse_ino_t nodeid, struct node *wnode, - struct node *end, int ticket) + struct node *end) { struct node *node; if (wnode) { - assert(wnode->treelock == -1); + assert(wnode->treelock == TREELOCK_WRITE); wnode->treelock = 0; - if (!wnode->ticket) - wnode->ticket = ticket; } for (node = get_node(f, nodeid); node != end && node->nodeid != FUSE_ROOT_ID; node = node->parent) { - assert(node->treelock > 0); + assert(node->treelock != 0); + assert(node->treelock != TREELOCK_WAIT_OFFSET); + assert(node->treelock != TREELOCK_WRITE); node->treelock--; - if (!node->ticket) - node->ticket = ticket; + if (node->treelock == TREELOCK_WAIT_OFFSET) + node->treelock = 0; } } static int try_get_path(struct fuse *f, fuse_ino_t nodeid, const char *name, - char **path, struct node **wnodep, int ticket) + char **path, struct node **wnodep, bool need_lock) { unsigned bufsize = 256; char *buf; @@ -507,9 +944,10 @@ static int try_get_path(struct fuse *f, fuse_ino_t nodeid, const char *name, *path = NULL; + err = -ENOMEM; buf = malloc(bufsize); if (buf == NULL) - return -ENOMEM; + goto out_err; s = buf + bufsize - 1; *s = '\0'; @@ -517,51 +955,41 @@ static int try_get_path(struct fuse *f, fuse_ino_t nodeid, const char *name, if (name != NULL) { s = add_name(&buf, &bufsize, s, name); err = -ENOMEM; - printf("setting err to ENOMEM\n"); if (s == NULL) goto out_free; } if (wnodep) { - assert(ticket); + assert(need_lock); wnode = lookup_node(f, nodeid, name); if (wnode) { - if (wnode->treelock != 0 || - (wnode->ticket && wnode->ticket != ticket)) { - if (!wnode->ticket) - wnode->ticket = ticket; + if (wnode->treelock != 0) { + if (wnode->treelock > 0) + wnode->treelock += TREELOCK_WAIT_OFFSET; err = -EAGAIN; - printf("setting err to EAGAIN\n"); goto out_free; } - wnode->treelock = -1; - wnode->ticket = 0; + wnode->treelock = TREELOCK_WRITE; } } - err = 0; for (node = get_node(f, nodeid); node->nodeid != FUSE_ROOT_ID; node = node->parent) { err = -ENOENT; - printf("setting err to ENOENT\n"); if (node->name == NULL || node->parent == NULL) goto out_unlock; err = -ENOMEM; - printf("setting err to ENOMEM\n"); s = add_name(&buf, &bufsize, s, node->name); if (s == NULL) goto out_unlock; - if (ticket) { + if (need_lock) { err = -EAGAIN; - printf("setting err to EAGAIN\n"); - if (node->treelock == -1 || - (node->ticket && node->ticket != ticket)) + if (node->treelock < 0) goto out_unlock; node->treelock++; - node->ticket = 0; } } @@ -577,36 +1005,95 @@ static int try_get_path(struct fuse *f, fuse_ino_t nodeid, const char *name, return 0; out_unlock: - if (ticket) - unlock_path(f, nodeid, wnode, node, ticket); + if (need_lock) + unlock_path(f, nodeid, wnode, node); out_free: free(buf); + out_err: return err; } -static void wake_up_first(struct fuse *f) +static void queue_element_unlock(struct fuse *f, struct lock_queue_element *qe) { - if (f->lockq) - pthread_cond_signal(&f->lockq->cond); + struct node *wnode; + + if (qe->first_locked) { + wnode = qe->wnode1 ? *qe->wnode1 : NULL; + unlock_path(f, qe->nodeid1, wnode, NULL); + } + if (qe->second_locked) { + wnode = qe->wnode2 ? *qe->wnode2 : NULL; + unlock_path(f, qe->nodeid2, wnode, NULL); + } } -static void wake_up_next(struct lock_queue_element *qe) +static void queue_element_wakeup(struct fuse *f, struct lock_queue_element *qe) { - if (qe->next) - pthread_cond_signal(&qe->next->cond); + int err; + bool first = (qe == f->lockq); + + if (!qe->path1) { + /* Just waiting for it to be unlocked */ + if (get_node(f, qe->nodeid1)->treelock == 0) + pthread_cond_signal(&qe->cond); + + return; + } + + if (!qe->first_locked) { + err = try_get_path(f, qe->nodeid1, qe->name1, qe->path1, + qe->wnode1, true); + if (!err) + qe->first_locked = true; + else if (err != -EAGAIN) + goto err_unlock; + } + if (!qe->second_locked && qe->path2) { + err = try_get_path(f, qe->nodeid2, qe->name2, qe->path2, + qe->wnode2, true); + if (!err) + qe->second_locked = true; + else if (err != -EAGAIN) + goto err_unlock; + } + + if (qe->first_locked && (qe->second_locked || !qe->path2)) { + err = 0; + goto done; + } + + /* + * Only let the first element be partially locked otherwise there could + * be a deadlock. + * + * But do allow the first element to be partially locked to prevent + * starvation. + */ + if (!first) + queue_element_unlock(f, qe); + + /* keep trying */ + return; + +err_unlock: + queue_element_unlock(f, qe); +done: + qe->err = err; + qe->done = true; + pthread_cond_signal(&qe->cond); } -static int get_ticket(struct fuse *f) +static void wake_up_queued(struct fuse *f) { - do f->curr_ticket++; - while (f->curr_ticket == 0); + struct lock_queue_element *qe; - return f->curr_ticket; + for (qe = f->lockq; qe != NULL; qe = qe->next) + queue_element_wakeup(f, qe); } static void debug_path(struct fuse *f, const char *msg, fuse_ino_t nodeid, - const char *name, int wr) + const char *name, bool wr) { if (f->conf.debug) { struct node *wnode = NULL; @@ -621,56 +1108,58 @@ static void debug_path(struct fuse *f, const char *msg, fuse_ino_t nodeid, } } -static void queue_path(struct fuse *f, struct lock_queue_element *qe, - fuse_ino_t nodeid, const char *name, int wr) +static void queue_path(struct fuse *f, struct lock_queue_element *qe) { struct lock_queue_element **qp; - debug_path(f, "QUEUE PATH", nodeid, name, wr); + qe->done = false; + qe->first_locked = false; + qe->second_locked = false; pthread_cond_init(&qe->cond, NULL); qe->next = NULL; for (qp = &f->lockq; *qp != NULL; qp = &(*qp)->next); *qp = qe; } -static void dequeue_path(struct fuse *f, struct lock_queue_element *qe, - fuse_ino_t nodeid, const char *name, int wr) +static void dequeue_path(struct fuse *f, struct lock_queue_element *qe) { struct lock_queue_element **qp; - debug_path(f, "DEQUEUE PATH", nodeid, name, wr); pthread_cond_destroy(&qe->cond); for (qp = &f->lockq; *qp != qe; qp = &(*qp)->next); *qp = qe->next; } -static void wait_on_path(struct fuse *f, struct lock_queue_element *qe, - fuse_ino_t nodeid, const char *name, int wr) +static int wait_path(struct fuse *f, struct lock_queue_element *qe) { - debug_path(f, "WAIT ON PATH", nodeid, name, wr); - pthread_cond_wait(&qe->cond, &f->lock); + queue_path(f, qe); + + do { + pthread_cond_wait(&qe->cond, &f->lock); + } while (!qe->done); + + dequeue_path(f, qe); + + return qe->err; } static int get_path_common(struct fuse *f, fuse_ino_t nodeid, const char *name, char **path, struct node **wnode) { int err; - int ticket; pthread_mutex_lock(&f->lock); - ticket = get_ticket(f); - err = try_get_path(f, nodeid, name, path, wnode, ticket); + err = try_get_path(f, nodeid, name, path, wnode, true); if (err == -EAGAIN) { - struct lock_queue_element qe; - - queue_path(f, &qe, nodeid, name, !!wnode); - do { - wait_on_path(f, &qe, nodeid, name, !!wnode); - err = try_get_path(f, nodeid, name, path, wnode, - ticket); - wake_up_next(&qe); - } while (err == -EAGAIN); - dequeue_path(f, &qe, nodeid, name, !!wnode); + struct lock_queue_element qe = { + .nodeid1 = nodeid, + .name1 = name, + .path1 = path, + .wnode1 = wnode, + }; + debug_path(f, "QUEUE PATH", nodeid, name, !!wnode); + err = wait_path(f, &qe); + debug_path(f, "DEQUEUE PATH", nodeid, name, !!wnode); } pthread_mutex_unlock(&f->lock); @@ -684,10 +1173,15 @@ static int get_path(struct fuse *f, fuse_ino_t nodeid, char **path) static int get_path_nullok(struct fuse *f, fuse_ino_t nodeid, char **path) { - int err = get_path_common(f, nodeid, NULL, path, NULL); + int err = 0; - if (err == -ENOENT && f->nullpath_ok) - err = 0; + if (f->conf.nopath) { + *path = NULL; + } else { + err = get_path_common(f, nodeid, NULL, path, NULL); + if (err == -ENOENT && f->nullpath_ok) + err = 0; + } return err; } @@ -707,18 +1201,20 @@ static int get_path_wrlock(struct fuse *f, fuse_ino_t nodeid, const char *name, static int try_get_path2(struct fuse *f, fuse_ino_t nodeid1, const char *name1, fuse_ino_t nodeid2, const char *name2, char **path1, char **path2, - struct node **wnode1, struct node **wnode2, - int ticket) + struct node **wnode1, struct node **wnode2) { int err; /* FIXME: locking two paths needs deadlock checking */ - err = try_get_path(f, nodeid1, name1, path1, wnode1, ticket); + err = try_get_path(f, nodeid1, name1, path1, wnode1, true); if (!err) { - err = try_get_path(f, nodeid2, name2, path2, wnode2, ticket); - if (err) - unlock_path(f, nodeid1, wnode1 ? *wnode1 : NULL, NULL, - ticket); + err = try_get_path(f, nodeid2, name2, path2, wnode2, true); + if (err) { + struct node *wn1 = wnode1 ? *wnode1 : NULL; + + unlock_path(f, nodeid1, wn1, NULL); + free(*path1); + } } return err; } @@ -729,27 +1225,27 @@ static int get_path2(struct fuse *f, fuse_ino_t nodeid1, const char *name1, struct node **wnode1, struct node **wnode2) { int err; - int ticket; pthread_mutex_lock(&f->lock); - ticket = get_ticket(f); err = try_get_path2(f, nodeid1, name1, nodeid2, name2, - path1, path2, wnode1, wnode2, ticket); + path1, path2, wnode1, wnode2); if (err == -EAGAIN) { - struct lock_queue_element qe; - - queue_path(f, &qe, nodeid1, name1, !!wnode1); - debug_path(f, " path2", nodeid2, name2, !!wnode2); - do { - wait_on_path(f, &qe, nodeid1, name1, !!wnode1); - debug_path(f, " path2", nodeid2, name2, !!wnode2); - err = try_get_path2(f, nodeid1, name1, nodeid2, name2, - path1, path2, wnode1, wnode2, - ticket); - wake_up_next(&qe); - } while (err == -EAGAIN); - dequeue_path(f, &qe, nodeid1, name1, !!wnode1); - debug_path(f, " path2", nodeid2, name2, !!wnode2); + struct lock_queue_element qe = { + .nodeid1 = nodeid1, + .name1 = name1, + .path1 = path1, + .wnode1 = wnode1, + .nodeid2 = nodeid2, + .name2 = name2, + .path2 = path2, + .wnode2 = wnode2, + }; + + debug_path(f, "QUEUE PATH1", nodeid1, name1, !!wnode1); + debug_path(f, " PATH2", nodeid2, name2, !!wnode2); + err = wait_path(f, &qe); + debug_path(f, "DEQUEUE PATH1", nodeid1, name1, !!wnode1); + debug_path(f, " PATH2", nodeid2, name2, !!wnode2); } pthread_mutex_unlock(&f->lock); @@ -760,8 +1256,9 @@ static void free_path_wrlock(struct fuse *f, fuse_ino_t nodeid, struct node *wnode, char *path) { pthread_mutex_lock(&f->lock); - unlock_path(f, nodeid, wnode, NULL, 0); - wake_up_first(f); + unlock_path(f, nodeid, wnode, NULL); + if (f->lockq) + wake_up_queued(f); pthread_mutex_unlock(&f->lock); free(path); } @@ -777,9 +1274,9 @@ static void free_path2(struct fuse *f, fuse_ino_t nodeid1, fuse_ino_t nodeid2, char *path1, char *path2) { pthread_mutex_lock(&f->lock); - unlock_path(f, nodeid1, wnode1, NULL, 0); - unlock_path(f, nodeid2, wnode2, NULL, 0); - wake_up_first(f); + unlock_path(f, nodeid1, wnode1, NULL); + unlock_path(f, nodeid2, wnode2, NULL); + wake_up_queued(f); pthread_mutex_unlock(&f->lock); free(path1); free(path2); @@ -798,29 +1295,34 @@ static void forget_node(struct fuse *f, fuse_ino_t nodeid, uint64_t nlookup) * create and opendir */ while (node->nlookup == nlookup && node->treelock) { - struct lock_queue_element qe; + struct lock_queue_element qe = { + .nodeid1 = nodeid, + }; - queue_path(f, &qe, node->nodeid, NULL, 0); - do { - wait_on_path(f, &qe, node->nodeid, NULL, 0); - wake_up_next(&qe); + debug_path(f, "QUEUE PATH (forget)", nodeid, NULL, false); + queue_path(f, &qe); + do { + pthread_cond_wait(&qe.cond, &f->lock); } while (node->nlookup == nlookup && node->treelock); - dequeue_path(f, &qe, node->nodeid, NULL, 0); + + dequeue_path(f, &qe); + debug_path(f, "DEQUEUE_PATH (forget)", nodeid, NULL, false); } assert(node->nlookup >= nlookup); node->nlookup -= nlookup; if (!node->nlookup) { - unhash_name(f, node); unref_node(f, node); + } else if (lru_enabled(f) && node->nlookup == 1) { + set_forget_time(f, node); } pthread_mutex_unlock(&f->lock); } static void unlink_node(struct fuse *f, struct node *node) { - if (f->conf.noforget) { + if (f->conf.remember) { assert(node->nlookup > 1); node->nlookup--; } @@ -959,7 +1461,7 @@ static inline void fuse_prepare_interrupt(struct fuse *f, fuse_req_t req, fuse_do_prepare_interrupt(req, d); } -#ifndef __FreeBSD__ +#if !defined(__FreeBSD__) && !defined(__NetBSD__) static int fuse_compat_open(struct fuse_fs *fs, const char *path, struct fuse_file_info *fi) @@ -1054,7 +1556,7 @@ static int fuse_compat_statfs(struct fuse_fs *fs, const char *path, return err; } -#else /* __FreeBSD__ */ +#else /* __FreeBSD__ || __NetBSD__ */ static inline int fuse_compat_open(struct fuse_fs *fs, char *path, struct fuse_file_info *fi) @@ -1080,7 +1582,7 @@ static inline int fuse_compat_statfs(struct fuse_fs *fs, const char *path, return fs->op.statfs(fs->compat == 25 ? "/" : path, buf); } -#endif /* __FreeBSD__ */ +#endif /* __FreeBSD__ || __NetBSD__ */ int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf) { @@ -1243,52 +1745,140 @@ int fuse_fs_open(struct fuse_fs *fs, const char *path, } } -int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, - off64_t off, struct fuse_file_info *fi) +static void fuse_free_buf(struct fuse_bufvec *buf) +{ + if (buf != NULL) { + size_t i; + + for (i = 0; i < buf->count; i++) + free(buf->buf[i].mem); + free(buf); + } +} + +int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, + struct fuse_bufvec **bufp, size_t size, off64_t off, + struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.read) { + if (fs->op.read || fs->op.read_buf) { int res; if (fs->debug) fprintf(stderr, - "read[%llu] %lu bytes from %llu flags: 0x%x\n", + "read[%llu] %zu bytes from %llu flags: 0x%x\n", (unsigned long long) fi->fh, - (unsigned long) size, (unsigned long long) off, - fi->flags); + size, (unsigned long long) off, fi->flags); + + if (fs->op.read_buf) { + res = fs->op.read_buf(path, bufp, size, off, fi); + } else { + struct fuse_bufvec *buf; + void *mem; + + buf = malloc(sizeof(struct fuse_bufvec)); + if (buf == NULL) + return -ENOMEM; + + mem = malloc(size); + if (mem == NULL) { + free(buf); + return -ENOMEM; + } + *buf = FUSE_BUFVEC_INIT(size); + buf->buf[0].mem = mem; + *bufp = buf; - res = fs->op.read(path, buf, size, off, fi); + res = fs->op.read(path, mem, size, off, fi); + if (res >= 0) + buf->buf[0].size = res; + } if (fs->debug && res >= 0) - fprintf(stderr, " read[%llu] %u bytes from %llu\n", - (unsigned long long) fi->fh, res, + fprintf(stderr, " read[%llu] %zu bytes from %llu\n", + (unsigned long long) fi->fh, + fuse_buf_size(*bufp), (unsigned long long) off); - if (res > (int) size) + if (res >= 0 && fuse_buf_size(*bufp) > (int) size) fprintf(stderr, "fuse: read too many bytes\n"); - return res; + if (res < 0) + return res; + + return 0; } else { return -ENOSYS; } } -int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, - size_t size, off64_t off, struct fuse_file_info *fi) +int fuse_fs_read(struct fuse_fs *fs, const char *path, char *mem, size_t size, + off64_t off, struct fuse_file_info *fi) +{ + int res; + struct fuse_bufvec *buf = NULL; + + res = fuse_fs_read_buf(fs, path, &buf, size, off, fi); + if (res == 0) { + struct fuse_bufvec dst = FUSE_BUFVEC_INIT(size); + + dst.buf[0].mem = mem; + res = fuse_buf_copy(&dst, buf, 0); + } + fuse_free_buf(buf); + + return res; +} + +int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, + struct fuse_bufvec *buf, off64_t off, + struct fuse_file_info *fi) { fuse_get_context()->private_data = fs->user_data; - if (fs->op.write) { + if (fs->op.write_buf || fs->op.write) { int res; + size_t size = fuse_buf_size(buf); + assert(buf->idx == 0 && buf->off == 0); if (fs->debug) fprintf(stderr, - "write%s[%llu] %lu bytes to %llu flags: 0x%x\n", + "write%s[%llu] %zu bytes to %llu flags: 0x%x\n", fi->writepage ? "page" : "", (unsigned long long) fi->fh, - (unsigned long) size, (unsigned long long) off, + size, + (unsigned long long) off, fi->flags); - res = fs->op.write(path, buf, size, off, fi); + if (fs->op.write_buf) { + res = fs->op.write_buf(path, buf, off, fi); + } else { + void *mem = NULL; + struct fuse_buf *flatbuf; + struct fuse_bufvec tmp = FUSE_BUFVEC_INIT(size); + + if (buf->count == 1 && + !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { + flatbuf = &buf->buf[0]; + } else { + res = -ENOMEM; + mem = malloc(size); + if (mem == NULL) + goto out; + + tmp.buf[0].mem = mem; + res = fuse_buf_copy(&tmp, buf, 0); + if (res <= 0) + goto out_free; + + tmp.buf[0].size = res; + flatbuf = &tmp.buf[0]; + } + res = fs->op.write(path, flatbuf->mem, flatbuf->size, + off, fi); +out_free: + free(mem); + } +out: if (fs->debug && res >= 0) fprintf(stderr, " write%s[%llu] %u bytes to %llu\n", fi->writepage ? "page" : "", @@ -1303,6 +1893,16 @@ int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, } } +int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *mem, + size_t size, off64_t off, struct fuse_file_info *fi) +{ + struct fuse_bufvec bufv = FUSE_BUFVEC_INIT(size); + + bufv.buf[0].mem = (void *) mem; + + return fuse_fs_write_buf(fs, path, &bufv, off, fi); +} + int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, struct fuse_file_info *fi) { @@ -1469,6 +2069,27 @@ int fuse_fs_lock(struct fuse_fs *fs, const char *path, } } +int fuse_fs_flock(struct fuse_fs *fs, const char *path, + struct fuse_file_info *fi, int op) +{ + fuse_get_context()->private_data = fs->user_data; + if (fs->op.flock) { + if (fs->debug) { + int xop = op & ~LOCK_NB; + + fprintf(stderr, "lock[%llu] %s%s\n", + (unsigned long long) fi->fh, + xop == LOCK_SH ? "LOCK_SH" : + (xop == LOCK_EX ? "LOCK_EX" : + (xop == LOCK_UN ? "LOCK_UN" : "???")), + (op & LOCK_NB) ? "|LOCK_NB" : ""); + } + return fs->op.flock(path, fi, op); + } else { + return -ENOSYS; + } +} + int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid) { fuse_get_context()->private_data = fs->user_data; @@ -1503,8 +2124,8 @@ int fuse_fs_ftruncate(struct fuse_fs *fs, const char *path, off64_t size, fuse_get_context()->private_data = fs->user_data; if (fs->op.ftruncate) { if (fs->debug) - fprintf(stderr, "ftruncate[%llu] %s %llu\n", - (unsigned long long) fi->fh, path, + fprintf(stderr, "ftruncate[%llu] %llu\n", + (unsigned long long) fi->fh, (unsigned long long) size); return fs->op.ftruncate(path, size, fi); @@ -1714,6 +2335,23 @@ int fuse_fs_poll(struct fuse_fs *fs, const char *path, return -ENOSYS; } +int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, + off64_t offset, off64_t length, struct fuse_file_info *fi) +{ + fuse_get_context()->private_data = fs->user_data; + if (fs->op.fallocate) { + if (fs->debug) + fprintf(stderr, "fallocate %s mode %x, offset: %llu, length: %llu\n", + path, + mode, + (unsigned long long) offset, + (unsigned long long) length); + + return fs->op.fallocate(path, mode, offset, length, fi); + } else + return -ENOSYS; +} + static int is_open(struct fuse *f, fuse_ino_t dir, const char *name) { struct node *node; @@ -1750,10 +2388,9 @@ static char *hidden_name(struct fuse *f, fuse_ino_t dir, const char *oldname, newnode = lookup_node(f, dir, newname); } while(newnode); - try_get_path(f, dir, newname, &newpath, NULL, 0); + res = try_get_path(f, dir, newname, &newpath, NULL, false); pthread_mutex_unlock(&f->lock); - - if (!newpath) + if (res) break; res = fuse_fs_getattr(f->fs, newpath, &buf); @@ -1861,7 +2498,7 @@ static struct fuse_context_i *fuse_get_context_internal(void) c = (struct fuse_context_i *) pthread_getspecific(fuse_context_key); if (c == NULL) { c = (struct fuse_context_i *) - malloc(sizeof(struct fuse_context_i)); + calloc(1, sizeof(struct fuse_context_i)); if (c == NULL) { /* This is hard to deal with properly, so just abort. If memory is so low that the @@ -1945,6 +2582,12 @@ static void reply_entry(fuse_req_t req, const struct fuse_entry_param *e, void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn) { fuse_get_context()->private_data = fs->user_data; + if (!fs->op.write_buf) + conn->want &= ~FUSE_CAP_SPLICE_READ; + if (!fs->op.lock) + conn->want &= ~FUSE_CAP_POSIX_LOCKS; + if (!fs->op.flock) + conn->want &= ~FUSE_CAP_FLOCK_LOCKS; if (fs->op.init) fs->user_data = fs->op.init(conn); } @@ -2038,17 +2681,34 @@ static void fuse_lib_lookup(fuse_req_t req, fuse_ino_t parent, reply_entry(req, &e, err); } +static void do_forget(struct fuse *f, fuse_ino_t ino, uint64_t nlookup) +{ + if (f->conf.debug) + fprintf(stderr, "FORGET %llu/%llu\n", (unsigned long long)ino, + (unsigned long long) nlookup); + forget_node(f, ino, nlookup); +} + static void fuse_lib_forget(fuse_req_t req, fuse_ino_t ino, unsigned long nlookup) +{ + do_forget(req_fuse(req), ino, nlookup); + fuse_reply_none(req); +} + +static void fuse_lib_forget_multi(fuse_req_t req, size_t count, + struct fuse_forget_data *forgets) { struct fuse *f = req_fuse(req); - if (f->conf.debug) - fprintf(stderr, "FORGET %llu/%lu\n", (unsigned long long)ino, - nlookup); - forget_node(f, ino, nlookup); + size_t i; + + for (i = 0; i < count; i++) + do_forget(f, forgets[i].ino, forgets[i].nlookup); + fuse_reply_none(req); } + static void fuse_lib_getattr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { @@ -2059,7 +2719,7 @@ static void fuse_lib_getattr(fuse_req_t req, fuse_ino_t ino, memset(&buf, 0, sizeof(buf)); - if (fi != NULL) + if (fi != NULL && f->fs->op.fgetattr) err = get_path_nullok(f, ino, &path); else err = get_path(f, ino, &path); @@ -2074,11 +2734,15 @@ static void fuse_lib_getattr(fuse_req_t req, fuse_ino_t ino, free_path(f, ino, path); } if (!err) { - if (f->conf.auto_cache) { - pthread_mutex_lock(&f->lock); - update_stat(get_node(f, ino), &buf); - pthread_mutex_unlock(&f->lock); - } + struct node *node; + + pthread_mutex_lock(&f->lock); + node = get_node(f, ino); + if (node->is_hidden && buf.st_nlink > 0) + buf.st_nlink--; + if (f->conf.auto_cache) + update_stat(node, &buf); + pthread_mutex_unlock(&f->lock); set_stat(f, ino, &buf); fuse_reply_attr(req, &buf, f->conf.attr_timeout); } else @@ -2102,7 +2766,11 @@ static void fuse_lib_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, char *path; int err; - err = get_path(f, ino, &path); + if (valid == FUSE_SET_ATTR_SIZE && fi != NULL && + f->fs->op.ftruncate && f->fs->op.fgetattr) + err = get_path_nullok(f, ino, &path); + else + err = get_path(f, ino, &path); if (!err) { struct fuse_intr_data d; fuse_prepare_interrupt(f, req, &d); @@ -2124,6 +2792,29 @@ static void fuse_lib_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, err = fuse_fs_truncate(f->fs, path, attr->st_size); } +#ifdef HAVE_UTIMENSAT + if (!err && f->utime_omit_ok && + (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME))) { + struct timespec tv[2]; + + tv[0].tv_sec = 0; + tv[1].tv_sec = 0; + tv[0].tv_nsec = UTIME_OMIT; + tv[1].tv_nsec = UTIME_OMIT; + + if (valid & FUSE_SET_ATTR_ATIME_NOW) + tv[0].tv_nsec = UTIME_NOW; + else if (valid & FUSE_SET_ATTR_ATIME) + tv[0] = attr->st_atim; + + if (valid & FUSE_SET_ATTR_MTIME_NOW) + tv[1].tv_nsec = UTIME_NOW; + else if (valid & FUSE_SET_ATTR_MTIME) + tv[1] = attr->st_mtim; + + err = fuse_fs_utimens(f->fs, path, tv); + } else +#endif if (!err && (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) == (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { @@ -2134,8 +2825,12 @@ static void fuse_lib_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, tv[1].tv_nsec = ST_MTIM_NSEC(attr); err = fuse_fs_utimens(f->fs, path, tv); } - if (!err) - err = fuse_fs_getattr(f->fs, path, &buf); + if (!err) { + if (fi) + err = fuse_fs_fgetattr(f->fs, path, &buf, fi); + else + err = fuse_fs_getattr(f->fs, path, &buf); + } fuse_finish_interrupt(f, req, &d); free_path(f, ino, path); } @@ -2381,8 +3076,14 @@ static void fuse_do_release(struct fuse *f, fuse_ino_t ino, const char *path, { struct node *node; int unlink_hidden = 0; + const char *compatpath; + + if (path != NULL || f->nullpath_ok || f->conf.nopath) + compatpath = path; + else + compatpath = "-"; - fuse_fs_release(f->fs, (path || f->nullpath_ok) ? path : "-", fi); + fuse_fs_release(f->fs, compatpath, fi); pthread_mutex_lock(&f->lock); node = get_node(f, ino); @@ -2394,8 +3095,18 @@ static void fuse_do_release(struct fuse *f, fuse_ino_t ino, const char *path, } pthread_mutex_unlock(&f->lock); - if(unlink_hidden && path) - fuse_fs_unlink(f->fs, path); + if(unlink_hidden) { + if (path) { + fuse_fs_unlink(f->fs, path); + } else if (f->conf.nopath) { + char *unlinkpath; + + if (get_path(f, ino, &unlinkpath) == 0) + fuse_fs_unlink(f->fs, unlinkpath); + + free_path(f, ino, unlinkpath); + } + } } static void fuse_lib_create(fuse_req_t req, fuse_ino_t parent, @@ -2437,9 +3148,7 @@ static void fuse_lib_create(fuse_req_t req, fuse_ino_t parent, if (fuse_reply_create(req, &e, fi) == -ENOENT) { /* The open syscall was interrupted, so it must be cancelled */ - fuse_prepare_interrupt(f, req, &d); fuse_do_release(f, e.ino, path, fi); - fuse_finish_interrupt(f, req, &d); forget_node(f, e.ino, 1); } } else { @@ -2517,9 +3226,7 @@ static void fuse_lib_open(fuse_req_t req, fuse_ino_t ino, if (fuse_reply_open(req, fi) == -ENOENT) { /* The open syscall was interrupted, so it must be cancelled */ - fuse_prepare_interrupt(f, req, &d); fuse_do_release(f, ino, path, fi); - fuse_finish_interrupt(f, req, &d); } } else reply_err(req, err); @@ -2531,36 +3238,31 @@ static void fuse_lib_read(fuse_req_t req, fuse_ino_t ino, size_t size, off64_t off, struct fuse_file_info *fi) { struct fuse *f = req_fuse_prepare(req); + struct fuse_bufvec *buf = NULL; char *path; - char *buf; int res; - buf = (char *) malloc(size); - if (buf == NULL) { - reply_err(req, -ENOMEM); - return; - } - res = get_path_nullok(f, ino, &path); if (res == 0) { struct fuse_intr_data d; fuse_prepare_interrupt(f, req, &d); - res = fuse_fs_read(f->fs, path, buf, size, off, fi); + res = fuse_fs_read_buf(f->fs, path, &buf, size, off, fi); fuse_finish_interrupt(f, req, &d); free_path(f, ino, path); } - if (res >= 0) - fuse_reply_buf(req, buf, res); + if (res == 0) + fuse_reply_data(req, buf, FUSE_BUF_SPLICE_MOVE); else reply_err(req, res); - free(buf); + fuse_free_buf(buf); } -static void fuse_lib_write(fuse_req_t req, fuse_ino_t ino, const char *buf, - size_t size, off64_t off, struct fuse_file_info *fi) +static void fuse_lib_write_buf(fuse_req_t req, fuse_ino_t ino, + struct fuse_bufvec *buf, off64_t off, + struct fuse_file_info *fi) { struct fuse *f = req_fuse_prepare(req); char *path; @@ -2571,8 +3273,7 @@ static void fuse_lib_write(fuse_req_t req, fuse_ino_t ino, const char *buf, struct fuse_intr_data d; fuse_prepare_interrupt(f, req, &d); - res = fuse_fs_write(f->fs, path, buf, size, off, fi); - printf("died\n"); + res = fuse_fs_write_buf(f->fs, path, buf, off, fi); fuse_finish_interrupt(f, req, &d); free_path(f, ino, path); } @@ -2651,9 +3352,7 @@ static void fuse_lib_opendir(fuse_req_t req, fuse_ino_t ino, if (fuse_reply_open(req, llfi) == -ENOENT) { /* The opendir syscall was interrupted, so it must be cancelled */ - fuse_prepare_interrupt(f, req, &d); fuse_fs_releasedir(f->fs, path, &fi); - fuse_finish_interrupt(f, req, &d); pthread_mutex_destroy(&dh->lock); free(dh); } @@ -2747,7 +3446,10 @@ static int readdir_fill(struct fuse *f, fuse_req_t req, fuse_ino_t ino, char *path; int err; - err = get_path(f, ino, &path); + if (f->fs->op.readdir) + err = get_path_nullok(f, ino, &path); + else + err = get_path(f, ino, &path); if (!err) { struct fuse_intr_data d; @@ -2812,10 +3514,16 @@ static void fuse_lib_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info fi; struct fuse_dh *dh = get_dirhandle(llfi, &fi); char *path; + const char *compatpath; + + get_path_nullok(f, ino, &path); + if (path != NULL || f->nullpath_ok || f->conf.nopath) + compatpath = path; + else + compatpath = "-"; - get_path(f, ino, &path); fuse_prepare_interrupt(f, req, &d); - fuse_fs_releasedir(f->fs, (path || f->nullpath_ok) ? path : "-", &fi); + fuse_fs_releasedir(f->fs, compatpath, &fi); fuse_finish_interrupt(f, req, &d); free_path(f, ino, path); @@ -2837,7 +3545,7 @@ static void fuse_lib_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, get_dirhandle(llfi, &fi); - err = get_path(f, ino, &path); + err = get_path_nullok(f, ino, &path); if (!err) { struct fuse_intr_data d; fuse_prepare_interrupt(f, req, &d); @@ -3155,7 +3863,7 @@ static void fuse_lib_release(fuse_req_t req, fuse_ino_t ino, char *path; int err = 0; - get_path(f, ino, &path); + get_path_nullok(f, ino, &path); if (fi->flush) { err = fuse_flush_common(f, req, ino, path, fi); if (err == -ENOSYS) @@ -3177,7 +3885,7 @@ static void fuse_lib_flush(fuse_req_t req, fuse_ino_t ino, char *path; int err; - get_path(f, ino, &path); + get_path_nullok(f, ino, &path); err = fuse_flush_common(f, req, ino, path, fi); free_path(f, ino, path); @@ -3247,6 +3955,24 @@ static void fuse_lib_setlk(fuse_req_t req, fuse_ino_t ino, reply_err(req, err); } +static void fuse_lib_flock(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, int op) +{ + struct fuse *f = req_fuse_prepare(req); + char *path; + int err; + + err = get_path_nullok(f, ino, &path); + if (err == 0) { + struct fuse_intr_data d; + fuse_prepare_interrupt(f, req, &d); + err = fuse_fs_flock(f->fs, path, fi, op); + fuse_finish_interrupt(f, req, &d); + free_path(f, ino, path); + } + reply_err(req, err); +} + static void fuse_lib_bmap(fuse_req_t req, fuse_ino_t ino, size_t blocksize, uint64_t idx) { @@ -3293,7 +4019,7 @@ static void fuse_lib_ioctl(fuse_req_t req, fuse_ino_t ino, int cmd, void *arg, if (out_buf) memcpy(out_buf, in_buf, in_bufsz); - err = get_path(f, ino, &path); + err = get_path_nullok(f, ino, &path); if (err) goto err; @@ -3319,20 +4045,93 @@ static void fuse_lib_poll(fuse_req_t req, fuse_ino_t ino, struct fuse *f = req_fuse_prepare(req); struct fuse_intr_data d; char *path; - int ret; + int err; unsigned revents = 0; - ret = get_path(f, ino, &path); - if (!ret) { + err = get_path_nullok(f, ino, &path); + if (!err) { fuse_prepare_interrupt(f, req, &d); - ret = fuse_fs_poll(f->fs, path, fi, ph, &revents); + err = fuse_fs_poll(f->fs, path, fi, ph, &revents); fuse_finish_interrupt(f, req, &d); free_path(f, ino, path); } - if (!ret) + if (!err) fuse_reply_poll(req, revents); else - reply_err(req, ret); + reply_err(req, err); +} + +static void fuse_lib_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, + off64_t offset, off64_t length, struct fuse_file_info *fi) +{ + struct fuse *f = req_fuse_prepare(req); + struct fuse_intr_data d; + char *path; + int err; + + err = get_path_nullok(f, ino, &path); + if (!err) { + fuse_prepare_interrupt(f, req, &d); + err = fuse_fs_fallocate(f->fs, path, mode, offset, length, fi); + fuse_finish_interrupt(f, req, &d); + free_path(f, ino, path); + } + reply_err(req, err); +} + +static int clean_delay(struct fuse *f) +{ + /* + * This is calculating the delay between clean runs. To + * reduce the number of cleans we are doing them 10 times + * within the remember window. + */ + int min_sleep = 60; + int max_sleep = 3600; + int sleep_time = f->conf.remember / 10; + + if (sleep_time > max_sleep) + return max_sleep; + if (sleep_time < min_sleep) + return min_sleep; + return sleep_time; +} + +int fuse_clean_cache(struct fuse *f) +{ + struct node_lru *lnode; + struct list_head *curr, *next; + struct node *node; + struct timespec now; + + pthread_mutex_lock(&f->lock); + + curr_time(&now); + + for (curr = f->lru_table.next; curr != &f->lru_table; curr = next) { + double age; + + next = curr->next; + lnode = list_entry(curr, struct node_lru, lru); + node = &lnode->node; + + age = diff_timespec(&now, &lnode->forget_time); + if (age <= f->conf.remember) + break; + + assert(node->nlookup == 1); + + /* Don't forget active directories */ + if (node->refctr > 1) + continue; + + node->nlookup = 0; + unhash_name(f, node); + unref_node(f, node); + } + pthread_mutex_unlock(&f->lock); + + return clean_delay(f); } static struct fuse_lowlevel_ops fuse_path_ops = { @@ -3340,6 +4139,7 @@ static struct fuse_lowlevel_ops fuse_path_ops = { .destroy = fuse_lib_destroy, .lookup = fuse_lib_lookup, .forget = fuse_lib_forget, + .forget_multi = fuse_lib_forget_multi, .getattr = fuse_lib_getattr, .setattr = fuse_lib_setattr, .access = fuse_lib_access, @@ -3354,7 +4154,7 @@ static struct fuse_lowlevel_ops fuse_path_ops = { .create = fuse_lib_create, .open = fuse_lib_open, .read = fuse_lib_read, - .write = fuse_lib_write, + .write_buf = fuse_lib_write_buf, .flush = fuse_lib_flush, .release = fuse_lib_release, .fsync = fuse_lib_fsync, @@ -3369,9 +4169,11 @@ static struct fuse_lowlevel_ops fuse_path_ops = { .removexattr = fuse_lib_removexattr, .getlk = fuse_lib_getlk, .setlk = fuse_lib_setlk, + .flock = fuse_lib_flock, .bmap = fuse_lib_bmap, .ioctl = fuse_lib_ioctl, .poll = fuse_lib_poll, + .fallocate = fuse_lib_fallocate, }; int fuse_notify_poll(struct fuse_pollhandle *ph) @@ -3436,12 +4238,77 @@ struct fuse_cmd *fuse_read_cmd(struct fuse *f) return cmd; } +static int fuse_session_loop_remember(struct fuse *f) +{ + struct fuse_session *se = f->se; + int res = 0; + struct timespec now; + time_t next_clean; + struct fuse_chan *ch = fuse_session_next_chan(se, NULL); + size_t bufsize = fuse_chan_bufsize(ch); + char *buf = (char *) malloc(bufsize); + struct pollfd fds = { + .fd = fuse_chan_fd(ch), + .events = POLLIN + }; + + if (!buf) { + fprintf(stderr, "fuse: failed to allocate read buffer\n"); + return -1; + } + + curr_time(&now); + next_clean = now.tv_sec; + while (!fuse_session_exited(se)) { + struct fuse_chan *tmpch = ch; + struct fuse_buf fbuf = { + .mem = buf, + .size = bufsize, + }; + unsigned timeout; + + curr_time(&now); + if (now.tv_sec < next_clean) + timeout = next_clean - now.tv_sec; + else + timeout = 0; + + res = poll(&fds, 1, timeout * 1000); + if (res == -1) { + if (errno == -EINTR) + continue; + else + break; + } else if (res > 0) { + res = fuse_session_receive_buf(se, &fbuf, &tmpch); + + if (res == -EINTR) + continue; + if (res <= 0) + break; + + fuse_session_process_buf(se, &fbuf, tmpch); + } else { + timeout = fuse_clean_cache(f); + curr_time(&now); + next_clean = now.tv_sec + timeout; + } + } + + free(buf); + fuse_session_reset(se); + return res < 0 ? -1 : 0; +} + int fuse_loop(struct fuse *f) { - if (f) - return fuse_session_loop(f->se); - else + if (!f) return -1; + + if (lru_enabled(f)) + return fuse_session_loop_remember(f); + + return fuse_session_loop(f->se); } int fuse_invalidate(struct fuse *f, const char *path) @@ -3521,7 +4388,9 @@ static const struct fuse_opt fuse_lib_opts[] = { FUSE_LIB_OPT("ac_attr_timeout=%lf", ac_attr_timeout, 0), FUSE_LIB_OPT("ac_attr_timeout=", ac_attr_timeout_set, 1), FUSE_LIB_OPT("negative_timeout=%lf", negative_timeout, 0), - FUSE_LIB_OPT("noforget", noforget, 1), + FUSE_LIB_OPT("noforget", remember, -1), + FUSE_LIB_OPT("remember=%u", remember, 0), + FUSE_LIB_OPT("nopath", nopath, 1), FUSE_LIB_OPT("intr", intr, 1), FUSE_LIB_OPT("intr_signal=%d", intr_signal, 0), FUSE_LIB_OPT("modules=%s", modules, 0), @@ -3544,6 +4413,8 @@ static void fuse_lib_help(void) " -o negative_timeout=T cache timeout for deleted names (0.0s)\n" " -o attr_timeout=T cache timeout for attributes (1.0s)\n" " -o ac_attr_timeout=T auto cache timeout for attributes (attr_timeout)\n" +" -o noforget never forget cached inodes\n" +" -o remember=T remember cached inodes for T seconds (0s)\n" " -o intr allow requests to be interrupted\n" " -o intr_signal=NUM signal to send on interrupt (%i)\n" " -o modules=M1[:M2...] names of modules to push onto filesystem stack\n" @@ -3643,6 +4514,8 @@ static int fuse_push_module(struct fuse *f, const char *module, newfs->m = m; f->fs = newfs; f->nullpath_ok = newfs->op.flag_nullpath_ok && f->nullpath_ok; + f->conf.nopath = newfs->op.flag_nopath && f->conf.nopath; + f->utime_omit_ok = newfs->op.flag_utime_omit_ok && f->utime_omit_ok; return 0; } @@ -3668,6 +4541,50 @@ struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, return fs; } +static int node_table_init(struct node_table *t) +{ + t->size = NODE_TABLE_MIN_SIZE; + t->array = (struct node **) calloc(1, sizeof(struct node *) * t->size); + if (t->array == NULL) { + fprintf(stderr, "fuse: memory allocation failed\n"); + return -1; + } + t->use = 0; + t->split = 0; + + return 0; +} + +static void *fuse_prune_nodes(void *fuse) +{ + struct fuse *f = fuse; + int sleep_time; + + while(1) { + sleep_time = fuse_clean_cache(f); + sleep(sleep_time); + } + return NULL; +} + +int fuse_start_cleanup_thread(struct fuse *f) +{ + if (lru_enabled(f)) + return fuse_start_thread(&f->prune_thread, fuse_prune_nodes, f); + + return 0; +} + +void fuse_stop_cleanup_thread(struct fuse *f) +{ + if (lru_enabled(f)) { + pthread_mutex_lock(&f->lock); + pthread_cancel(f->prune_thread); + pthread_mutex_unlock(&f->lock); + pthread_join(f->prune_thread, NULL); + } +} + struct fuse *fuse_new_common(struct fuse_chan *ch, struct fuse_args *args, const struct fuse_operations *op, size_t op_size, void *user_data, int compat) @@ -3693,6 +4610,8 @@ struct fuse *fuse_new_common(struct fuse_chan *ch, struct fuse_args *args, fs->compat = compat; f->fs = fs; f->nullpath_ok = fs->op.flag_nullpath_ok; + f->conf.nopath = fs->op.flag_nopath; + f->utime_omit_ok = fs->op.flag_utime_omit_ok; /* Oh f**k, this is ugly! */ if (!fs->op.lock) { @@ -3705,6 +4624,11 @@ struct fuse *fuse_new_common(struct fuse_chan *ch, struct fuse_args *args, f->conf.negative_timeout = 0.0; f->conf.intr_signal = FUSE_DEFAULT_INTR_SIGNAL; + f->pagesize = getpagesize(); + init_list_head(&f->partial_slabs); + init_list_head(&f->full_slabs); + init_list_head(&f->lru_table); + if (fuse_opt_parse(args, &f->conf, fuse_lib_opts, fuse_lib_opt_proc) == -1) goto out_free_fs; @@ -3727,7 +4651,7 @@ struct fuse *fuse_new_common(struct fuse_chan *ch, struct fuse_args *args, if (!f->conf.ac_attr_timeout_set) f->conf.ac_attr_timeout = f->conf.attr_timeout; -#ifdef __FreeBSD__ +#if defined(__FreeBSD__) || defined(__NetBSD__) /* * In FreeBSD, we always use these settings as inode numbers * are needed to make getcwd(3) work. @@ -3749,66 +4673,51 @@ struct fuse *fuse_new_common(struct fuse_chan *ch, struct fuse_args *args, fuse_session_add_chan(f->se, ch); - if (f->conf.debug) + if (f->conf.debug) { fprintf(stderr, "nullpath_ok: %i\n", f->nullpath_ok); + fprintf(stderr, "nopath: %i\n", f->conf.nopath); + fprintf(stderr, "utime_omit_ok: %i\n", f->utime_omit_ok); + } /* Trace topmost layer by default */ f->fs->debug = f->conf.debug; f->ctr = 0; f->generation = 0; - /* FIXME: Dynamic hash table */ - f->name_table_size = 14057; - f->name_table = (struct node **) - calloc(1, sizeof(struct node *) * f->name_table_size); - if (f->name_table == NULL) { - fprintf(stderr, "fuse: memory allocation failed\n"); + if (node_table_init(&f->name_table) == -1) goto out_free_session; - } - f->id_table_size = 14057; - f->id_table = (struct node **) - calloc(1, sizeof(struct node *) * f->id_table_size); - if (f->id_table == NULL) { - fprintf(stderr, "fuse: memory allocation failed\n"); + if (node_table_init(&f->id_table) == -1) goto out_free_name_table; - } fuse_mutex_init(&f->lock); - root = (struct node *) calloc(1, sizeof(struct node)); + root = alloc_node(f); if (root == NULL) { fprintf(stderr, "fuse: memory allocation failed\n"); goto out_free_id_table; } - root->name = strdup("/"); - if (root->name == NULL) { - fprintf(stderr, "fuse: memory allocation failed\n"); - goto out_free_root; - } + strcpy(root->inline_name, "/"); + root->name = root->inline_name; if (f->conf.intr && fuse_init_intr_signal(f->conf.intr_signal, &f->intr_installed) == -1) - goto out_free_root_name; + goto out_free_root; root->parent = NULL; root->nodeid = FUSE_ROOT_ID; - root->generation = 0; - root->refctr = 1; - root->nlookup = 1; + inc_nlookup(root); hash_id(f, root); return f; -out_free_root_name: - free(root->name); out_free_root: free(root); out_free_id_table: - free(f->id_table); + free(f->id_table.array); out_free_name_table: - free(f->name_table); + free(f->name_table.array); out_free_session: fuse_session_destroy(f->se); out_free_fs: @@ -3845,14 +4754,14 @@ void fuse_destroy(struct fuse *f) memset(c, 0, sizeof(*c)); c->ctx.fuse = f; - for (i = 0; i < f->id_table_size; i++) { + for (i = 0; i < f->id_table.size; i++) { struct node *node; - for (node = f->id_table[i]; node != NULL; + for (node = f->id_table.array[i]; node != NULL; node = node->id_next) { if (node->is_hidden) { char *path; - if (try_get_path(f, node->nodeid, NULL, &path, NULL, 0) == 0) { + if (try_get_path(f, node->nodeid, NULL, &path, NULL, false) == 0) { fuse_fs_unlink(f->fs, path); free(path); } @@ -3860,17 +4769,21 @@ void fuse_destroy(struct fuse *f) } } } - for (i = 0; i < f->id_table_size; i++) { + for (i = 0; i < f->id_table.size; i++) { struct node *node; struct node *next; - for (node = f->id_table[i]; node != NULL; node = next) { + for (node = f->id_table.array[i]; node != NULL; node = next) { next = node->id_next; - free_node(node); + free_node(f, node); + f->id_table.use--; } } - free(f->id_table); - free(f->name_table); + assert(list_empty(&f->partial_slabs)); + assert(list_empty(&f->full_slabs)); + + free(f->id_table.array); + free(f->name_table.array); pthread_mutex_destroy(&f->lock); fuse_session_destroy(f->se); free(f->conf.modules); @@ -3903,7 +4816,7 @@ void fuse_register_module(struct fuse_module *mod) fuse_modules = mod; } -#ifndef __FreeBSD__ +#if !defined(__FreeBSD__) && !defined(__NetBSD__) static struct fuse *fuse_new_common_compat(int fd, const char *opts, const struct fuse_operations *op, @@ -3960,7 +4873,7 @@ FUSE_SYMVER(".symver fuse_set_getcontext_func,__fuse_set_getcontext_func@"); FUSE_SYMVER(".symver fuse_new_compat2,fuse_new@"); FUSE_SYMVER(".symver fuse_new_compat22,fuse_new@FUSE_2.2"); -#endif /* __FreeBSD__ */ +#endif /* __FreeBSD__ || __NetBSD__ */ struct fuse *fuse_new_compat25(int fd, struct fuse_args *args, const struct fuse_operations_compat25 *op, diff --git a/fuse/fuse_i.h b/fuse/fuse_i.h index 6285c952e..78f14677a 100644 --- a/fuse/fuse_i.h +++ b/fuse/fuse_i.h @@ -8,7 +8,6 @@ #include "fuse.h" #include "fuse_lowlevel.h" -#include struct fuse_chan; struct fuse_ll; @@ -16,6 +15,12 @@ struct fuse_ll; struct fuse_session { struct fuse_session_ops op; + int (*receive_buf)(struct fuse_session *se, struct fuse_buf *buf, + struct fuse_chan **chp); + + void (*process_buf)(void *data, const struct fuse_buf *buf, + struct fuse_chan *ch); + void *data; volatile int exited; @@ -31,6 +36,7 @@ struct fuse_req { struct fuse_ctx ctx; struct fuse_chan *ch; int interrupted; + unsigned int ioctl_64bit : 1; union { struct { uint64_t unique; @@ -44,12 +50,27 @@ struct fuse_req { struct fuse_req *prev; }; +struct fuse_notify_req { + uint64_t unique; + void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, + const void *, const struct fuse_buf *); + struct fuse_notify_req *next; + struct fuse_notify_req *prev; +}; + struct fuse_ll { int debug; int allow_root; int atomic_o_trunc; - int no_remote_lock; + int no_remote_posix_lock; + int no_remote_flock; int big_writes; + int splice_write; + int splice_move; + int splice_read; + int no_splice_write; + int no_splice_move; + int no_splice_read; struct fuse_lowlevel_ops op; int got_init; struct cuse_data *cuse_data; @@ -60,6 +81,10 @@ struct fuse_ll { struct fuse_req interrupts; pthread_mutex_t lock; int got_destroy; + pthread_key_t pipe_key; + int broken_splice_nonblock; + uint64_t notify_ctr; + struct fuse_notify_req notify_list; }; struct fuse_cmd { @@ -99,3 +124,5 @@ struct fuse *fuse_setup_common(int argc, char *argv[], int compat); void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); + +int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); diff --git a/fuse/fuse_kern_chan.c b/fuse/fuse_kern_chan.c index 03291c361..5f77bbf56 100644 --- a/fuse/fuse_kern_chan.c +++ b/fuse/fuse_kern_chan.c @@ -40,7 +40,7 @@ restart: fuse_session_exit(se); return 0; } - /* Errors occuring during normal operation: EINTR (read + /* Errors occurring during normal operation: EINTR (read interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem umounted) */ if (err != EINTR && err != EAGAIN) diff --git a/fuse/fuse_loop.c b/fuse/fuse_loop.c index 104c5d431..b7b4ca4ee 100644 --- a/fuse/fuse_loop.c +++ b/fuse/fuse_loop.c @@ -25,12 +25,19 @@ int fuse_session_loop(struct fuse_session *se) while (!fuse_session_exited(se)) { struct fuse_chan *tmpch = ch; - res = fuse_chan_recv(&tmpch, buf, bufsize); + struct fuse_buf fbuf = { + .mem = buf, + .size = bufsize, + }; + + res = fuse_session_receive_buf(se, &fbuf, &tmpch); + if (res == -EINTR) continue; if (res <= 0) break; - fuse_session_process(se, buf, res, tmpch); + + fuse_session_process_buf(se, &fbuf, tmpch); } free(buf); diff --git a/fuse/fuse_loop_mt.c b/fuse/fuse_loop_mt.c index a73c399b7..7e400c2a4 100644 --- a/fuse/fuse_loop_mt.c +++ b/fuse/fuse_loop_mt.c @@ -9,6 +9,7 @@ #include "fuse_lowlevel.h" #include "fuse_misc.h" #include "fuse_kernel.h" +#include "fuse_i.h" #include #include @@ -18,8 +19,7 @@ #include #include #include - -#ifdef __MULTI_THREAD +#include /* Environment var controlling the thread stack size */ #define ENVNAME_THREAD_STACK "FUSE_THREAD_STACK" @@ -65,7 +65,7 @@ static void list_del_worker(struct fuse_worker *w) #define PTHREAD_CANCEL_ENABLE 0 #define PTHREAD_CANCEL_DISABLE 1 -static int fuse_start_thread(struct fuse_mt *mt); +static int fuse_loop_start_thread(struct fuse_mt *mt); static void *fuse_do_work(void *data) { @@ -75,10 +75,14 @@ static void *fuse_do_work(void *data) while (!fuse_session_exited(mt->se)) { int isforget = 0; struct fuse_chan *ch = mt->prevch; + struct fuse_buf fbuf = { + .mem = w->buf, + .size = w->bufsize, + }; int res; pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - res = fuse_chan_recv(&ch, w->buf, w->bufsize); + res = fuse_session_receive_buf(mt->se, &fbuf, &ch); pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); if (res == -EINTR) continue; @@ -100,16 +104,21 @@ static void *fuse_do_work(void *data) * This disgusting hack is needed so that zillions of threads * are not created on a burst of FORGET messages */ - if (((struct fuse_in_header *) w->buf)->opcode == FUSE_FORGET) - isforget = 1; + if (!(fbuf.flags & FUSE_BUF_IS_FD)) { + struct fuse_in_header *in = fbuf.mem; + + if (in->opcode == FUSE_FORGET || + in->opcode == FUSE_BATCH_FORGET) + isforget = 1; + } if (!isforget) mt->numavail--; if (mt->numavail == 0) - fuse_start_thread(mt); + fuse_loop_start_thread(mt); pthread_mutex_unlock(&mt->lock); - fuse_session_process(mt->se, w->buf, res, ch); + fuse_session_process_buf(mt->se, &fbuf, ch); pthread_mutex_lock(&mt->lock); if (!isforget) @@ -133,33 +142,17 @@ static void *fuse_do_work(void *data) } sem_post(&mt->finish); - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - pause(); return NULL; } -static int fuse_start_thread(struct fuse_mt *mt) +int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg) { sigset_t oldset; sigset_t newset; int res; pthread_attr_t attr; char *stack_size; - struct fuse_worker *w = malloc(sizeof(struct fuse_worker)); - if (!w) { - fprintf(stderr, "fuse: failed to allocate worker structure\n"); - return -1; - } - memset(w, 0, sizeof(struct fuse_worker)); - w->bufsize = fuse_chan_bufsize(mt->prevch); - w->buf = malloc(w->bufsize); - w->mt = mt; - if (!w->buf) { - fprintf(stderr, "fuse: failed to allocate read buffer\n"); - free(w); - return -1; - } /* Override default stack size */ pthread_attr_init(&attr); @@ -174,12 +167,38 @@ static int fuse_start_thread(struct fuse_mt *mt) sigaddset(&newset, SIGHUP); sigaddset(&newset, SIGQUIT); pthread_sigmask(SIG_BLOCK, &newset, &oldset); - res = pthread_create(&w->thread_id, &attr, fuse_do_work, w); + res = pthread_create(thread_id, &attr, func, arg); pthread_sigmask(SIG_SETMASK, &oldset, NULL); pthread_attr_destroy(&attr); if (res != 0) { fprintf(stderr, "fuse: error creating thread: %s\n", strerror(res)); + return -1; + } + + return 0; +} + +static int fuse_loop_start_thread(struct fuse_mt *mt) +{ + int res; + struct fuse_worker *w = malloc(sizeof(struct fuse_worker)); + if (!w) { + fprintf(stderr, "fuse: failed to allocate worker structure\n"); + return -1; + } + memset(w, 0, sizeof(struct fuse_worker)); + w->bufsize = fuse_chan_bufsize(mt->prevch); + w->buf = malloc(w->bufsize); + w->mt = mt; + if (!w->buf) { + fprintf(stderr, "fuse: failed to allocate read buffer\n"); + free(w); + return -1; + } + + res = fuse_start_thread(&w->thread_id, fuse_do_work, w); + if (res == -1) { free(w->buf); free(w); return -1; @@ -219,7 +238,7 @@ int fuse_session_loop_mt(struct fuse_session *se) fuse_mutex_init(&mt.lock); pthread_mutex_lock(&mt.lock); - err = fuse_start_thread(&mt); + err = fuse_loop_start_thread(&mt); pthread_mutex_unlock(&mt.lock); if (!err) { /* sem_wait() is interruptible */ @@ -229,7 +248,6 @@ int fuse_session_loop_mt(struct fuse_session *se) for (w = mt.main.next; w != &mt.main; w = w->next) pthread_cancel(w->thread_id); mt.exit = 1; - pthread_mutex_unlock(&mt.lock); while (mt.main.next != &mt.main) fuse_join_worker(&mt, mt.main.next); @@ -242,5 +260,3 @@ int fuse_session_loop_mt(struct fuse_session *se) fuse_session_reset(se); return err; } - -#endif diff --git a/fuse/fuse_lowlevel.c b/fuse/fuse_lowlevel.c index 2e0ad0fd9..103f83154 100644 --- a/fuse/fuse_lowlevel.c +++ b/fuse/fuse_lowlevel.c @@ -6,6 +6,9 @@ See the file COPYING.LIB */ +#define _GNU_SOURCE + +#include "config.h" #include "fuse_i.h" #include "fuse_kernel.h" #include "fuse_opt.h" @@ -13,7 +16,6 @@ #include "fuse_common_compat.h" #include "fuse_lowlevel_compat.h" -#define linux #include #include #include @@ -21,16 +23,36 @@ #include #include #include +#include + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif +#ifndef F_SETPIPE_SZ +#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) +#endif + #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) #define OFFSET_MAX 0x7fffffffffffffffLL +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + struct fuse_pollhandle { uint64_t kh; struct fuse_chan *ch; struct fuse_ll *f; }; +static size_t pagesize; + +static __attribute__((constructor)) void fuse_ll_init_pagesize(void) +{ + pagesize = getpagesize(); +} + static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) { attr->ino = stbuf->st_ino; @@ -116,6 +138,49 @@ void fuse_free_req(fuse_req_t req) destroy_req(req); } +static struct fuse_req *fuse_ll_alloc_req(struct fuse_ll *f) +{ + struct fuse_req *req; + + req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); + if (req == NULL) { + fprintf(stderr, "fuse: failed to allocate request\n"); + } else { + req->f = f; + req->ctr = 1; + list_init_req(req); + fuse_mutex_init(&req->lock); + } + + return req; +} + + +static int fuse_send_msg(struct fuse_ll *f, struct fuse_chan *ch, + struct iovec *iov, int count) +{ + struct fuse_out_header *out = iov[0].iov_base; + + out->len = iov_length(iov, count); + if (f->debug) { + if (out->unique == 0) { + fprintf(stderr, "NOTIFY: code=%d length=%u\n", + out->error, out->len); + } else if (out->error) { + fprintf(stderr, + " unique: %llu, error: %i (%s), outsize: %i\n", + (unsigned long long) out->unique, out->error, + strerror(-out->error), out->len); + } else { + fprintf(stderr, + " unique: %llu, success, outsize: %i\n", + (unsigned long long) out->unique, out->len); + } + } + + return fuse_chan_send(ch, iov, count); +} + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, int count) { @@ -128,24 +193,11 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, out.unique = req->unique; out.error = error; + iov[0].iov_base = &out; iov[0].iov_len = sizeof(struct fuse_out_header); - out.len = iov_length(iov, count); - - if (req->f->debug) { - if (out.error) { - fprintf(stderr, - " unique: %llu, error: %i (%s), outsize: %i\n", - (unsigned long long) out.unique, out.error, - strerror(-out.error), out.len); - } else { - fprintf(stderr, - " unique: %llu, success, outsize: %i\n", - (unsigned long long) out.unique, out.len); - } - } - return fuse_chan_send(req->ch, iov, count); + return fuse_send_msg(req->f, req->ch, iov, count); } static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, @@ -177,10 +229,8 @@ int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) struct iovec *padded_iov; padded_iov = malloc((count + 1) * sizeof(struct iovec)); - if (padded_iov == NULL) { - printf("ENOMEM fuse_reply_iov\n"); - return fuse_reply_err(req, -ENOMEM); - } + if (padded_iov == NULL) + return fuse_reply_err(req, ENOMEM); memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); count++; @@ -253,7 +303,8 @@ int fuse_reply_err(fuse_req_t req, int err) void fuse_reply_none(fuse_req_t req) { - fuse_chan_send(req->ch, NULL, 0); + if (req->ch) + fuse_chan_send(req->ch, NULL, 0); fuse_free_req(req); } @@ -310,10 +361,8 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant negative entry */ - if (!e->ino && req->f->conn.proto_minor < 4) { - printf("ENOENT fuse_reply_entry\n"); + if (!e->ino && req->f->conn.proto_minor < 4) return fuse_reply_err(req, ENOENT); - } memset(&arg, 0, sizeof(arg)); fill_entry(&arg, e); @@ -380,6 +429,355 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) return send_reply_ok(req, buf, size); } +static int fuse_send_data_iov_fallback(struct fuse_ll *f, struct fuse_chan *ch, + struct iovec *iov, int iov_count, + struct fuse_bufvec *buf, + size_t len) +{ + struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); + void *mbuf; + int res; + + /* Optimize common case */ + if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && + !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { + /* FIXME: also avoid memory copy if there are multiple buffers + but none of them contain an fd */ + + iov[iov_count].iov_base = buf->buf[0].mem; + iov[iov_count].iov_len = len; + iov_count++; + return fuse_send_msg(f, ch, iov, iov_count); + } + + res = posix_memalign(&mbuf, pagesize, len); + if (res != 0) + return res; + + mem_buf.buf[0].mem = mbuf; + res = fuse_buf_copy(&mem_buf, buf, 0); + if (res < 0) { + free(mbuf); + return -res; + } + len = res; + + iov[iov_count].iov_base = mbuf; + iov[iov_count].iov_len = len; + iov_count++; + res = fuse_send_msg(f, ch, iov, iov_count); + free(mbuf); + + return res; +} + +struct fuse_ll_pipe { + size_t size; + int can_grow; + int pipe[2]; +}; + +static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) +{ + close(llp->pipe[0]); + close(llp->pipe[1]); + free(llp); +} + +#ifdef HAVE_SPLICE +static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_ll *f) +{ + struct fuse_ll_pipe *llp = pthread_getspecific(f->pipe_key); + if (llp == NULL) { + int res; + + llp = malloc(sizeof(struct fuse_ll_pipe)); + if (llp == NULL) + return NULL; + + res = pipe(llp->pipe); + if (res == -1) { + free(llp); + return NULL; + } + + if (fcntl(llp->pipe[0], F_SETFL, O_NONBLOCK) == -1 || + fcntl(llp->pipe[1], F_SETFL, O_NONBLOCK) == -1) { + close(llp->pipe[0]); + close(llp->pipe[1]); + free(llp); + return NULL; + } + + /* + *the default size is 16 pages on linux + */ + llp->size = pagesize * 16; + llp->can_grow = 1; + + pthread_setspecific(f->pipe_key, llp); + } + + return llp; +} +#endif + +static void fuse_ll_clear_pipe(struct fuse_ll *f) +{ + struct fuse_ll_pipe *llp = pthread_getspecific(f->pipe_key); + if (llp) { + pthread_setspecific(f->pipe_key, NULL); + fuse_ll_pipe_free(llp); + } +} + +#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) +static int read_back(int fd, char *buf, size_t len) +{ + int res; + + res = read(fd, buf, len); + if (res == -1) { + fprintf(stderr, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); + return -EIO; + } + if (res != len) { + fprintf(stderr, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); + return -EIO; + } + return 0; +} + +static int fuse_send_data_iov(struct fuse_ll *f, struct fuse_chan *ch, + struct iovec *iov, int iov_count, + struct fuse_bufvec *buf, unsigned int flags) +{ + int res; + size_t len = fuse_buf_size(buf); + struct fuse_out_header *out = iov[0].iov_base; + struct fuse_ll_pipe *llp; + int splice_flags; + size_t pipesize; + size_t total_fd_size; + size_t idx; + size_t headerlen; + struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); + + if (f->broken_splice_nonblock) + goto fallback; + + if (flags & FUSE_BUF_NO_SPLICE) + goto fallback; + + total_fd_size = 0; + for (idx = buf->idx; idx < buf->count; idx++) { + if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { + total_fd_size = buf->buf[idx].size; + if (idx == buf->idx) + total_fd_size -= buf->off; + } + } + if (total_fd_size < 2 * pagesize) + goto fallback; + + if (f->conn.proto_minor < 14 || + !(f->conn.want & FUSE_CAP_SPLICE_WRITE)) + goto fallback; + + llp = fuse_ll_get_pipe(f); + if (llp == NULL) + goto fallback; + + + headerlen = iov_length(iov, iov_count); + + out->len = headerlen + len; + + /* + * Heuristic for the required pipe size, does not work if the + * source contains less than page size fragments + */ + pipesize = pagesize * (iov_count + buf->count + 1) + out->len; + + if (llp->size < pipesize) { + if (llp->can_grow) { + res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); + if (res == -1) { + llp->can_grow = 0; + goto fallback; + } + llp->size = res; + } + if (llp->size < pipesize) + goto fallback; + } + + + res = vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); + if (res == -1) + goto fallback; + + if (res != headerlen) { + res = -EIO; + fprintf(stderr, "fuse: short vmsplice to pipe: %u/%zu\n", res, + headerlen); + goto clear_pipe; + } + + pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; + pipe_buf.buf[0].fd = llp->pipe[1]; + + res = fuse_buf_copy(&pipe_buf, buf, + FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); + if (res < 0) { + if (res == -EAGAIN || res == -EINVAL) { + /* + * Should only get EAGAIN on kernels with + * broken SPLICE_F_NONBLOCK support (<= + * 2.6.35) where this error or a short read is + * returned even if the pipe itself is not + * full + * + * EINVAL might mean that splice can't handle + * this combination of input and output. + */ + if (res == -EAGAIN) + f->broken_splice_nonblock = 1; + + pthread_setspecific(f->pipe_key, NULL); + fuse_ll_pipe_free(llp); + goto fallback; + } + res = -res; + goto clear_pipe; + } + + if (res != 0 && res < len) { + struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); + void *mbuf; + size_t now_len = res; + /* + * For regular files a short count is either + * 1) due to EOF, or + * 2) because of broken SPLICE_F_NONBLOCK (see above) + * + * For other inputs it's possible that we overflowed + * the pipe because of small buffer fragments. + */ + + res = posix_memalign(&mbuf, pagesize, len); + if (res != 0) + goto clear_pipe; + + mem_buf.buf[0].mem = mbuf; + mem_buf.off = now_len; + res = fuse_buf_copy(&mem_buf, buf, 0); + if (res > 0) { + char *tmpbuf; + size_t extra_len = res; + /* + * Trickiest case: got more data. Need to get + * back the data from the pipe and then fall + * back to regular write. + */ + tmpbuf = malloc(headerlen); + if (tmpbuf == NULL) { + free(mbuf); + res = ENOMEM; + goto clear_pipe; + } + res = read_back(llp->pipe[0], tmpbuf, headerlen); + if (res != 0) { + free(mbuf); + goto clear_pipe; + } + free(tmpbuf); + res = read_back(llp->pipe[0], mbuf, now_len); + if (res != 0) { + free(mbuf); + goto clear_pipe; + } + len = now_len + extra_len; + iov[iov_count].iov_base = mbuf; + iov[iov_count].iov_len = len; + iov_count++; + res = fuse_send_msg(f, ch, iov, iov_count); + free(mbuf); + return res; + } + free(mbuf); + res = now_len; + } + len = res; + out->len = headerlen + len; + + if (f->debug) { + fprintf(stderr, + " unique: %llu, success, outsize: %i (splice)\n", + (unsigned long long) out->unique, out->len); + } + + splice_flags = 0; + if ((flags & FUSE_BUF_SPLICE_MOVE) && + (f->conn.want & FUSE_CAP_SPLICE_MOVE)) + splice_flags |= SPLICE_F_MOVE; + + res = splice(llp->pipe[0], NULL, + fuse_chan_fd(ch), NULL, out->len, splice_flags); + if (res == -1) { + res = -errno; + perror("fuse: splice from pipe"); + goto clear_pipe; + } + if (res != out->len) { + res = -EIO; + fprintf(stderr, "fuse: short splice from pipe: %u/%u\n", + res, out->len); + goto clear_pipe; + } + return 0; + +clear_pipe: + fuse_ll_clear_pipe(f); + return res; + +fallback: + return fuse_send_data_iov_fallback(f, ch, iov, iov_count, buf, len); +} +#else +static int fuse_send_data_iov(struct fuse_ll *f, struct fuse_chan *ch, + struct iovec *iov, int iov_count, + struct fuse_bufvec *buf, unsigned int flags) +{ + size_t len = fuse_buf_size(buf); + (void) flags; + + return fuse_send_data_iov_fallback(f, ch, iov, iov_count, buf, len); +} +#endif + +int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags) +{ + struct iovec iov[2]; + struct fuse_out_header out; + int res; + + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(struct fuse_out_header); + + out.unique = req->unique; + out.error = 0; + + res = fuse_send_data_iov(req->f, req->ch, iov, 1, bufv, flags); + if (res <= 0) { + fuse_free_req(req); + return res; + } else { + return fuse_reply_err(req, res); + } +} + int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) { struct fuse_statfs_out arg; @@ -402,7 +800,7 @@ int fuse_reply_xattr(fuse_req_t req, size_t count) return send_reply_ok(req, &arg, sizeof(arg)); } -int fuse_reply_lock(fuse_req_t req, struct flock *lock) +int fuse_reply_lock(fuse_req_t req, const struct flock *lock) { struct fuse_lk_out arg; @@ -429,13 +827,34 @@ int fuse_reply_bmap(fuse_req_t req, uint64_t idx) return send_reply_ok(req, &arg, sizeof(arg)); } +static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, + size_t count) +{ + struct fuse_ioctl_iovec *fiov; + size_t i; + + fiov = malloc(sizeof(fiov[0]) * count); + if (!fiov) + return NULL; + + for (i = 0; i < count; i++) { + fiov[i].base = (uintptr_t) iov[i].iov_base; + fiov[i].len = iov[i].iov_len; + } + + return fiov; +} + int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, size_t in_count, const struct iovec *out_iov, size_t out_count) { struct fuse_ioctl_out arg; + struct fuse_ioctl_iovec *in_fiov = NULL; + struct fuse_ioctl_iovec *out_fiov = NULL; struct iovec iov[4]; size_t count = 1; + int res; memset(&arg, 0, sizeof(arg)); arg.flags |= FUSE_IOCTL_RETRY; @@ -445,19 +864,55 @@ int fuse_reply_ioctl_retry(fuse_req_t req, iov[count].iov_len = sizeof(arg); count++; - if (in_count) { - iov[count].iov_base = (void *)in_iov; - iov[count].iov_len = sizeof(in_iov[0]) * in_count; - count++; - } + if (req->f->conn.proto_minor < 16) { + if (in_count) { + iov[count].iov_base = (void *)in_iov; + iov[count].iov_len = sizeof(in_iov[0]) * in_count; + count++; + } - if (out_count) { - iov[count].iov_base = (void *)out_iov; - iov[count].iov_len = sizeof(out_iov[0]) * out_count; - count++; + if (out_count) { + iov[count].iov_base = (void *)out_iov; + iov[count].iov_len = sizeof(out_iov[0]) * out_count; + count++; + } + } else { + /* Can't handle non-compat 64bit ioctls on 32bit */ + if (sizeof(void *) == 4 && req->ioctl_64bit) { + res = fuse_reply_err(req, EINVAL); + goto out; + } + + if (in_count) { + in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); + if (!in_fiov) + goto enomem; + + iov[count].iov_base = (void *)in_fiov; + iov[count].iov_len = sizeof(in_fiov[0]) * in_count; + count++; + } + if (out_count) { + out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); + if (!out_fiov) + goto enomem; + + iov[count].iov_base = (void *)out_fiov; + iov[count].iov_len = sizeof(out_fiov[0]) * out_count; + count++; + } } - return send_reply_iov(req, 0, iov, count); + res = send_reply_iov(req, 0, iov, count); +out: + free(in_fiov); + free(out_fiov); + + return res; + +enomem: + res = fuse_reply_err(req, ENOMEM); + goto out; } int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) @@ -489,10 +944,8 @@ int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, int res; padded_iov = malloc((count + 2) * sizeof(struct iovec)); - if (padded_iov == NULL) { - printf("ENOMEM fuse_reply_err\n"); - return fuse_reply_err(req, -ENOMEM); - } + if (padded_iov == NULL) + return fuse_reply_err(req, ENOMEM); memset(&arg, 0, sizeof(arg)); arg.result = result; @@ -523,10 +976,8 @@ static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.lookup) req->f->op.lookup(req, nodeid, name); - else { - printf("ENOSYS do_lookup\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -539,6 +990,40 @@ static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fuse_reply_none(req); } +static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, + const void *inarg) +{ + struct fuse_batch_forget_in *arg = (void *) inarg; + struct fuse_forget_one *param = (void *) PARAM(arg); + unsigned int i; + + (void) nodeid; + + if (req->f->op.forget_multi) { + req->f->op.forget_multi(req, arg->count, + (struct fuse_forget_data *) param); + } else if (req->f->op.forget) { + for (i = 0; i < arg->count; i++) { + struct fuse_forget_one *forget = ¶m[i]; + struct fuse_req *dummy_req; + + dummy_req = fuse_ll_alloc_req(req->f); + if (dummy_req == NULL) + break; + + dummy_req->unique = req->unique; + dummy_req->ctx = req->ctx; + dummy_req->ch = NULL; + + req->f->op.forget(dummy_req, forget->nodeid, + forget->nlookup); + } + fuse_reply_none(req); + } else { + fuse_reply_none(req); + } +} + static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) { struct fuse_file_info *fip = NULL; @@ -557,10 +1042,8 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.getattr) req->f->op.getattr(req, nodeid, fip); - else { - printf("ENOSYS do_getattr\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -591,10 +1074,8 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) FUSE_SET_ATTR_MTIME_NOW; req->f->op.setattr(req, nodeid, &stbuf, arg->valid, fi); - } else { - printf("ENOSYS do_setattr\n"); + } else fuse_reply_err(req, ENOSYS); - } } static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -603,10 +1084,8 @@ static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.access) req->f->op.access(req, nodeid, arg->mask); - else { - printf("ENOSYS do_access\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -615,10 +1094,8 @@ static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.readlink) req->f->op.readlink(req, nodeid); - else { - printf("ENOSYS do_readlink\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -633,10 +1110,8 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.mknod) req->f->op.mknod(req, nodeid, name, arg->mode, arg->rdev); - else { - printf("ENOSYS do_mknod\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -648,10 +1123,8 @@ static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.mkdir) req->f->op.mkdir(req, nodeid, PARAM(arg), arg->mode); - else { - printf("ENOSYS do_mkdir\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -660,10 +1133,8 @@ static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.unlink) req->f->op.unlink(req, nodeid, name); - else { - printf("ENOSYS do_unlink\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -672,10 +1143,8 @@ static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.rmdir) req->f->op.rmdir(req, nodeid, name); - else { - printf("ENOSYS do_rmdir\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -685,10 +1154,8 @@ static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.symlink) req->f->op.symlink(req, linkname, nodeid, name); - else { - printf("ENOSYS do_symlink\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -699,10 +1166,8 @@ static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.rename) req->f->op.rename(req, nodeid, oldname, arg->newdir, newname); - else { - printf("ENOSYS do_rename\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -711,10 +1176,8 @@ static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.link) req->f->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); - else { - printf("ENOSYS do_link\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -734,10 +1197,8 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) name = (char *) inarg + sizeof(struct fuse_open_in); req->f->op.create(req, nodeid, name, arg->mode, &fi); - } else { - printf("ENOSYS do_create\n"); + } else fuse_reply_err(req, ENOSYS); - } } static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -769,10 +1230,8 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fi.flags = arg->flags; } req->f->op.read(req, nodeid, arg->size, arg->offset, &fi); - } else { - printf("ENOSYS do_read\n"); + } else fuse_reply_err(req, ENOSYS); - } } static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -797,33 +1256,74 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.write) req->f->op.write(req, nodeid, param, arg->size, arg->offset, &fi); - else { - printf("ENOSYS do_write\n"); + else fuse_reply_err(req, ENOSYS); - } } -static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + const struct fuse_buf *ibuf) { - struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; + struct fuse_ll *f = req->f; + struct fuse_bufvec bufv = { + .buf[0] = *ibuf, + .count = 1, + }; + struct fuse_write_in *arg = (struct fuse_write_in *) inarg; struct fuse_file_info fi; memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; fi.fh_old = fi.fh; - fi.flush = 1; - if (req->f->conn.proto_minor >= 7) + fi.writepage = arg->write_flags & 1; + + if (req->f->conn.proto_minor < 9) { + bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; + bufv.buf[0].size -= sizeof(struct fuse_in_header) + + FUSE_COMPAT_WRITE_IN_SIZE; + assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); + } else { fi.lock_owner = arg->lock_owner; + fi.flags = arg->flags; + if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) + bufv.buf[0].mem = PARAM(arg); - if (req->f->op.flush) - req->f->op.flush(req, nodeid, &fi); - else { - printf("ENOSYS do_flush\n"); - fuse_reply_err(req, ENOSYS); + bufv.buf[0].size -= sizeof(struct fuse_in_header) + + sizeof(struct fuse_write_in); } -} + if (bufv.buf[0].size < arg->size) { + fprintf(stderr, "fuse: do_write_buf: buffer size too small\n"); + fuse_reply_err(req, EIO); + goto out; + } + bufv.buf[0].size = arg->size; -static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + req->f->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); + +out: + /* Need to reset the pipe if ->write_buf() didn't consume all data */ + if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) + fuse_ll_clear_pipe(f); +} + +static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; + struct fuse_file_info fi; + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.fh_old = fi.fh; + fi.flush = 1; + if (req->f->conn.proto_minor >= 7) + fi.lock_owner = arg->lock_owner; + + if (req->f->op.flush) + req->f->op.flush(req, nodeid, &fi); + else + fuse_reply_err(req, ENOSYS); +} + +static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) { struct fuse_release_in *arg = (struct fuse_release_in *) inarg; struct fuse_file_info fi; @@ -836,6 +1336,10 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; fi.lock_owner = arg->lock_owner; } + if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { + fi.flock_release = 1; + fi.lock_owner = arg->lock_owner; + } if (req->f->op.release) req->f->op.release(req, nodeid, &fi); @@ -854,10 +1358,8 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.fsync) req->f->op.fsync(req, nodeid, arg->fsync_flags & 1, &fi); - else { - printf("ENOSYS do_fsync\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -885,10 +1387,8 @@ static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.readdir) req->f->op.readdir(req, nodeid, arg->size, arg->offset, &fi); - else { - printf("ENOSYS do_fsync\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -918,10 +1418,8 @@ static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.fsyncdir) req->f->op.fsyncdir(req, nodeid, arg->fsync_flags & 1, &fi); - else { - printf("ENOSYS do_fsyncdir\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -949,10 +1447,8 @@ static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.setxattr) req->f->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); - else { - printf("ENOSYS do_setxattr\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -961,10 +1457,8 @@ static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.getxattr) req->f->op.getxattr(req, nodeid, PARAM(arg), arg->size); - else { - printf("ENOSYS do_getxattr\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -973,10 +1467,8 @@ static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.listxattr) req->f->op.listxattr(req, nodeid, arg->size); - else { - printf("ENOSYS do_listxattr\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -985,10 +1477,8 @@ static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.removexattr) req->f->op.removexattr(req, nodeid, name); - else { - printf("ENOSYS do_removetxattr\n"); + else fuse_reply_err(req, ENOSYS); - } } static void convert_fuse_file_lock(struct fuse_file_lock *fl, @@ -1018,10 +1508,8 @@ static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) convert_fuse_file_lock(&arg->lk, &flock); if (req->f->op.getlk) req->f->op.getlk(req, nodeid, &fi, &flock); - else { - printf("do_getlk ENOSYS\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, @@ -1035,12 +1523,33 @@ static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, fi.fh = arg->fh; fi.lock_owner = arg->owner; - convert_fuse_file_lock(&arg->lk, &flock); - if (req->f->op.setlk) - req->f->op.setlk(req, nodeid, &fi, &flock, sleep); - else { - printf("do_getlk ENOSYS\n"); - fuse_reply_err(req, ENOSYS); + if (arg->lk_flags & FUSE_LK_FLOCK) { + int op = 0; + + switch (arg->lk.type) { + case F_RDLCK: + op = LOCK_SH; + break; + case F_WRLCK: + op = LOCK_EX; + break; + case F_UNLCK: + op = LOCK_UN; + break; + } + if (!sleep) + op |= LOCK_NB; + + if (req->f->op.flock) + req->f->op.flock(req, nodeid, &fi, op); + else + fuse_reply_err(req, ENOSYS); + } else { + convert_fuse_file_lock(&arg->lk, &flock); + if (req->f->op.setlk) + req->f->op.setlk(req, nodeid, &fi, &flock, sleep); + else + fuse_reply_err(req, ENOSYS); } } @@ -1141,10 +1650,8 @@ static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (req->f->op.bmap) req->f->op.bmap(req, nodeid, arg->blocksize, arg->block); - else { - printf("do_bmap ENOSYS\n"); + else fuse_reply_err(req, ENOSYS); - } } static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) @@ -1154,18 +1661,27 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) void *in_buf = arg->in_size ? PARAM(arg) : NULL; struct fuse_file_info fi; + if (flags & FUSE_IOCTL_DIR && + !(req->f->conn.want & FUSE_CAP_IOCTL_DIR)) { + fuse_reply_err(req, ENOTTY); + return; + } + memset(&fi, 0, sizeof(fi)); fi.fh = arg->fh; fi.fh_old = fi.fh; + if (sizeof(void *) == 4 && req->f->conn.proto_minor >= 16 && + !(flags & FUSE_IOCTL_32BIT)) { + req->ioctl_64bit = 1; + } + if (req->f->op.ioctl) req->f->op.ioctl(req, nodeid, arg->cmd, (void *)(uintptr_t)arg->arg, &fi, flags, in_buf, arg->in_size, arg->out_size); - else { - printf("do_ioctl ENOSYS\n"); + else fuse_reply_err(req, ENOSYS); - } } void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) @@ -1188,7 +1704,6 @@ static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { ph = malloc(sizeof(struct fuse_pollhandle)); if (ph == NULL) { - printf("ENOMEM do_poll\n"); fuse_reply_err(req, ENOMEM); return; } @@ -1199,11 +1714,24 @@ static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) req->f->op.poll(req, nodeid, &fi, ph); } else { - printf("ENOSYS do_poll\n"); fuse_reply_err(req, ENOSYS); } } +static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +{ + struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; + struct fuse_file_info fi; + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + + if (req->f->op.fallocate) + req->f->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); + else + fuse_reply_err(req, ENOSYS); +} + static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) { struct fuse_init_in *arg = (struct fuse_init_in *) inarg; @@ -1232,7 +1760,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (arg->major < 7) { fprintf(stderr, "fuse: unsupported protocol version: %u.%u\n", arg->major, arg->minor); - printf("EPROTO do_init\n"); fuse_reply_err(req, EPROTO); return; } @@ -1260,15 +1787,36 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) f->conn.capable |= FUSE_CAP_BIG_WRITES; if (arg->flags & FUSE_DONT_MASK) f->conn.capable |= FUSE_CAP_DONT_MASK; + if (arg->flags & FUSE_FLOCK_LOCKS) + f->conn.capable |= FUSE_CAP_FLOCK_LOCKS; } else { f->conn.async_read = 0; f->conn.max_readahead = 0; } + if (req->f->conn.proto_minor >= 14) { +#ifdef HAVE_SPLICE +#ifdef HAVE_VMSPLICE + f->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; + if (f->splice_write) + f->conn.want |= FUSE_CAP_SPLICE_WRITE; + if (f->splice_move) + f->conn.want |= FUSE_CAP_SPLICE_MOVE; +#endif + f->conn.capable |= FUSE_CAP_SPLICE_READ; + if (f->splice_read) + f->conn.want |= FUSE_CAP_SPLICE_READ; +#endif + } + if (req->f->conn.proto_minor >= 18) + f->conn.capable |= FUSE_CAP_IOCTL_DIR; + if (f->atomic_o_trunc) f->conn.want |= FUSE_CAP_ATOMIC_O_TRUNC; - if (f->op.getlk && f->op.setlk && !f->no_remote_lock) + if (f->op.getlk && f->op.setlk && !f->no_remote_posix_lock) f->conn.want |= FUSE_CAP_POSIX_LOCKS; + if (f->op.flock && !f->no_remote_flock) + f->conn.want |= FUSE_CAP_FLOCK_LOCKS; if (f->big_writes) f->conn.want |= FUSE_CAP_BIG_WRITES; @@ -1286,6 +1834,13 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) if (f->op.init) f->op.init(f->userdata, &f->conn); + if (f->no_splice_read) + f->conn.want &= ~FUSE_CAP_SPLICE_READ; + if (f->no_splice_write) + f->conn.want &= ~FUSE_CAP_SPLICE_WRITE; + if (f->no_splice_move) + f->conn.want &= ~FUSE_CAP_SPLICE_MOVE; + if (f->conn.async_read || (f->conn.want & FUSE_CAP_ASYNC_READ)) outarg.flags |= FUSE_ASYNC_READ; if (f->conn.want & FUSE_CAP_POSIX_LOCKS) @@ -1298,8 +1853,23 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) outarg.flags |= FUSE_BIG_WRITES; if (f->conn.want & FUSE_CAP_DONT_MASK) outarg.flags |= FUSE_DONT_MASK; + if (f->conn.want & FUSE_CAP_FLOCK_LOCKS) + outarg.flags |= FUSE_FLOCK_LOCKS; outarg.max_readahead = f->conn.max_readahead; outarg.max_write = f->conn.max_write; + if (f->conn.proto_minor >= 13) { + if (f->conn.max_background >= (1 << 16)) + f->conn.max_background = (1 << 16) - 1; + if (f->conn.congestion_threshold > f->conn.max_background) + f->conn.congestion_threshold = f->conn.max_background; + if (!f->conn.congestion_threshold) { + f->conn.congestion_threshold = + f->conn.max_background * 3 / 4; + } + + outarg.max_background = f->conn.max_background; + outarg.congestion_threshold = f->conn.congestion_threshold; + } if (f->debug) { fprintf(stderr, " INIT: %u.%u\n", outarg.major, outarg.minor); @@ -1307,6 +1877,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) fprintf(stderr, " max_readahead=0x%08x\n", outarg.max_readahead); fprintf(stderr, " max_write=0x%08x\n", outarg.max_write); + fprintf(stderr, " max_background=%i\n", + outarg.max_background); + fprintf(stderr, " congestion_threshold=%i\n", + outarg.congestion_threshold); } send_reply_ok(req, &outarg, arg->minor < 5 ? 8 : sizeof(outarg)); @@ -1326,22 +1900,65 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) send_reply_ok(req, NULL, 0); } +static void list_del_nreq(struct fuse_notify_req *nreq) +{ + struct fuse_notify_req *prev = nreq->prev; + struct fuse_notify_req *next = nreq->next; + prev->next = next; + next->prev = prev; +} + +static void list_add_nreq(struct fuse_notify_req *nreq, + struct fuse_notify_req *next) +{ + struct fuse_notify_req *prev = next->prev; + nreq->next = next; + nreq->prev = prev; + prev->next = nreq; + next->prev = nreq; +} + +static void list_init_nreq(struct fuse_notify_req *nreq) +{ + nreq->next = nreq; + nreq->prev = nreq; +} + +static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, + const void *inarg, const struct fuse_buf *buf) +{ + struct fuse_ll *f = req->f; + struct fuse_notify_req *nreq; + struct fuse_notify_req *head; + + pthread_mutex_lock(&f->lock); + head = &f->notify_list; + for (nreq = head->next; nreq != head; nreq = nreq->next) { + if (nreq->unique == req->unique) { + list_del_nreq(nreq); + break; + } + } + pthread_mutex_unlock(&f->lock); + + if (nreq != head) + nreq->reply(nreq, req, nodeid, inarg, buf); +} + static int send_notify_iov(struct fuse_ll *f, struct fuse_chan *ch, int notify_code, struct iovec *iov, int count) { struct fuse_out_header out; + if (!f->got_init) + return -ENOTCONN; + out.unique = 0; out.error = notify_code; iov[0].iov_base = &out; iov[0].iov_len = sizeof(struct fuse_out_header); - out.len = iov_length(iov, count); - if (f->debug) - fprintf(stderr, "NOTIFY: code=%d count=%d length=%u\n", - notify_code, count, out.len); - - return fuse_chan_send(ch, iov, count); + return fuse_send_msg(f, ch, iov, count); } int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) @@ -1411,6 +2028,170 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_chan *ch, fuse_ino_t parent, return send_notify_iov(f, ch, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); } +int fuse_lowlevel_notify_delete(struct fuse_chan *ch, + fuse_ino_t parent, fuse_ino_t child, + const char *name, size_t namelen) +{ + struct fuse_notify_delete_out outarg; + struct fuse_ll *f; + struct iovec iov[3]; + + if (!ch) + return -EINVAL; + + f = (struct fuse_ll *)fuse_session_data(fuse_chan_session(ch)); + if (!f) + return -ENODEV; + + if (f->conn.proto_minor < 18) + return -ENOSYS; + + outarg.parent = parent; + outarg.child = child; + outarg.namelen = namelen; + outarg.padding = 0; + + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + iov[2].iov_base = (void *)name; + iov[2].iov_len = namelen + 1; + + return send_notify_iov(f, ch, FUSE_NOTIFY_DELETE, iov, 3); +} + +int fuse_lowlevel_notify_store(struct fuse_chan *ch, fuse_ino_t ino, + off64_t offset, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags) +{ + struct fuse_out_header out; + struct fuse_notify_store_out outarg; + struct fuse_ll *f; + struct iovec iov[3]; + size_t size = fuse_buf_size(bufv); + int res; + + if (!ch) + return -EINVAL; + + f = (struct fuse_ll *)fuse_session_data(fuse_chan_session(ch)); + if (!f) + return -ENODEV; + + if (f->conn.proto_minor < 15) + return -ENOSYS; + + out.unique = 0; + out.error = FUSE_NOTIFY_STORE; + + outarg.nodeid = ino; + outarg.offset = offset; + outarg.size = size; + + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(out); + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + + res = fuse_send_data_iov(f, ch, iov, 2, bufv, flags); + if (res > 0) + res = -res; + + return res; +} + +struct fuse_retrieve_req { + struct fuse_notify_req nreq; + void *cookie; +}; + +static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, + fuse_req_t req, fuse_ino_t ino, + const void *inarg, + const struct fuse_buf *ibuf) +{ + struct fuse_ll *f = req->f; + struct fuse_retrieve_req *rreq = + container_of(nreq, struct fuse_retrieve_req, nreq); + const struct fuse_notify_retrieve_in *arg = inarg; + struct fuse_bufvec bufv = { + .buf[0] = *ibuf, + .count = 1, + }; + + if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) + bufv.buf[0].mem = PARAM(arg); + + bufv.buf[0].size -= sizeof(struct fuse_in_header) + + sizeof(struct fuse_notify_retrieve_in); + + if (bufv.buf[0].size < arg->size) { + fprintf(stderr, "fuse: retrieve reply: buffer size too small\n"); + fuse_reply_none(req); + goto out; + } + bufv.buf[0].size = arg->size; + + if (req->f->op.retrieve_reply) { + req->f->op.retrieve_reply(req, rreq->cookie, ino, + arg->offset, &bufv); + } else { + fuse_reply_none(req); + } +out: + free(rreq); + if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) + fuse_ll_clear_pipe(f); +} + +int fuse_lowlevel_notify_retrieve(struct fuse_chan *ch, fuse_ino_t ino, + size_t size, off64_t offset, void *cookie) +{ + struct fuse_notify_retrieve_out outarg; + struct fuse_ll *f; + struct iovec iov[2]; + struct fuse_retrieve_req *rreq; + int err; + + if (!ch) + return -EINVAL; + + f = (struct fuse_ll *)fuse_session_data(fuse_chan_session(ch)); + if (!f) + return -ENODEV; + + if (f->conn.proto_minor < 15) + return -ENOSYS; + + rreq = malloc(sizeof(*rreq)); + if (rreq == NULL) + return -ENOMEM; + + pthread_mutex_lock(&f->lock); + rreq->cookie = cookie; + rreq->nreq.unique = f->notify_ctr++; + rreq->nreq.reply = fuse_ll_retrieve_reply; + list_add_nreq(&rreq->nreq, &f->notify_list); + pthread_mutex_unlock(&f->lock); + + outarg.notify_unique = rreq->nreq.unique; + outarg.nodeid = ino; + outarg.offset = offset; + outarg.size = size; + + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + + err = send_notify_iov(f, ch, FUSE_NOTIFY_RETRIEVE, iov, 2); + if (err) { + pthread_mutex_lock(&f->lock); + list_del_nreq(&rreq->nreq); + pthread_mutex_unlock(&f->lock); + free(rreq); + } + + return err; +} + void *fuse_req_userdata(fuse_req_t req) { return req->f->userdata; @@ -1431,7 +2212,9 @@ const struct fuse_ctx *fuse_req_ctx_compat24(fuse_req_t req) { return fuse_req_ctx(req); } +#ifndef __NetBSD__ FUSE_SYMVER(".symver fuse_req_ctx_compat24,fuse_req_ctx@FUSE_2.4"); +#endif void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, @@ -1499,7 +2282,10 @@ static struct { [FUSE_BMAP] = { do_bmap, "BMAP" }, [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, [FUSE_POLL] = { do_poll, "POLL" }, + [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, [FUSE_DESTROY] = { do_destroy, "DESTROY" }, + [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, + [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, }; @@ -1513,37 +2299,84 @@ static const char *opname(enum fuse_opcode opcode) return fuse_ll_ops[opcode].name; } -static void fuse_ll_process(void *data, const char *buf, size_t len, - struct fuse_chan *ch) +static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, + struct fuse_bufvec *src) +{ + int res = fuse_buf_copy(dst, src, 0); + if (res < 0) { + fprintf(stderr, "fuse: copy from pipe: %s\n", strerror(-res)); + return res; + } + if (res < fuse_buf_size(dst)) { + fprintf(stderr, "fuse: copy from pipe: short read\n"); + return -1; + } + return 0; +} + +static void fuse_ll_process_buf(void *data, const struct fuse_buf *buf, + struct fuse_chan *ch) { struct fuse_ll *f = (struct fuse_ll *) data; - struct fuse_in_header *in = (struct fuse_in_header *) buf; - const void *inarg = buf + sizeof(struct fuse_in_header); + const size_t write_header_size = sizeof(struct fuse_in_header) + + sizeof(struct fuse_write_in); + struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; + struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); + struct fuse_in_header *in; + const void *inarg; struct fuse_req *req; + void *mbuf = NULL; int err; + int res; - if (f->debug) + if (buf->flags & FUSE_BUF_IS_FD) { + if (buf->size < tmpbuf.buf[0].size) + tmpbuf.buf[0].size = buf->size; + + mbuf = malloc(tmpbuf.buf[0].size); + if (mbuf == NULL) { + fprintf(stderr, "fuse: failed to allocate header\n"); + goto clear_pipe; + } + tmpbuf.buf[0].mem = mbuf; + + res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); + if (res < 0) + goto clear_pipe; + + in = mbuf; + } else { + in = buf->mem; + } + + if (f->debug) { fprintf(stderr, - "unique: %llu, opcode: %s (%i), nodeid: %lu, insize: %zu\n", + "unique: %llu, opcode: %s (%i), nodeid: %lu, insize: %zu, pid: %u\n", (unsigned long long) in->unique, opname((enum fuse_opcode) in->opcode), in->opcode, - (unsigned long) in->nodeid, len); + (unsigned long) in->nodeid, buf->size, in->pid); + } - req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); + req = fuse_ll_alloc_req(f); if (req == NULL) { - fprintf(stderr, "fuse: failed to allocate request\n"); - return; + struct fuse_out_header out = { + .unique = in->unique, + .error = -ENOMEM, + }; + struct iovec iov = { + .iov_base = &out, + .iov_len = sizeof(struct fuse_out_header), + }; + + fuse_send_msg(f, ch, &iov, 1); + goto clear_pipe; } - req->f = f; req->unique = in->unique; req->ctx.uid = in->uid; req->ctx.gid = in->gid; req->ctx.pid = in->pid; req->ch = ch; - req->ctr = 1; - list_init_req(req); - fuse_mutex_init(&req->lock); err = EIO; if (!f->got_init) { @@ -1560,7 +2393,8 @@ static void fuse_ll_process(void *data, const char *buf, size_t len, in->opcode != FUSE_INIT && in->opcode != FUSE_READ && in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && - in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR) + in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && + in->opcode != FUSE_NOTIFY_REPLY) goto reply_err; err = ENOSYS; @@ -1572,16 +2406,61 @@ static void fuse_ll_process(void *data, const char *buf, size_t len, intr = check_interrupt(f, req); list_add_req(req, &f->list); pthread_mutex_unlock(&f->lock); - if (intr) { - printf("EAGAIN fuse_llprocess\n"); + if (intr) fuse_reply_err(intr, EAGAIN); - } } - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + + if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && + (in->opcode != FUSE_WRITE || !f->op.write_buf) && + in->opcode != FUSE_NOTIFY_REPLY) { + void *newmbuf; + + err = ENOMEM; + newmbuf = realloc(mbuf, buf->size); + if (newmbuf == NULL) + goto reply_err; + mbuf = newmbuf; + + tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); + tmpbuf.buf[0].mem = mbuf + write_header_size; + + res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); + err = -res; + if (res < 0) + goto reply_err; + + in = mbuf; + } + + inarg = (void *) &in[1]; + if (in->opcode == FUSE_WRITE && f->op.write_buf) + do_write_buf(req, in->nodeid, inarg, buf); + else if (in->opcode == FUSE_NOTIFY_REPLY) + do_notify_reply(req, in->nodeid, inarg, buf); + else + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + +out_free: + free(mbuf); return; - reply_err: +reply_err: fuse_reply_err(req, err); +clear_pipe: + if (buf->flags & FUSE_BUF_IS_FD) + fuse_ll_clear_pipe(f); + goto out_free; +} + +static void fuse_ll_process(void *data, const char *buf, size_t len, + struct fuse_chan *ch) +{ + struct fuse_buf fbuf = { + .mem = (void *) buf, + .size = len, + }; + + fuse_ll_process_buf(data, &fbuf, ch); } enum { @@ -1589,17 +2468,29 @@ enum { KEY_VERSION, }; -static struct fuse_opt fuse_ll_opts[] = { +static const struct fuse_opt fuse_ll_opts[] = { { "debug", offsetof(struct fuse_ll, debug), 1 }, { "-d", offsetof(struct fuse_ll, debug), 1 }, { "allow_root", offsetof(struct fuse_ll, allow_root), 1 }, { "max_write=%u", offsetof(struct fuse_ll, conn.max_write), 0 }, { "max_readahead=%u", offsetof(struct fuse_ll, conn.max_readahead), 0 }, + { "max_background=%u", offsetof(struct fuse_ll, conn.max_background), 0 }, + { "congestion_threshold=%u", + offsetof(struct fuse_ll, conn.congestion_threshold), 0 }, { "async_read", offsetof(struct fuse_ll, conn.async_read), 1 }, { "sync_read", offsetof(struct fuse_ll, conn.async_read), 0 }, { "atomic_o_trunc", offsetof(struct fuse_ll, atomic_o_trunc), 1}, - { "no_remote_lock", offsetof(struct fuse_ll, no_remote_lock), 1}, + { "no_remote_lock", offsetof(struct fuse_ll, no_remote_posix_lock), 1}, + { "no_remote_lock", offsetof(struct fuse_ll, no_remote_flock), 1}, + { "no_remote_flock", offsetof(struct fuse_ll, no_remote_flock), 1}, + { "no_remote_posix_lock", offsetof(struct fuse_ll, no_remote_posix_lock), 1}, { "big_writes", offsetof(struct fuse_ll, big_writes), 1}, + { "splice_write", offsetof(struct fuse_ll, splice_write), 1}, + { "no_splice_write", offsetof(struct fuse_ll, no_splice_write), 1}, + { "splice_move", offsetof(struct fuse_ll, splice_move), 1}, + { "no_splice_move", offsetof(struct fuse_ll, no_splice_move), 1}, + { "splice_read", offsetof(struct fuse_ll, splice_read), 1}, + { "no_splice_read", offsetof(struct fuse_ll, no_splice_read), 1}, FUSE_OPT_KEY("max_read=", FUSE_OPT_KEY_DISCARD), FUSE_OPT_KEY("-h", KEY_HELP), FUSE_OPT_KEY("--help", KEY_HELP), @@ -1619,11 +2510,19 @@ static void fuse_ll_help(void) fprintf(stderr, " -o max_write=N set maximum size of write requests\n" " -o max_readahead=N set maximum readahead\n" +" -o max_background=N set number of maximum background requests\n" +" -o congestion_threshold=N set kernel's congestion threshold\n" " -o async_read perform reads asynchronously (default)\n" " -o sync_read perform reads synchronously\n" " -o atomic_o_trunc enable atomic open+truncate support\n" " -o big_writes enable larger than 4kB writes\n" -" -o no_remote_lock disable remote file locking\n"); +" -o no_remote_lock disable remote file locking\n" +" -o no_remote_flock disable remote file locking (BSD)\n" +" -o no_remote_posix_lock disable remove file locking (POSIX)\n" +" -o [no_]splice_write use splice to write to the fuse device\n" +" -o [no_]splice_move move data while splicing to the fuse device\n" +" -o [no_]splice_read use splice to read from the fuse device\n" +); } static int fuse_ll_opt_proc(void *data, const char *arg, int key, @@ -1655,17 +2554,142 @@ int fuse_lowlevel_is_lib_option(const char *opt) static void fuse_ll_destroy(void *data) { struct fuse_ll *f = (struct fuse_ll *) data; + struct fuse_ll_pipe *llp; if (f->got_init && !f->got_destroy) { if (f->op.destroy) f->op.destroy(f->userdata); } - + llp = pthread_getspecific(f->pipe_key); + if (llp != NULL) + fuse_ll_pipe_free(llp); + pthread_key_delete(f->pipe_key); pthread_mutex_destroy(&f->lock); free(f->cuse_data); free(f); } +static void fuse_ll_pipe_destructor(void *data) +{ + struct fuse_ll_pipe *llp = data; + fuse_ll_pipe_free(llp); +} + +#ifdef HAVE_SPLICE +static int fuse_ll_receive_buf(struct fuse_session *se, struct fuse_buf *buf, + struct fuse_chan **chp) +{ + struct fuse_chan *ch = *chp; + struct fuse_ll *f = fuse_session_data(se); + size_t bufsize = buf->size; + struct fuse_ll_pipe *llp; + struct fuse_buf tmpbuf; + int err; + int res; + + if (f->conn.proto_minor < 14 || !(f->conn.want & FUSE_CAP_SPLICE_READ)) + goto fallback; + + llp = fuse_ll_get_pipe(f); + if (llp == NULL) + goto fallback; + + if (llp->size < bufsize) { + if (llp->can_grow) { + res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); + if (res == -1) { + llp->can_grow = 0; + goto fallback; + } + llp->size = res; + } + if (llp->size < bufsize) + goto fallback; + } + + res = splice(fuse_chan_fd(ch), NULL, llp->pipe[1], NULL, bufsize, 0); + err = errno; + + if (fuse_session_exited(se)) + return 0; + + if (res == -1) { + if (err == ENODEV) { + fuse_session_exit(se); + return 0; + } + if (err != EINTR && err != EAGAIN) + perror("fuse: splice from device"); + return -err; + } + + if (res < sizeof(struct fuse_in_header)) { + fprintf(stderr, "short splice from fuse device\n"); + return -EIO; + } + + tmpbuf = (struct fuse_buf) { + .size = res, + .flags = FUSE_BUF_IS_FD, + .fd = llp->pipe[0], + }; + + /* + * Don't bother with zero copy for small requests. + * fuse_loop_mt() needs to check for FORGET so this more than + * just an optimization. + */ + if (res < sizeof(struct fuse_in_header) + + sizeof(struct fuse_write_in) + pagesize) { + struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; + struct fuse_bufvec dst = { .buf[0] = *buf, .count = 1 }; + + res = fuse_buf_copy(&dst, &src, 0); + if (res < 0) { + fprintf(stderr, "fuse: copy from pipe: %s\n", + strerror(-res)); + fuse_ll_clear_pipe(f); + return res; + } + if (res < tmpbuf.size) { + fprintf(stderr, "fuse: copy from pipe: short read\n"); + fuse_ll_clear_pipe(f); + return -EIO; + } + buf->size = tmpbuf.size; + return buf->size; + } + + *buf = tmpbuf; + + return res; + +fallback: + res = fuse_chan_recv(chp, buf->mem, bufsize); + if (res <= 0) + return res; + + buf->size = res; + + return res; +} +#else +static int fuse_ll_receive_buf(struct fuse_session *se, struct fuse_buf *buf, + struct fuse_chan **chp) +{ + (void) se; + + int res = fuse_chan_recv(chp, buf->mem, buf->size); + if (res <= 0) + return res; + + buf->size = res; + + return res; +} +#endif + + /* * always call fuse_lowlevel_new_common() internally, to work around a * misfeature in the FreeBSD runtime linker, which links the old @@ -1675,6 +2699,7 @@ struct fuse_session *fuse_lowlevel_new_common(struct fuse_args *args, const struct fuse_lowlevel_ops *op, size_t op_size, void *userdata) { + int err; struct fuse_ll *f; struct fuse_session *se; struct fuse_session_ops sop = { @@ -1699,10 +2724,19 @@ struct fuse_session *fuse_lowlevel_new_common(struct fuse_args *args, f->atomic_o_trunc = 0; list_init_req(&f->list); list_init_req(&f->interrupts); + list_init_nreq(&f->notify_list); + f->notify_ctr = 1; fuse_mutex_init(&f->lock); - if (fuse_opt_parse(args, f, fuse_ll_opts, fuse_ll_opt_proc) == -1) + err = pthread_key_create(&f->pipe_key, fuse_ll_pipe_destructor); + if (err) { + fprintf(stderr, "fuse: failed to create thread specific key: %s\n", + strerror(err)); goto out_free; + } + + if (fuse_opt_parse(args, f, fuse_ll_opts, fuse_ll_opt_proc) == -1) + goto out_key_destroy; if (f->debug) fprintf(stderr, "FUSE library version: %s\n", PACKAGE_VERSION); @@ -1713,11 +2747,17 @@ struct fuse_session *fuse_lowlevel_new_common(struct fuse_args *args, se = fuse_session_new(&sop, f); if (!se) - goto out_free; + goto out_key_destroy; + + se->receive_buf = fuse_ll_receive_buf; + se->process_buf = fuse_ll_process_buf; return se; +out_key_destroy: + pthread_key_delete(f->pipe_key); out_free: + pthread_mutex_destroy(&f->lock); free(f); out: return NULL; @@ -1800,7 +2840,7 @@ int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) } #endif -#ifndef __FreeBSD__ +#if !defined(__FreeBSD__) && !defined(__NetBSD__) static void fill_open_compat(struct fuse_open_out *arg, const struct fuse_file_info_compat *f) @@ -1902,7 +2942,7 @@ FUSE_SYMVER(".symver fuse_reply_statfs_compat,fuse_reply_statfs@FUSE_2.4"); FUSE_SYMVER(".symver fuse_reply_open_compat,fuse_reply_open@FUSE_2.4"); FUSE_SYMVER(".symver fuse_lowlevel_new_compat,fuse_lowlevel_new@FUSE_2.4"); -#else /* __FreeBSD__ */ +#else /* __FreeBSD__ || __NetBSD__ */ int fuse_sync_compat_args(struct fuse_args *args) { @@ -1910,7 +2950,7 @@ int fuse_sync_compat_args(struct fuse_args *args) return 0; } -#endif /* __FreeBSD__ */ +#endif /* __FreeBSD__ || __NetBSD__ */ struct fuse_session *fuse_lowlevel_new_compat25(struct fuse_args *args, const struct fuse_lowlevel_ops_compat25 *op, diff --git a/fuse/fuse_misc.h b/fuse/fuse_misc.h index c2cfee17a..eedf0e0f7 100644 --- a/fuse/fuse_misc.h +++ b/fuse/fuse_misc.h @@ -9,8 +9,12 @@ #include "config.h" #include -/* Versioned symbols confuse the dynamic linker in uClibc */ -#ifndef __UCLIBC__ +/* + Versioned symbols cannot be used in some cases because it + - confuse the dynamic linker in uClibc + - not supported on MacOSX (in MachO binary format) +*/ +#if (!defined(__UCLIBC__) && !defined(__APPLE__)) #define FUSE_SYMVER(x) __asm__(x) #else #define FUSE_SYMVER(x) diff --git a/fuse/fuse_mt.c b/fuse/fuse_mt.c index 7f940006d..f6dbe71b2 100644 --- a/fuse/fuse_mt.c +++ b/fuse/fuse_mt.c @@ -24,8 +24,6 @@ struct procdata { void *data; }; -#ifdef __MULTI_THREAD - static void mt_session_proc(void *data, const char *buf, size_t len, struct fuse_chan *ch) { @@ -112,9 +110,13 @@ int fuse_loop_mt(struct fuse *f) if (f == NULL) return -1; - return fuse_session_loop_mt(fuse_get_session(f)); + int res = fuse_start_cleanup_thread(f); + if (res) + return -1; + + res = fuse_session_loop_mt(fuse_get_session(f)); + fuse_stop_cleanup_thread(f); + return res; } FUSE_SYMVER(".symver fuse_loop_mt_proc,__fuse_loop_mt@"); - -#endif diff --git a/fuse/fuse_opt.c b/fuse/fuse_opt.c index b15e7db11..a2118cedc 100644 --- a/fuse/fuse_opt.c +++ b/fuse/fuse_opt.c @@ -54,10 +54,15 @@ int fuse_opt_add_arg(struct fuse_args *args, const char *arg) assert(!args->argv || args->allocated); + newarg = strdup(arg); + if (!newarg) + return alloc_failed(); + newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); - newarg = newargv ? strdup(arg) : NULL; - if (!newargv || !newarg) + if (!newargv) { + free(newarg); return alloc_failed(); + } args->argv = newargv; args->allocated = 1; @@ -304,9 +309,21 @@ static int process_real_option_group(struct fuse_opt_context *ctx, char *opts) return -1; d = opts; } else { - if (s[0] == '\\' && s[1] != '\0') + if (s[0] == '\\' && s[1] != '\0') { s++; - *d++ = *s; + if (s[0] >= '0' && s[0] <= '3' && + s[1] >= '0' && s[1] <= '7' && + s[2] >= '0' && s[2] <= '7') { + *d++ = (s[0] - '0') * 0100 + + (s[1] - '0') * 0010 + + (s[2] - '0'); + s += 2; + } else { + *d++ = *s; + } + } else { + *d++ = *s; + } } s++; } diff --git a/fuse/fuse_session.c b/fuse/fuse_session.c index 3758627de..c55f25074 100644 --- a/fuse/fuse_session.c +++ b/fuse/fuse_session.c @@ -80,6 +80,34 @@ void fuse_session_process(struct fuse_session *se, const char *buf, size_t len, se->op.process(se->data, buf, len, ch); } +void fuse_session_process_buf(struct fuse_session *se, + const struct fuse_buf *buf, struct fuse_chan *ch) +{ + if (se->process_buf) { + se->process_buf(se->data, buf, ch); + } else { + assert(!(buf->flags & FUSE_BUF_IS_FD)); + fuse_session_process(se->data, buf->mem, buf->size, ch); + } +} + +int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf, + struct fuse_chan **chp) +{ + int res; + + if (se->receive_buf) { + res = se->receive_buf(se, buf, chp); + } else { + res = fuse_chan_recv(chp, buf->mem, buf->size); + if (res > 0) + buf->size = res; + } + + return res; +} + + void fuse_session_destroy(struct fuse_session *se) { if (se->op.destroy) diff --git a/fuse/helper.c b/fuse/helper.c index 7b994fd3b..ace19dd70 100644 --- a/fuse/helper.c +++ b/fuse/helper.c @@ -179,14 +179,38 @@ err: int fuse_daemonize(int foreground) { - int res; - if (!foreground) { - res = daemon(0, 0); - if (res == -1) { - perror("fuse: failed to daemonize program\n"); + int nullfd; + + /* + * demonize current process by forking it and killing the + * parent. This makes current process as a child of 'init'. + */ + switch(fork()) { + case -1: + perror("fuse_daemonize: fork"); + return -1; + case 0: + break; + default: + _exit(0); + } + + if (setsid() == -1) { + perror("fuse_daemonize: setsid"); return -1; } + + (void) chdir("/"); + + nullfd = open("/dev/null", O_RDWR, 0); + if (nullfd != -1) { + (void) dup2(nullfd, 0); + (void) dup2(nullfd, 1); + (void) dup2(nullfd, 2); + if (nullfd > 2) + close(nullfd); + } } return 0; } @@ -227,7 +251,8 @@ static void fuse_unmount_common(const char *mountpoint, struct fuse_chan *ch) { int fd = ch ? fuse_chan_fd(ch) : -1; fuse_kern_unmount(mountpoint, fd); - fuse_chan_destroy(ch); + if (ch) + fuse_chan_destroy(ch); } void fuse_unmount(const char *mountpoint, struct fuse_chan *ch) @@ -324,11 +349,9 @@ static int fuse_main_common(int argc, char *argv[], if (fuse == NULL) return 1; -#ifdef __MULTI_THREAD if (multithreaded) res = fuse_loop_mt(fuse); else -#endif res = fuse_loop(fuse); fuse_teardown_common(fuse, mountpoint); @@ -359,7 +382,7 @@ int fuse_version(void) #include "fuse_compat.h" -#ifndef __FreeBSD__ +#if !defined(__FreeBSD__) && !defined(__NetBSD__) struct fuse *fuse_setup_compat22(int argc, char *argv[], const struct fuse_operations_compat22 *op, @@ -417,7 +440,7 @@ FUSE_SYMVER(".symver fuse_teardown,__fuse_teardown@"); FUSE_SYMVER(".symver fuse_main_compat2,fuse_main@"); FUSE_SYMVER(".symver fuse_main_real_compat22,fuse_main_real@FUSE_2.2"); -#endif /* __FreeBSD__ */ +#endif /* __FreeBSD__ || __NetBSD__ */ struct fuse *fuse_setup_compat25(int argc, char *argv[], diff --git a/fuse/include/fuse.h b/fuse/include/fuse.h index 899830fb8..cad816cc4 100644 --- a/fuse/include/fuse.h +++ b/fuse/include/fuse.h @@ -20,7 +20,7 @@ */ #ifndef FUSE_USE_VERSION -#define FUSE_USE_VERSION 28 +#define FUSE_USE_VERSION 21 #endif #include "fuse_common.h" @@ -32,6 +32,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -412,7 +413,7 @@ struct fuse_operations { * information without calling this method. This ensures, that * for local locks the l_pid field is correctly filled in. The * results may not be accurate in case of race conditions and in - * the presence of hard links, but it's unlikly that an + * the presence of hard links, but it's unlikely that an * application would rely on accurate GETLK results in these * cases. If a conflicting lock is not found, this method will be * called, and the filesystem may fill out l_pid by a meaningful @@ -434,6 +435,11 @@ struct fuse_operations { * Change the access and modification times of a file with * nanosecond resolution * + * This supersedes the old utime() interface. New applications + * should use this. + * + * See the utimensat(2) man page for details. + * * Introduced in version 2.6 */ int (*utimens) (const char *, const struct timespec tv[2]); @@ -449,18 +455,41 @@ struct fuse_operations { int (*bmap) (const char *, size_t blocksize, uint64_t *idx); /** - * Flag indicating, that the filesystem can accept a NULL path + * Flag indicating that the filesystem can accept a NULL path * as the first argument for the following operations: * * read, write, flush, release, fsync, readdir, releasedir, - * fsyncdir, ftruncate, fgetattr and lock + * fsyncdir, ftruncate, fgetattr, lock, ioctl and poll + * + * If this flag is set these operations continue to work on + * unlinked files even if "-ohard_remove" option was specified. + */ + unsigned int flag_nullpath_ok:1; + + /** + * Flag indicating that the path need not be calculated for + * the following operations: + * + * read, write, flush, release, fsync, readdir, releasedir, + * fsyncdir, ftruncate, fgetattr, lock, ioctl and poll + * + * Closely related to flag_nullpath_ok, but if this flag is + * set then the path will not be calculaged even if the file + * wasn't unlinked. However the path can still be non-NULL if + * it needs to be calculated for some other reason. */ - unsigned int flag_nullpath_ok : 1; + unsigned int flag_nopath:1; + + /** + * Flag indicating that the filesystem accepts special + * UTIME_NOW and UTIME_OMIT values in its utimens operation. + */ + unsigned int flag_utime_omit_ok:1; /** * Reserved flags, don't set */ - unsigned int flag_reserved : 31; + unsigned int flag_reserved:29; /** * Ioctl @@ -496,6 +525,70 @@ struct fuse_operations { */ int (*poll) (const char *, struct fuse_file_info *, struct fuse_pollhandle *ph, unsigned *reventsp); + + /** Write contents of buffer to an open file + * + * Similar to the write() method, but data is supplied in a + * generic buffer. Use fuse_buf_copy() to transfer data to + * the destination. + * + * Introduced in version 2.9 + */ + int (*write_buf) (const char *, struct fuse_bufvec *buf, off64_t off, + struct fuse_file_info *); + + /** Store data from an open file in a buffer + * + * Similar to the read() method, but data is stored and + * returned in a generic buffer. + * + * No actual copying of data has to take place, the source + * file descriptor may simply be stored in the buffer for + * later data transfer. + * + * The buffer must be allocated dynamically and stored at the + * location pointed to by bufp. If the buffer contains memory + * regions, they too must be allocated using malloc(). The + * allocated memory will be freed by the caller. + * + * Introduced in version 2.9 + */ + int (*read_buf) (const char *, struct fuse_bufvec **bufp, + size_t size, off64_t off, struct fuse_file_info *); + /** + * Perform BSD file locking operation + * + * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN + * + * Nonblocking requests will be indicated by ORing LOCK_NB to + * the above operations + * + * For more information see the flock(2) manual page. + * + * Additionally fi->owner will be set to a value unique to + * this open file. This same value will be supplied to + * ->release() when the file is released. + * + * Note: if this method is not implemented, the kernel will still + * allow file locking to work locally. Hence it is only + * interesting for network filesystems and similar. + * + * Introduced in version 2.9 + */ + int (*flock) (const char *, struct fuse_file_info *, int op); + + /** + * Allocates space for an open file + * + * This function ensures that required space is allocated for specified + * file. If this function returns success then any subsequent write + * request to specified range is guaranteed not to fail because of lack + * of space on the file system media. + * + * Introduced in version 2.9.1 + */ + int (*fallocate) (const char *, int, off64_t, off64_t, + struct fuse_file_info *); }; /** Extra context that may be needed by some filesystems @@ -671,6 +764,34 @@ int fuse_is_lib_option(const char *opt); int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, size_t op_size, void *user_data); +/** + * Start the cleanup thread when using option "remember". + * + * This is done automatically by fuse_loop_mt() + * @param fuse struct fuse pointer for fuse instance + * @return 0 on success and -1 on error + */ +int fuse_start_cleanup_thread(struct fuse *fuse); + +/** + * Stop the cleanup thread when using option "remember". + * + * This is done automatically by fuse_loop_mt() + * @param fuse struct fuse pointer for fuse instance + */ +void fuse_stop_cleanup_thread(struct fuse *fuse); + +/** + * Iterate over cache removing stale entries + * use in conjunction with "-oremember" + * + * NOTE: This is already done for the standard sessions + * + * @param fuse struct fuse pointer for fuse instance + * @return the number of seconds until the next cleanup + */ +int fuse_clean_cache(struct fuse *fuse); + /* * Stacking API */ @@ -707,8 +828,14 @@ int fuse_fs_open(struct fuse_fs *fs, const char *path, struct fuse_file_info *fi); int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, off64_t off, struct fuse_file_info *fi); +int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, + struct fuse_bufvec **bufp, size_t size, off64_t off, + struct fuse_file_info *fi); int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, size_t size, off64_t off, struct fuse_file_info *fi); +int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, + struct fuse_bufvec *buf, off64_t off, + struct fuse_file_info *fi); int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, struct fuse_file_info *fi); int fuse_fs_flush(struct fuse_fs *fs, const char *path, @@ -727,6 +854,8 @@ int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, struct fuse_file_info *fi); int fuse_fs_lock(struct fuse_fs *fs, const char *path, struct fuse_file_info *fi, int cmd, struct flock *lock); +int fuse_fs_flock(struct fuse_fs *fs, const char *path, + struct fuse_file_info *fi, int op); int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode); int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid); int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off64_t size); @@ -755,6 +884,8 @@ int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, int cmd, void *arg, int fuse_fs_poll(struct fuse_fs *fs, const char *path, struct fuse_file_info *fi, struct fuse_pollhandle *ph, unsigned *reventsp); +int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, + off64_t offset, off64_t length, struct fuse_file_info *fi); void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn); void fuse_fs_destroy(struct fuse_fs *fs); diff --git a/fuse/include/fuse_common.h b/fuse/include/fuse_common.h index 48c07468d..dab3a569a 100644 --- a/fuse/include/fuse_common.h +++ b/fuse/include/fuse_common.h @@ -17,16 +17,16 @@ #include "fuse_opt.h" #include +#include /** Major version of FUSE library interface */ #define FUSE_MAJOR_VERSION 2 /** Minor version of FUSE library interface */ -#define FUSE_MINOR_VERSION 8 +#define FUSE_MINOR_VERSION 9 #define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) -#define FUSE_VERSION 26 -//#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) +#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) /* This interface uses 64 bit off64_t */ #if _FILE_OFFSET_BITS != 64 @@ -70,8 +70,14 @@ struct fuse_file_info { seekable. Introduced in version 2.8 */ unsigned int nonseekable : 1; + /* Indicates that flock locks for this file should be + released. If set, lock_owner shall contain a valid value. + May only be set in ->release(). Introduced in version + 2.9 */ + unsigned int flock_release : 1; + /** Padding. Do not use*/ - unsigned int padding : 28; + unsigned int padding : 27; /** File handle. May be filled in by filesystem in open(). Available in all other file operations */ @@ -90,6 +96,10 @@ struct fuse_file_info { * FUSE_CAP_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." * FUSE_CAP_BIG_WRITES: filesystem can handle write size larger than 4kB * FUSE_CAP_DONT_MASK: don't apply umask to file mode on create operations + * FUSE_CAP_SPLICE_WRITE: ability to use splice() to write to the fuse device + * FUSE_CAP_SPLICE_MOVE: ability to move data to the fuse device with splice() + * FUSE_CAP_SPLICE_READ: ability to use splice() to read from the fuse device + * FUSE_CAP_IOCTL_DIR: ioctl support on directories */ #define FUSE_CAP_ASYNC_READ (1 << 0) #define FUSE_CAP_POSIX_LOCKS (1 << 1) @@ -97,6 +107,11 @@ struct fuse_file_info { #define FUSE_CAP_EXPORT_SUPPORT (1 << 4) #define FUSE_CAP_BIG_WRITES (1 << 5) #define FUSE_CAP_DONT_MASK (1 << 6) +#define FUSE_CAP_SPLICE_WRITE (1 << 7) +#define FUSE_CAP_SPLICE_MOVE (1 << 8) +#define FUSE_CAP_SPLICE_READ (1 << 9) +#define FUSE_CAP_FLOCK_LOCKS (1 << 10) +#define FUSE_CAP_IOCTL_DIR (1 << 11) /** * Ioctl flags @@ -104,12 +119,14 @@ struct fuse_file_info { * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed * FUSE_IOCTL_RETRY: retry with new iovecs + * FUSE_IOCTL_DIR: is a directory * * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs */ #define FUSE_IOCTL_COMPAT (1 << 0) #define FUSE_IOCTL_UNRESTRICTED (1 << 1) #define FUSE_IOCTL_RETRY (1 << 2) +#define FUSE_IOCTL_DIR (1 << 4) #define FUSE_IOCTL_MAX_IOV 256 @@ -156,10 +173,20 @@ struct fuse_conn_info { */ unsigned want; + /** + * Maximum number of backgrounded requests + */ + unsigned max_background; + + /** + * Kernel congestion threshold parameter + */ + unsigned congestion_threshold; + /** * For future use. */ - unsigned reserved[25]; + unsigned reserved[23]; }; struct fuse_session; @@ -232,6 +259,186 @@ int fuse_version(void); */ void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); +/* ----------------------------------------------------------- * + * Data buffer * + * ----------------------------------------------------------- */ + +/** + * Buffer flags + */ +enum fuse_buf_flags { + /** + * Buffer contains a file descriptor + * + * If this flag is set, the .fd field is valid, otherwise the + * .mem fields is valid. + */ + FUSE_BUF_IS_FD = (1 << 1), + + /** + * Seek on the file descriptor + * + * If this flag is set then the .pos field is valid and is + * used to seek to the given offset before performing + * operation on file descriptor. + */ + FUSE_BUF_FD_SEEK = (1 << 2), + + /** + * Retry operation on file descriptor + * + * If this flag is set then retry operation on file descriptor + * until .size bytes have been copied or an error or EOF is + * detected. + */ + FUSE_BUF_FD_RETRY = (1 << 3), +}; + +/** + * Buffer copy flags + */ +enum fuse_buf_copy_flags { + /** + * Don't use splice(2) + * + * Always fall back to using read and write instead of + * splice(2) to copy data from one file descriptor to another. + * + * If this flag is not set, then only fall back if splice is + * unavailable. + */ + FUSE_BUF_NO_SPLICE = (1 << 1), + + /** + * Force splice + * + * Always use splice(2) to copy data from one file descriptor + * to another. If splice is not available, return -EINVAL. + */ + FUSE_BUF_FORCE_SPLICE = (1 << 2), + + /** + * Try to move data with splice. + * + * If splice is used, try to move pages from the source to the + * destination instead of copying. See documentation of + * SPLICE_F_MOVE in splice(2) man page. + */ + FUSE_BUF_SPLICE_MOVE = (1 << 3), + + /** + * Don't block on the pipe when copying data with splice + * + * Makes the operations on the pipe non-blocking (if the pipe + * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) + * man page. + */ + FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), +}; + +/** + * Single data buffer + * + * Generic data buffer for I/O, extended attributes, etc... Data may + * be supplied as a memory pointer or as a file descriptor + */ +struct fuse_buf { + /** + * Size of data in bytes + */ + size_t size; + + /** + * Buffer flags + */ + enum fuse_buf_flags flags; + + /** + * Memory pointer + * + * Used unless FUSE_BUF_IS_FD flag is set. + */ + void *mem; + + /** + * File descriptor + * + * Used if FUSE_BUF_IS_FD flag is set. + */ + int fd; + + /** + * File position + * + * Used if FUSE_BUF_FD_SEEK flag is set. + */ + off64_t pos; +}; + +/** + * Data buffer vector + * + * An array of data buffers, each containing a memory pointer or a + * file descriptor. + * + * Allocate dynamically to add more than one buffer. + */ +struct fuse_bufvec { + /** + * Number of buffers in the array + */ + size_t count; + + /** + * Index of current buffer within the array + */ + size_t idx; + + /** + * Current offset within the current buffer + */ + size_t off; + + /** + * Array of buffers + */ + struct fuse_buf buf[1]; +}; + +/* Initialize bufvec with a single buffer of given size */ +#define FUSE_BUFVEC_INIT(size__) \ + ((struct fuse_bufvec) { \ + /* .count= */ 1, \ + /* .idx = */ 0, \ + /* .off = */ 0, \ + /* .buf = */ { /* [0] = */ { \ + /* .size = */ (size__), \ + /* .flags = */ (enum fuse_buf_flags) 0, \ + /* .mem = */ NULL, \ + /* .fd = */ -1, \ + /* .pos = */ 0, \ + } } \ + } ) + +/** + * Get total size of data in a fuse buffer vector + * + * @param bufv buffer vector + * @return size of data + */ +size_t fuse_buf_size(const struct fuse_bufvec *bufv); + +/** + * Copy data from one buffer vector to another + * + * @param dst destination buffer vector + * @param src source buffer vector + * @param flags flags controlling the copy + * @return actual number of bytes copied or -errno on error + */ +ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, + enum fuse_buf_copy_flags flags); + /* ----------------------------------------------------------- * * Signal handling * * ----------------------------------------------------------- */ diff --git a/fuse/include/fuse_compat.h b/fuse/include/fuse_compat.h index 95b7c789f..d09323844 100644 --- a/fuse/include/fuse_compat.h +++ b/fuse/include/fuse_compat.h @@ -65,7 +65,7 @@ struct fuse *fuse_setup_compat25(int argc, char *argv[], void fuse_teardown_compat22(struct fuse *fuse, int fd, char *mountpoint); -#ifndef __FreeBSD__ +#if !defined(__FreeBSD__) && !defined(__NetBSD__) #include struct fuse_operations_compat22 { @@ -198,4 +198,4 @@ struct fuse *fuse_new_compat1(int fd, int flags, void fuse_main_compat1(int argc, char *argv[], const struct fuse_operations_compat1 *op); -#endif /* __FreeBSD__ */ +#endif /* __FreeBSD__ || __NetBSD__ */ diff --git a/fuse/include/fuse_kernel.h b/fuse/include/fuse_kernel.h index bd736307d..c632b58fb 100644 --- a/fuse/include/fuse_kernel.h +++ b/fuse/include/fuse_kernel.h @@ -56,6 +56,33 @@ * - add umask flag to input argument of open, mknod and mkdir * - add notification messages for invalidation of inodes and * directory entries + * + * 7.13 + * - make max number of background requests and congestion threshold + * tunables + * + * 7.14 + * - add splice support to fuse device + * + * 7.15 + * - add store notify + * - add retrieve notify + * + * 7.16 + * - add BATCH_FORGET request + * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct + * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' + * - add FUSE_IOCTL_32BIT flag + * + * 7.17 + * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK + * + * 7.18 + * - add FUSE_IOCTL_DIR flag + * - add FUSE_NOTIFY_DELETE + * + * 7.19 + * - add FUSE_FALLOCATE */ #ifndef _LINUX_FUSE_H @@ -66,6 +93,7 @@ #define __s64 int64_t #define __u32 uint32_t #define __s32 int32_t +#define __u16 uint16_t /* * Version negotiation: @@ -91,7 +119,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 12 +#define FUSE_KERNEL_MINOR_VERSION 19 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -166,8 +194,10 @@ struct fuse_file_lock { /** * INIT request/reply flags * + * FUSE_POSIX_LOCKS: remote locking for POSIX file locks * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." * FUSE_DONT_MASK: don't apply umask to file mode on create operations + * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -176,6 +206,7 @@ struct fuse_file_lock { #define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) #define FUSE_DONT_MASK (1 << 6) +#define FUSE_FLOCK_LOCKS (1 << 10) /** * CUSE INIT request/reply flags @@ -188,6 +219,7 @@ struct fuse_file_lock { * Release flags */ #define FUSE_RELEASE_FLUSH (1 << 0) +#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) /** * Getattr flags @@ -219,12 +251,16 @@ struct fuse_file_lock { * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed * FUSE_IOCTL_RETRY: retry with new iovecs + * FUSE_IOCTL_32BIT: 32bit ioctl + * FUSE_IOCTL_DIR: is a directory * * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs */ #define FUSE_IOCTL_COMPAT (1 << 0) #define FUSE_IOCTL_UNRESTRICTED (1 << 1) #define FUSE_IOCTL_RETRY (1 << 2) +#define FUSE_IOCTL_32BIT (1 << 3) +#define FUSE_IOCTL_DIR (1 << 4) #define FUSE_IOCTL_MAX_IOV 256 @@ -274,6 +310,9 @@ enum fuse_opcode { FUSE_DESTROY = 38, FUSE_IOCTL = 39, FUSE_POLL = 40, + FUSE_NOTIFY_REPLY = 41, + FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -283,6 +322,9 @@ enum fuse_notify_code { FUSE_NOTIFY_POLL = 1, FUSE_NOTIFY_INVAL_INODE = 2, FUSE_NOTIFY_INVAL_ENTRY = 3, + FUSE_NOTIFY_STORE = 4, + FUSE_NOTIFY_RETRIEVE = 5, + FUSE_NOTIFY_DELETE = 6, FUSE_NOTIFY_CODE_MAX, }; @@ -306,6 +348,16 @@ struct fuse_forget_in { __u64 nlookup; }; +struct fuse_forget_one { + __u64 nodeid; + __u64 nlookup; +}; + +struct fuse_batch_forget_in { + __u32 count; + __u32 dummy; +}; + struct fuse_getattr_in { __u32 getattr_flags; __u32 dummy; @@ -477,7 +529,8 @@ struct fuse_init_out { __u32 minor; __u32 max_readahead; __u32 flags; - __u32 unused; + __u16 max_background; + __u16 congestion_threshold; __u32 max_write; }; @@ -525,6 +578,11 @@ struct fuse_ioctl_in { __u32 out_size; }; +struct fuse_ioctl_iovec { + __u64 base; + __u64 len; +}; + struct fuse_ioctl_out { __s32 result; __u32 flags; @@ -548,6 +606,14 @@ struct fuse_notify_poll_wakeup_out { __u64 kh; }; +struct fuse_fallocate_in { + __u64 fh; + __u64 offset; + __u64 length; + __u32 mode; + __u32 padding; +}; + struct fuse_in_header { __u32 len; __u32 opcode; @@ -570,7 +636,7 @@ struct fuse_dirent { __u64 off; __u32 namelen; __u32 type; - char name[0]; + char name[]; }; #define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) @@ -590,4 +656,36 @@ struct fuse_notify_inval_entry_out { __u32 padding; }; +struct fuse_notify_delete_out { + __u64 parent; + __u64 child; + __u32 namelen; + __u32 padding; +}; + +struct fuse_notify_store_out { + __u64 nodeid; + __u64 offset; + __u32 size; + __u32 padding; +}; + +struct fuse_notify_retrieve_out { + __u64 notify_unique; + __u64 nodeid; + __u64 offset; + __u32 size; + __u32 padding; +}; + +/* Matches the size of fuse_write_in */ +struct fuse_notify_retrieve_in { + __u64 dummy1; + __u64 offset; + __u32 size; + __u32 dummy2; + __u64 dummy3; + __u64 dummy4; +}; + #endif /* _LINUX_FUSE_H */ diff --git a/fuse/include/fuse_lowlevel.h b/fuse/include/fuse_lowlevel.h index f1cfeb954..36cf26ddf 100644 --- a/fuse/include/fuse_lowlevel.h +++ b/fuse/include/fuse_lowlevel.h @@ -77,9 +77,16 @@ struct fuse_entry_param { /** Generation number for this entry. * - * The ino/generation pair should be unique for the filesystem's - * lifetime. It must be non-zero, otherwise FUSE will treat it as an - * error. + * If the file system will be exported over NFS, the + * ino/generation pairs need to be unique over the file + * system's lifetime (rather than just the mount time). So if + * the file system reuses an inode after it has been deleted, + * it must assign a new, previously unused generation number + * to the inode at the same time. + * + * The generation must be non-zero, otherwise FUSE will treat + * it as an error. + * */ unsigned long generation; @@ -114,6 +121,11 @@ struct fuse_ctx { mode_t umask; }; +struct fuse_forget_data { + uint64_t ino; + uint64_t nlookup; +}; + /* 'to_set' flags in setattr */ #define FUSE_SET_ATTR_MODE (1 << 0) #define FUSE_SET_ATTR_UID (1 << 1) @@ -188,18 +200,31 @@ struct fuse_lowlevel_ops { /** * Forget about an inode * - * The nlookup parameter indicates the number of lookups - * previously performed on this inode. + * This function is called when the kernel removes an inode + * from its internal caches. * - * If the filesystem implements inode lifetimes, it is recommended - * that inodes acquire a single reference on each lookup, and lose - * nlookup references on each forget. + * The inode's lookup count increases by one for every call to + * fuse_reply_entry and fuse_reply_create. The nlookup parameter + * indicates by how much the lookup count should be decreased. * - * The filesystem may ignore forget calls, if the inodes don't - * need to have a limited lifetime. + * Inodes with a non-zero lookup count may receive request from + * the kernel even after calls to unlink, rmdir or (when + * overwriting an existing file) rename. Filesystems must handle + * such requests properly and it is recommended to defer removal + * of the inode until the lookup count reaches zero. Calls to + * unlink, remdir or rename will be followed closely by forget + * unless the file or directory is open, in which case the + * kernel issues forget only after the release or releasedir + * calls. * - * On unmount it is not guaranteed, that all referenced inodes - * will receive a forget message. + * Note that if a file system will be exported over NFS the + * inodes lifetime must extend even beyond forget. See the + * generation field in struct fuse_entry_param above. + * + * On unmount the lookup count for all inodes implicitly drops + * to zero. It is not guaranteed that the file system will + * receive corresponding forget messages for the affected + * inodes. * * Valid replies: * fuse_reply_none @@ -303,6 +328,11 @@ struct fuse_lowlevel_ops { /** * Remove a file * + * If the file's inode's lookup count is non-zero, the file + * system is expected to postpone any removal of the inode + * until the lookup count reaches zero (see description of the + * forget function). + * * Valid replies: * fuse_reply_err * @@ -315,6 +345,11 @@ struct fuse_lowlevel_ops { /** * Remove a directory * + * If the directory's inode's lookup count is non-zero, the + * file system is expected to postpone any removal of the + * inode until the lookup count reaches zero (see description + * of the forget function). + * * Valid replies: * fuse_reply_err * @@ -340,6 +375,12 @@ struct fuse_lowlevel_ops { const char *name); /** Rename a file + * + * If the target exists it should be atomically replaced. If + * the target's inode's lookup count is non-zero, the file + * system is expected to postpone any removal of the inode + * until the lookup count reaches zero (see description of the + * forget function). * * Valid replies: * fuse_reply_err @@ -412,6 +453,7 @@ struct fuse_lowlevel_ops { * Valid replies: * fuse_reply_buf * fuse_reply_iov + * fuse_reply_data * fuse_reply_err * * @param req request handle @@ -561,6 +603,7 @@ struct fuse_lowlevel_ops { * * Valid replies: * fuse_reply_buf + * fuse_reply_data * fuse_reply_err * * @param req request handle @@ -646,6 +689,7 @@ struct fuse_lowlevel_ops { * * Valid replies: * fuse_reply_buf + * fuse_reply_data * fuse_reply_xattr * fuse_reply_err * @@ -672,6 +716,7 @@ struct fuse_lowlevel_ops { * * Valid replies: * fuse_reply_buf + * fuse_reply_data * fuse_reply_xattr * fuse_reply_err * @@ -787,7 +832,7 @@ struct fuse_lowlevel_ops { * @param req request handle * @param ino the inode number * @param fi file information - * @param lock the region/type to test + * @param lock the region/type to set * @param sleep locking operation may sleep */ void (*setlk) (fuse_req_t req, fuse_ino_t ino, @@ -873,6 +918,104 @@ struct fuse_lowlevel_ops { */ void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, struct fuse_pollhandle *ph); + + /** + * Write data made available in a buffer + * + * This is a more generic version of the ->write() method. If + * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the + * kernel supports splicing from the fuse device, then the + * data will be made available in pipe for supporting zero + * copy data transfer. + * + * Introduced in version 2.9 + * + * Valid replies: + * fuse_reply_write + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param bufv buffer containing the data + * @param off offset to write to + * @param fi file information + */ + void (*write_buf) (fuse_req_t req, fuse_ino_t ino, + struct fuse_bufvec *bufv, off64_t off, + struct fuse_file_info *fi); + + /** + * Callback function for the retrieve request + * + * Introduced in version 2.9 + * + * Valid replies: + * fuse_reply_none + * + * @param req request handle + * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() + * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() + * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() + * @param bufv the buffer containing the returned data + */ + void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, + off64_t offset, struct fuse_bufvec *bufv); + + /** + * Forget about multiple inodes + * + * See description of the forget function for more + * information. + * + * Introduced in version 2.9 + * + * Valid replies: + * fuse_reply_none + * + * @param req request handle + */ + void (*forget_multi) (fuse_req_t req, size_t count, + struct fuse_forget_data *forgets); + + /** + * Acquire, modify or release a BSD file lock + * + * Note: if the locking methods are not implemented, the kernel + * will still allow file locking to work locally. Hence these are + * only interesting for network filesystems and similar. + * + * Introduced in version 2.9 + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param fi file information + * @param op the locking operation, see flock(2) + */ + void (*flock) (fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, int op); + + /** + * Allocate requested space. If this function returns success then + * subsequent writes to the specified range shall not fail due to the lack + * of free space on the file system storage media. + * + * Introduced in version 2.9 + * + * Valid replies: + * fuse_reply_err + * + * @param req request handle + * @param ino the inode number + * @param offset starting point for allocated region + * @param length size of allocated region + * @param mode determines the operation to be performed on the given range, + * see fallocate(2) + */ + void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, + off64_t offset, off64_t length, struct fuse_file_info *fi); }; /** @@ -906,6 +1049,9 @@ void fuse_reply_none(fuse_req_t req); * Possible requests: * lookup, mknod, mkdir, symlink, link * + * Side effects: + * increments the lookup count on success + * * @param req request handle * @param e the entry parameters * @return zero for success, -errno for failure to send reply @@ -921,6 +1067,9 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); * Possible requests: * create * + * Side effects: + * increments the lookup count on success + * * @param req request handle * @param e the entry parameters * @param fi file information @@ -995,6 +1144,20 @@ int fuse_reply_write(fuse_req_t req, size_t count); */ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); +/** + * Reply with data copied/moved from buffer(s) + * + * Possible requests: + * read, readdir, getxattr, listxattr + * + * @param req request handle + * @param bufv buffer vector + * @param flags flags controlling the copy + * @return zero for success, -errno for failure to send reply + */ +int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags); + /** * Reply with data vector * @@ -1042,7 +1205,7 @@ int fuse_reply_xattr(fuse_req_t req, size_t count); * @param lock the lock information * @return zero for success, -errno for failure to send reply */ -int fuse_reply_lock(fuse_req_t req, struct flock *lock); +int fuse_reply_lock(fuse_req_t req, const struct flock *lock); /** * Reply with block index @@ -1181,6 +1344,75 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_chan *ch, fuse_ino_t ino, int fuse_lowlevel_notify_inval_entry(struct fuse_chan *ch, fuse_ino_t parent, const char *name, size_t namelen); +/** + * Notify to invalidate parent attributes and delete the dentry matching + * parent/name if the dentry's inode number matches child (otherwise it + * will invalidate the matching dentry). + * + * @param ch the channel through which to send the notification + * @param parent inode number + * @param child inode number + * @param name file name + * @param namelen strlen() of file name + * @return zero for success, -errno for failure + */ +int fuse_lowlevel_notify_delete(struct fuse_chan *ch, + fuse_ino_t parent, fuse_ino_t child, + const char *name, size_t namelen); + +/** + * Store data to the kernel buffers + * + * Synchronously store data in the kernel buffers belonging to the + * given inode. The stored data is marked up-to-date (no read will be + * performed against it, unless it's invalidated or evicted from the + * cache). + * + * If the stored data overflows the current file size, then the size + * is extended, similarly to a write(2) on the filesystem. + * + * If this function returns an error, then the store wasn't fully + * completed, but it may have been partially completed. + * + * @param ch the channel through which to send the invalidation + * @param ino the inode number + * @param offset the starting offset into the file to store to + * @param bufv buffer vector + * @param flags flags controlling the copy + * @return zero for success, -errno for failure + */ +int fuse_lowlevel_notify_store(struct fuse_chan *ch, fuse_ino_t ino, + off64_t offset, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags); +/** + * Retrieve data from the kernel buffers + * + * Retrieve data in the kernel buffers belonging to the given inode. + * If successful then the retrieve_reply() method will be called with + * the returned data. + * + * Only present pages are returned in the retrieve reply. Retrieving + * stops when it finds a non-present page and only data prior to that is + * returned. + * + * If this function returns an error, then the retrieve will not be + * completed and no reply will be sent. + * + * This function doesn't change the dirty state of pages in the kernel + * buffer. For dirty pages the write() method will be called + * regardless of having been retrieved previously. + * + * @param ch the channel through which to send the invalidation + * @param ino the inode number + * @param size the number of bytes to retrieve + * @param offset the starting offset into the file to retrieve from + * @param cookie user data to supply to the reply callback + * @return zero for success, -errno for failure + */ +int fuse_lowlevel_notify_retrieve(struct fuse_chan *ch, fuse_ino_t ino, + size_t size, off64_t offset, void *cookie); + + /* ----------------------------------------------------------- * * Utility functions * * ----------------------------------------------------------- */ @@ -1376,6 +1608,34 @@ struct fuse_chan *fuse_session_next_chan(struct fuse_session *se, void fuse_session_process(struct fuse_session *se, const char *buf, size_t len, struct fuse_chan *ch); +/** + * Process a raw request supplied in a generic buffer + * + * This is a more generic version of fuse_session_process(). The + * fuse_buf may contain a memory buffer or a pipe file descriptor. + * + * @param se the session + * @param buf the fuse_buf containing the request + * @param ch channel on which the request was received + */ +void fuse_session_process_buf(struct fuse_session *se, + const struct fuse_buf *buf, struct fuse_chan *ch); + +/** + * Receive a raw request supplied in a generic buffer + * + * This is a more generic version of fuse_chan_recv(). The fuse_buf + * supplied to this function contains a suitably allocated memory + * buffer. This may be overwritten with a file descriptor buffer. + * + * @param se the session + * @param buf the fuse_buf to store the request in + * @param chp pointer to the channel + * @return the actual size of the raw request, or -errno on error + */ +int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf, + struct fuse_chan **chp); + /** * Destroy a session * diff --git a/fuse/include/fuse_lowlevel_compat.h b/fuse/include/fuse_lowlevel_compat.h index 3d2902d7e..78b7c2bd0 100644 --- a/fuse/include/fuse_lowlevel_compat.h +++ b/fuse/include/fuse_lowlevel_compat.h @@ -72,7 +72,7 @@ size_t fuse_dirent_size(size_t namelen); char *fuse_add_dirent(char *buf, const char *name, const struct stat *stbuf, off64_t off); -#ifndef __FreeBSD__ +#if !defined(__FreeBSD__) && !defined(__NetBSD__) #include @@ -139,7 +139,7 @@ struct fuse_session *fuse_lowlevel_new_compat(const char *opts, const struct fuse_lowlevel_ops_compat *op, size_t op_size, void *userdata); -#endif /* __FreeBSD__ */ +#endif /* __FreeBSD__ || __NetBSD__ */ struct fuse_chan_ops_compat24 { int (*receive)(struct fuse_chan *ch, char *buf, size_t size); diff --git a/fuse/include/fuse_opt.h b/fuse/include/fuse_opt.h index 8c08d779a..add0a3089 100644 --- a/fuse/include/fuse_opt.h +++ b/fuse/include/fuse_opt.h @@ -21,7 +21,7 @@ extern "C" { /** * Option description * - * This structure describes a single option, and and action associated + * This structure describes a single option, and action associated * with it, in case it matches. * * More than one such match may occur, in which case the action for @@ -130,7 +130,7 @@ struct fuse_args { /** * Key value passed to the processing function for all non-options * - * Non-options are the arguments beginning with a charater other than + * Non-options are the arguments beginning with a character other than * '-' or all arguments after the special '--' option */ #define FUSE_OPT_KEY_NONOPT -2 @@ -161,7 +161,7 @@ struct fuse_args { * * The 'arg' parameter will always contain the whole argument or * option including the parameter if exists. A two-argument option - * ("-x foo") is always converted to single arguemnt option of the + * ("-x foo") is always converted to single argument option of the * form "-xfoo" before this function is called. * * Options of the form '-ofoo' are passed to this function without the @@ -234,7 +234,7 @@ int fuse_opt_add_arg(struct fuse_args *args, const char *arg); * argument vector * * Adds the argument to the N-th position. This is useful for adding - * options at the beggining of the array which must not come after the + * options at the beginning of the array which must not come after the * special '--' option. * * @param args is the structure containing the current argument list diff --git a/fuse/mount.c b/fuse/mount.c index 88ab59721..af7218fa4 100644 --- a/fuse/mount.c +++ b/fuse/mount.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,20 @@ #include #include +#ifdef __NetBSD__ +#include + +#define MS_RDONLY MNT_RDONLY +#define MS_NOSUID MNT_NOSUID +#define MS_NODEV MNT_NODEV +#define MS_NOEXEC MNT_NOEXEC +#define MS_SYNCHRONOUS MNT_SYNCHRONOUS +#define MS_NOATIME MNT_NOATIME + + +#define umount2(mnt, flags) unmount(mnt, (flags == 2) ? MNT_FORCE : 0) +#endif + #define FUSERMOUNT_PROG "fusermount" #define FUSE_COMMFD_ENV "_FUSE_COMMFD" @@ -54,6 +69,7 @@ struct mount_opts { int ishelp; int flags; int nonempty; + int auto_unmount; int blkdev; char *fsname; char *subtype; @@ -70,11 +86,13 @@ static const struct fuse_opt fuse_mount_opts[] = { FUSE_MOUNT_OPT("allow_root", allow_root), FUSE_MOUNT_OPT("nonempty", nonempty), FUSE_MOUNT_OPT("blkdev", blkdev), + FUSE_MOUNT_OPT("auto_unmount", auto_unmount), FUSE_MOUNT_OPT("fsname=%s", fsname), FUSE_MOUNT_OPT("subtype=%s", subtype), FUSE_OPT_KEY("allow_other", KEY_KERN_OPT), FUSE_OPT_KEY("allow_root", KEY_ALLOW_ROOT), FUSE_OPT_KEY("nonempty", KEY_FUSERMOUNT_OPT), + FUSE_OPT_KEY("auto_unmount", KEY_FUSERMOUNT_OPT), FUSE_OPT_KEY("blkdev", KEY_FUSERMOUNT_OPT), FUSE_OPT_KEY("fsname=", KEY_FUSERMOUNT_OPT), FUSE_OPT_KEY("subtype=", KEY_SUBTYPE_OPT), @@ -110,6 +128,7 @@ static void mount_help(void) fprintf(stderr, " -o allow_other allow access to other users\n" " -o allow_root allow access to root\n" +" -o auto_unmount auto unmount on process termination\n" " -o nonempty allow mounts over non-empty file/dir\n" " -o default_permissions enable permission checking by kernel\n" " -o fsname=NAME set filesystem name\n" @@ -120,7 +139,6 @@ static void mount_help(void) } #define FUSERMOUNT_DIR "/usr/bin" - static void exec_fusermount(const char *argv[]) { execv(FUSERMOUNT_DIR "/" FUSERMOUNT_PROG, (char **) argv); @@ -144,7 +162,7 @@ struct mount_flags { int on; }; -static struct mount_flags mount_flags[] = { +static const struct mount_flags mount_flags[] = { {"rw", MS_RDONLY, 0}, {"ro", MS_RDONLY, 1}, {"suid", MS_NOSUID, 0}, @@ -157,7 +175,9 @@ static struct mount_flags mount_flags[] = { {"sync", MS_SYNCHRONOUS, 1}, {"atime", MS_NOATIME, 0}, {"noatime", MS_NOATIME, 1}, +#ifndef __NetBSD__ {"dirsync", MS_DIRSYNC, 1}, +#endif {NULL, 0, 0} }; @@ -239,6 +259,7 @@ static int receive_fd(int fd) iov.iov_base = buf; iov.iov_len = 1; + memset(&msg, 0, sizeof(msg)); msg.msg_name = 0; msg.msg_namelen = 0; msg.msg_iov = &iov; @@ -319,8 +340,8 @@ void fuse_unmount_compat22(const char *mountpoint) fuse_kern_unmount(mountpoint, -1); } -static int fuse_mount_fusermount(const char *mountpoint, const char *opts, - int quiet) +static int fuse_mount_fusermount(const char *mountpoint, struct mount_opts *mo, + const char *opts, int quiet) { int fds[2], pid; int res; @@ -352,8 +373,10 @@ static int fuse_mount_fusermount(const char *mountpoint, const char *opts, if (quiet) { int fd = open("/dev/null", O_RDONLY); - dup2(fd, 1); - dup2(fd, 2); + if (fd != -1) { + dup2(fd, 1); + dup2(fd, 2); + } } argv[a++] = FUSERMOUNT_PROG; @@ -376,15 +399,24 @@ static int fuse_mount_fusermount(const char *mountpoint, const char *opts, close(fds[0]); rv = receive_fd(fds[1]); - close(fds[1]); - waitpid(pid, NULL, 0); /* bury zombie */ + + if (!mo->auto_unmount) { + /* with auto_unmount option fusermount will not exit until + this socket is closed */ + close(fds[1]); + waitpid(pid, NULL, 0); /* bury zombie */ + } return rv; } int fuse_mount_compat22(const char *mountpoint, const char *opts) { - return fuse_mount_fusermount(mountpoint, opts, 0); + struct mount_opts mo; + memset(&mo, 0, sizeof(mo)); + mo.flags = MS_NOSUID | MS_NODEV; + + return fuse_mount_fusermount(mountpoint, &mo, opts, 0); } static int fuse_mount_sys(const char *mnt, struct mount_opts *mo, @@ -417,6 +449,12 @@ static int fuse_mount_sys(const char *mnt, struct mount_opts *mo, return -1; } + if (mo->auto_unmount) { + /* Tell the caller to fallback to fusermount because + auto-unmount does not work otherwise. */ + return -2; + } + fd = open(devname, O_RDWR); if (fd == -1) { if (errno == ENODEV || errno == ENOENT) @@ -486,6 +524,8 @@ static int fuse_mount_sys(const char *mnt, struct mount_opts *mo, goto out_close; } +#ifndef __NetBSD__ +#ifndef IGNORE_MTAB if (geteuid() == 0) { char *newmnt = fuse_mnt_resolve_path("fuse", mnt); res = -1; @@ -498,6 +538,8 @@ static int fuse_mount_sys(const char *mnt, struct mount_opts *mo, if (res == -1) goto out_umount; } +#endif /* IGNORE_MTAB */ +#endif /* __NetBSD__ */ free(type); free(source); @@ -572,13 +614,13 @@ int fuse_kern_mount(const char *mountpoint, struct fuse_args *args) goto out; } - res = fuse_mount_fusermount(mountpoint, tmp_opts, 1); + res = fuse_mount_fusermount(mountpoint, &mo, tmp_opts, 1); free(tmp_opts); if (res == -1) - res = fuse_mount_fusermount(mountpoint, + res = fuse_mount_fusermount(mountpoint, &mo, mnt_opts, 0); } else { - res = fuse_mount_fusermount(mountpoint, mnt_opts, 0); + res = fuse_mount_fusermount(mountpoint, &mo, mnt_opts, 0); } } out: diff --git a/fuse/mount_util.c b/fuse/mount_util.c index de1bd6714..bfd801fff 100644 --- a/fuse/mount_util.c +++ b/fuse/mount_util.c @@ -11,18 +11,22 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include #include -#include - +#ifdef __NetBSD__ +#define umount2(mnt, flags) unmount(mnt, (flags == 2) ? MNT_FORCE : 0) +#define mtab_needs_update(mnt) 0 +#else static int mtab_needs_update(const char *mnt) { int res; @@ -45,19 +49,31 @@ static int mtab_needs_update(const char *mnt) if (errno == ENOENT) return 0; } else { + uid_t ruid; + int err; + if (S_ISLNK(stbuf.st_mode)) return 0; + ruid = getuid(); + if (ruid != 0) + setreuid(0, -1); + res = access(_PATH_MOUNTED, W_OK); - if (res == -1 && errno == EROFS) + err = (res == -1) ? errno : 0; + if (ruid != 0) + setreuid(ruid, -1); + + if (err == EROFS) return 0; } return 1; } +#endif /* __NetBSD__ */ -static int add_mount_legacy(const char *progname, const char *fsname, - const char *mnt, const char *type, const char *opts) +static int add_mount(const char *progname, const char *fsname, + const char *mnt, const char *type, const char *opts) { int res; int status; @@ -78,31 +94,10 @@ static int add_mount_legacy(const char *progname, const char *fsname, goto out_restore; } if (res == 0) { - char templ[] = "/tmp/fusermountXXXXXX"; - char *tmp; - sigprocmask(SIG_SETMASK, &oldmask, NULL); setuid(geteuid()); - - /* - * hide in a directory, where mount isn't able to resolve - * fsname as a valid path - */ - tmp = mkdtemp(templ); - if (!tmp) { - fprintf(stderr, - "%s: failed to create temporary directory\n", - progname); - exit(1); - } - if (chdir(tmp)) { - fprintf(stderr, "%s: failed to chdir to %s: %s\n", - progname, tmp, strerror(errno)); - exit(1); - } - rmdir(tmp); - execl("/bin/mount", "/bin/mount", "-i", "-f", "-t", type, - "-o", opts, fsname, mnt, NULL); + execl("/bin/mount", "/bin/mount", "--no-canonicalize", "-i", + "-f", "-t", type, "-o", opts, fsname, mnt, NULL); fprintf(stderr, "%s: failed to execute /bin/mount: %s\n", progname, strerror(errno)); exit(1); @@ -120,8 +115,16 @@ static int add_mount_legacy(const char *progname, const char *fsname, return res; } -static int add_mount(const char *progname, const char *fsname, +int fuse_mnt_add_mount(const char *progname, const char *fsname, const char *mnt, const char *type, const char *opts) +{ + if (!mtab_needs_update(mnt)) + return 0; + + return add_mount(progname, fsname, mnt, type, opts); +} + +static int exec_umount(const char *progname, const char *rel_mnt, int lazy) { int res; int status; @@ -142,19 +145,11 @@ static int add_mount(const char *progname, const char *fsname, goto out_restore; } if (res == 0) { - /* - * Hide output, because old versions don't support - * --no-canonicalize - */ - int fd = open("/dev/null", O_RDONLY); - dup2(fd, 1); - dup2(fd, 2); - sigprocmask(SIG_SETMASK, &oldmask, NULL); setuid(geteuid()); - execl("/bin/mount", "/bin/mount", "--no-canonicalize", "-i", - "-f", "-t", type, "-o", opts, fsname, mnt, NULL); - fprintf(stderr, "%s: failed to execute /bin/mount: %s\n", + execl("/bin/umount", "/bin/umount", "-i", rel_mnt, + lazy ? "-l" : NULL, NULL); + fprintf(stderr, "%s: failed to execute /bin/umount: %s\n", progname, strerror(errno)); exit(1); } @@ -162,31 +157,33 @@ static int add_mount(const char *progname, const char *fsname, if (res == -1) fprintf(stderr, "%s: waitpid: %s\n", progname, strerror(errno)); - if (status != 0) + if (status != 0) { res = -1; + } out_restore: sigprocmask(SIG_SETMASK, &oldmask, NULL); - return res; + } -int fuse_mnt_add_mount(const char *progname, const char *fsname, - const char *mnt, const char *type, const char *opts) +int fuse_mnt_umount(const char *progname, const char *abs_mnt, + const char *rel_mnt, int lazy) { int res; - if (!mtab_needs_update(mnt)) - return 0; - - res = add_mount(progname, fsname, mnt, type, opts); - if (res == -1) - res = add_mount_legacy(progname, fsname, mnt, type, opts); + if (!mtab_needs_update(abs_mnt)) { + res = umount2(rel_mnt, lazy ? 2 : 0); + if (res == -1) + fprintf(stderr, "%s: failed to unmount %s: %s\n", + progname, abs_mnt, strerror(errno)); + return res; + } - return res; + return exec_umount(progname, rel_mnt, lazy); } -static int exec_umount(const char *progname, const char *rel_mnt, int lazy) +static int remove_mount(const char *progname, const char *mnt) { int res; int status; @@ -209,8 +206,8 @@ static int exec_umount(const char *progname, const char *rel_mnt, int lazy) if (res == 0) { sigprocmask(SIG_SETMASK, &oldmask, NULL); setuid(geteuid()); - execl("/bin/umount", "/bin/umount", "-i", rel_mnt, - lazy ? "-l" : NULL, NULL); + execl("/bin/umount", "/bin/umount", "--no-canonicalize", "-i", + "--fake", mnt, NULL); fprintf(stderr, "%s: failed to execute /bin/umount: %s\n", progname, strerror(errno)); exit(1); @@ -219,30 +216,20 @@ static int exec_umount(const char *progname, const char *rel_mnt, int lazy) if (res == -1) fprintf(stderr, "%s: waitpid: %s\n", progname, strerror(errno)); - if (status != 0) { + if (status != 0) res = -1; - } out_restore: sigprocmask(SIG_SETMASK, &oldmask, NULL); return res; - } -int fuse_mnt_umount(const char *progname, const char *abs_mnt, - const char *rel_mnt, int lazy) +int fuse_mnt_remove_mount(const char *progname, const char *mnt) { - int res; - - if (!mtab_needs_update(abs_mnt)) { - res = umount2(rel_mnt, lazy ? 2 : 0); - if (res == -1) - fprintf(stderr, "%s: failed to unmount %s: %s\n", - progname, abs_mnt, strerror(errno)); - return res; - } + if (!mtab_needs_update(mnt)) + return 0; - return exec_umount(progname, rel_mnt, lazy); + return remove_mount(progname, mnt); } char *fuse_mnt_resolve_path(const char *progname, const char *orig) diff --git a/fuse/mount_util.h b/fuse/mount_util.h index 5c2fb8e6e..f89c1154d 100644 --- a/fuse/mount_util.h +++ b/fuse/mount_util.h @@ -10,6 +10,7 @@ int fuse_mnt_add_mount(const char *progname, const char *fsname, const char *mnt, const char *type, const char *opts); +int fuse_mnt_remove_mount(const char *progname, const char *mnt); int fuse_mnt_umount(const char *progname, const char *abs_mnt, const char *rel_mnt, int lazy); char *fuse_mnt_resolve_path(const char *progname, const char *orig); diff --git a/fuse/ulockmgr.c b/fuse/ulockmgr.c index 4a049d5b2..f0523ae70 100644 --- a/fuse/ulockmgr.c +++ b/fuse/ulockmgr.c @@ -400,6 +400,10 @@ int ulockmgr_op(int fd, int cmd, struct flock *lock, const void *owner, if (cmd != F_GETLK && cmd != F_SETLK && cmd != F_SETLKW) return -EINVAL; + if (lock->l_type != F_RDLCK && lock->l_type != F_WRLCK && + lock->l_type != F_UNLCK) + return -EINVAL; + if (lock->l_whence != SEEK_SET && lock->l_whence != SEEK_CUR && lock->l_whence != SEEK_END) return -EINVAL; -- cgit v1.2.3