1/*
2  FUSE: Filesystem in Userspace
3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4
5  This program can be distributed under the terms of the GNU GPL.
6  See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/pagemap.h>
12#include <linux/file.h>
13#include <linux/sched.h>
14#include <linux/namei.h>
15#include <linux/slab.h>
16
17static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
18{
19	struct fuse_conn *fc = get_fuse_conn(dir);
20	struct fuse_inode *fi = get_fuse_inode(dir);
21
22	if (!fc->do_readdirplus)
23		return false;
24	if (!fc->readdirplus_auto)
25		return true;
26	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
27		return true;
28	if (ctx->pos == 0)
29		return true;
30	return false;
31}
32
33static void fuse_advise_use_readdirplus(struct inode *dir)
34{
35	struct fuse_inode *fi = get_fuse_inode(dir);
36
37	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
38}
39
40#if BITS_PER_LONG >= 64
41static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
42{
43	entry->d_time = time;
44}
45
46static inline u64 fuse_dentry_time(struct dentry *entry)
47{
48	return entry->d_time;
49}
50#else
51/*
52 * On 32 bit archs store the high 32 bits of time in d_fsdata
53 */
54static void fuse_dentry_settime(struct dentry *entry, u64 time)
55{
56	entry->d_time = time;
57	entry->d_fsdata = (void *) (unsigned long) (time >> 32);
58}
59
60static u64 fuse_dentry_time(struct dentry *entry)
61{
62	return (u64) entry->d_time +
63		((u64) (unsigned long) entry->d_fsdata << 32);
64}
65#endif
66
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time (in jiffies) until which the dentry/attributes remain valid is
 * stored in dentry->d_time and fuse_inode->i_time respectively.
 */
72
73/*
74 * Calculate the time in jiffies until a dentry/attributes are valid
75 */
76static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
77{
78	if (sec || nsec) {
79		struct timespec ts = {sec, nsec};
80		return get_jiffies_64() + timespec_to_jiffies(&ts);
81	} else
82		return 0;
83}
84
85/*
86 * Set dentry and possibly attribute timeouts from the lookup/mk*
87 * replies
88 */
89static void fuse_change_entry_timeout(struct dentry *entry,
90				      struct fuse_entry_out *o)
91{
92	fuse_dentry_settime(entry,
93		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
94}
95
96static u64 attr_timeout(struct fuse_attr_out *o)
97{
98	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
99}
100
101static u64 entry_attr_timeout(struct fuse_entry_out *o)
102{
103	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
104}
105
106/*
107 * Mark the attributes as stale, so that at the next call to
108 * ->getattr() they will be fetched from userspace
109 */
110void fuse_invalidate_attr(struct inode *inode)
111{
112	get_fuse_inode(inode)->i_time = 0;
113}
114
/*
 * Mark the attributes as stale due to an atime change.  Nothing is
 * invalidated when IS_RDONLY() holds for the inode.
 */
void fuse_invalidate_atime(struct inode *inode)
{
	if (IS_RDONLY(inode))
		return;

	fuse_invalidate_attr(inode);
}
124
125/*
126 * Just mark the entry as stale, so that a next attempt to look it up
127 * will result in a new lookup call to userspace
128 *
129 * This is called when a dentry is about to become negative and the
130 * timeout is unknown (unlink, rmdir, rename and in some cases
131 * lookup)
132 */
133void fuse_invalidate_entry_cache(struct dentry *entry)
134{
135	fuse_dentry_settime(entry, 0);
136}
137
/*
 * Same as fuse_invalidate_entry_cache(), but also try to remove the
 * dentry from the hash
 *
 * d_invalidate() is called first, then the dentry timeout is reset to 0.
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);
}
147
148static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
149			     u64 nodeid, struct qstr *name,
150			     struct fuse_entry_out *outarg)
151{
152	memset(outarg, 0, sizeof(struct fuse_entry_out));
153	args->in.h.opcode = FUSE_LOOKUP;
154	args->in.h.nodeid = nodeid;
155	args->in.numargs = 1;
156	args->in.args[0].size = name->len + 1;
157	args->in.args[0].value = name->name;
158	args->out.numargs = 1;
159	args->out.args[0].size = sizeof(struct fuse_entry_out);
160	args->out.args[0].value = outarg;
161}
162
163u64 fuse_get_attr_version(struct fuse_conn *fc)
164{
165	u64 curr_version;
166
167	/*
168	 * The spin lock isn't actually needed on 64bit archs, but we
169	 * don't yet care too much about such optimizations.
170	 */
171	spin_lock(&fc->lock);
172	curr_version = fc->attr_version;
173	spin_unlock(&fc->lock);
174
175	return curr_version;
176}
177
/*
 * Check whether the dentry is still valid
 *
 * If the entry validity timeout has expired and the dentry is
 * positive, try to redo the lookup.  If the lookup results in a
 * different inode, then let the VFS invalidate the dentry and redo
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 *
 * Return values as produced below: 1 if the dentry is valid, 0 if it is
 * invalid, -ECHILD if work is needed that is not done with LOOKUP_RCU
 * set, and -ENOMEM on allocation failure or when the request itself
 * returned -ENOMEM.
 */
static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
{
	struct inode *inode;
	struct dentry *parent;
	struct fuse_conn *fc;
	struct fuse_inode *fi;
	int ret;

	inode = d_inode_rcu(entry);
	if (inode && is_bad_inode(inode))
		goto invalid;
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & LOOKUP_REVAL)) {
		/* Timeout expired or revalidation forced: redo the lookup */
		struct fuse_entry_out outarg;
		FUSE_ARGS(args);
		struct fuse_forget_link *forget;
		u64 attr_version;

		/* For negative dentries, always do a fresh lookup */
		if (!inode)
			goto invalid;

		/* The request below is not issued when LOOKUP_RCU is set */
		ret = -ECHILD;
		if (flags & LOOKUP_RCU)
			goto out;

		fc = get_fuse_conn(inode);

		forget = fuse_alloc_forget();
		ret = -ENOMEM;
		if (!forget)
			goto out;

		/* Sampled before the request; passed to fuse_change_attributes() */
		attr_version = fuse_get_attr_version(fc);

		parent = dget_parent(entry);
		fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
				 &entry->d_name, &outarg);
		ret = fuse_simple_request(fc, &args);
		dput(parent);
		/* Zero nodeid is same as -ENOENT */
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
			fi = get_fuse_inode(inode);
			if (outarg.nodeid != get_node_id(inode)) {
				/*
				 * The name now refers to a different node:
				 * forget is handed to fuse_queue_forget()
				 * and is not freed here on this path.
				 */
				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
				goto invalid;
			}
			/* Same node: account for the successful lookup */
			spin_lock(&fc->lock);
			fi->nlookup++;
			spin_unlock(&fc->lock);
		}
		kfree(forget);
		if (ret == -ENOMEM)
			goto out;
		/* Any other error, or a changed file type, invalidates */
		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
			goto invalid;

		fuse_change_attributes(inode, &outarg.attr,
				       entry_attr_timeout(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
	} else if (inode) {
		/*
		 * Still valid.  If FUSE_I_INIT_RDPLUS is set on the inode,
		 * clear it and advise READDIRPLUS on the parent; with
		 * LOOKUP_RCU set the bit is only tested and -ECHILD is
		 * returned instead.
		 */
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
			parent = dget_parent(entry);
			fuse_advise_use_readdirplus(d_inode(parent));
			dput(parent);
		}
	}
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
}
269
270static int invalid_nodeid(u64 nodeid)
271{
272	return !nodeid || nodeid == FUSE_ROOT_ID;
273}
274
/* Dentry operations table; only ->d_revalidate is set here */
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
};
278
/*
 * Return nonzero if the file type bits of mode @m denote a regular file,
 * directory, symlink, character or block device, FIFO or socket.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;
	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	return S_ISFIFO(m) || S_ISSOCK(m);
}
284
285int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
286		     struct fuse_entry_out *outarg, struct inode **inode)
287{
288	struct fuse_conn *fc = get_fuse_conn_super(sb);
289	FUSE_ARGS(args);
290	struct fuse_forget_link *forget;
291	u64 attr_version;
292	int err;
293
294	*inode = NULL;
295	err = -ENAMETOOLONG;
296	if (name->len > FUSE_NAME_MAX)
297		goto out;
298
299
300	forget = fuse_alloc_forget();
301	err = -ENOMEM;
302	if (!forget)
303		goto out;
304
305	attr_version = fuse_get_attr_version(fc);
306
307	fuse_lookup_init(fc, &args, nodeid, name, outarg);
308	err = fuse_simple_request(fc, &args);
309	/* Zero nodeid is same as -ENOENT, but with valid timeout */
310	if (err || !outarg->nodeid)
311		goto out_put_forget;
312
313	err = -EIO;
314	if (!outarg->nodeid)
315		goto out_put_forget;
316	if (!fuse_valid_type(outarg->attr.mode))
317		goto out_put_forget;
318
319	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
320			   &outarg->attr, entry_attr_timeout(outarg),
321			   attr_version);
322	err = -ENOMEM;
323	if (!*inode) {
324		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
325		goto out;
326	}
327	err = 0;
328
329 out_put_forget:
330	kfree(forget);
331 out:
332	return err;
333}
334
335static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
336				  unsigned int flags)
337{
338	int err;
339	struct fuse_entry_out outarg;
340	struct inode *inode;
341	struct dentry *newent;
342	bool outarg_valid = true;
343
344	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
345			       &outarg, &inode);
346	if (err == -ENOENT) {
347		outarg_valid = false;
348		err = 0;
349	}
350	if (err)
351		goto out_err;
352
353	err = -EIO;
354	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
355		goto out_iput;
356
357	newent = d_splice_alias(inode, entry);
358	err = PTR_ERR(newent);
359	if (IS_ERR(newent))
360		goto out_err;
361
362	entry = newent ? newent : entry;
363	if (outarg_valid)
364		fuse_change_entry_timeout(entry, &outarg);
365	else
366		fuse_invalidate_entry_cache(entry);
367
368	fuse_advise_use_readdirplus(dir);
369	return newent;
370
371 out_iput:
372	iput(inode);
373 out_err:
374	return ERR_PTR(err);
375}
376
/*
 * Atomic create+open operation
 *
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.
 *
 * Sends a single FUSE_CREATE request whose reply carries both the new
 * entry (outentry) and the open result (outopen).  Returns 0 on success
 * or a negative error; the fallback itself is done by the caller, which
 * checks for -ENOSYS.
 */
static int fuse_create_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode, int *opened)
{
	int err;
	struct inode *inode;
	struct fuse_conn *fc = get_fuse_conn(dir);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	struct fuse_create_in inarg;
	struct fuse_open_out outopen;
	struct fuse_entry_out outentry;
	struct fuse_file *ff;

	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out_err;

	err = -ENOMEM;
	ff = fuse_file_alloc(fc);
	if (!ff)
		goto out_put_forget_req;

	/* Apply the umask here unless the connection has dont_mask set */
	if (!fc->dont_mask)
		mode &= ~current_umask();

	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
	memset(&outentry, 0, sizeof(outentry));
	inarg.flags = flags;
	inarg.mode = mode;
	inarg.umask = current_umask();
	/* Request: create parameters followed by the name (len + 1 bytes) */
	args.in.h.opcode = FUSE_CREATE;
	args.in.h.nodeid = get_node_id(dir);
	args.in.numargs = 2;
	args.in.args[0].size = sizeof(inarg);
	args.in.args[0].value = &inarg;
	args.in.args[1].size = entry->d_name.len + 1;
	args.in.args[1].value = entry->d_name.name;
	/* Reply: entry information followed by the open result */
	args.out.numargs = 2;
	args.out.args[0].size = sizeof(outentry);
	args.out.args[0].value = &outentry;
	args.out.args[1].size = sizeof(outopen);
	args.out.args[1].value = &outopen;
	err = fuse_simple_request(fc, &args);
	if (err)
		goto out_free_ff;

	/* The reply must describe a regular file with a usable node ID */
	err = -EIO;
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
		goto out_free_ff;

	ff->fh = outopen.fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopen.open_flags;
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
			  &outentry.attr, entry_attr_timeout(&outentry), 0);
	if (!inode) {
		/*
		 * No inode: release the file handle via fuse_sync_release()
		 * and hand forget to fuse_queue_forget().  Neither ff nor
		 * forget is freed by the labels below on this path.
		 */
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
		fuse_sync_release(ff, flags);
		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
		err = -ENOMEM;
		goto out_err;
	}
	kfree(forget);
	d_instantiate(entry, inode);
	fuse_change_entry_timeout(entry, &outentry);
	/* The directory's cached attributes are now stale */
	fuse_invalidate_attr(dir);
	err = finish_open(file, entry, generic_file_open, opened);
	if (err) {
		fuse_sync_release(ff, flags);
	} else {
		file->private_data = fuse_file_get(ff);
		fuse_finish_open(inode, file);
	}
	return err;

out_free_ff:
	fuse_file_free(ff);
out_put_forget_req:
	kfree(forget);
out_err:
	return err;
}
471
472static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
/*
 * Atomic open entry point.
 *
 * An unhashed dentry is looked up first.  If O_CREAT is not set or the
 * dentry is positive, the open is left to finish_no_open().  Otherwise
 * FILE_CREATED is set in *@opened and the file is created with
 * fuse_create_open(); if the connection has no_create set, or the
 * request returns -ENOSYS (which sets no_create), fuse_mknod() followed
 * by finish_no_open() is used instead.
 */
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
			    struct file *file, unsigned flags,
			    umode_t mode, int *opened)
{
	int err;
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct dentry *res = NULL;

	if (d_unhashed(entry)) {
		res = fuse_lookup(dir, entry, 0);
		if (IS_ERR(res))
			return PTR_ERR(res);

		/* Continue with the dentry returned by the lookup, if any */
		if (res)
			entry = res;
	}

	if (!(flags & O_CREAT) || d_really_is_positive(entry))
		goto no_open;

	/* Only creates */
	*opened |= FILE_CREATED;

	if (fc->no_create)
		goto mknod;

	err = fuse_create_open(dir, entry, file, flags, mode, opened);
	if (err == -ENOSYS) {
		/* Remember the failure so later calls go straight to mknod */
		fc->no_create = 1;
		goto mknod;
	}
out_dput:
	/* Drop the reference obtained from fuse_lookup() (res may be NULL) */
	dput(res);
	return err;

mknod:
	err = fuse_mknod(dir, entry, mode, 0);
	if (err)
		goto out_dput;
no_open:
	/* res is passed on to finish_no_open() and not dput() here */
	return finish_no_open(file, res);
}
515
/*
 * Code shared between mknod, mkdir, symlink and link
 *
 * The caller fills the opcode and the input arguments of @args; this
 * function adds the parent node ID and the reply buffer, sends the
 * request, validates the reply and instantiates @entry with the new
 * inode.  @mode is only used to check the file type of the reply.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EIO if the reply
 * has an invalid node ID or a file type differing from @mode, or the
 * error of the request or of d_instantiate_no_diralias().
 */
static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
			    struct inode *dir, struct dentry *entry,
			    umode_t mode)
{
	struct fuse_entry_out outarg;
	struct inode *inode;
	int err;
	struct fuse_forget_link *forget;

	forget = fuse_alloc_forget();
	if (!forget)
		return -ENOMEM;

	memset(&outarg, 0, sizeof(outarg));
	args->in.h.nodeid = get_node_id(dir);
	args->out.numargs = 1;
	args->out.args[0].size = sizeof(outarg);
	args->out.args[0].value = &outarg;
	err = fuse_simple_request(fc, args);
	if (err)
		goto out_put_forget_req;

	err = -EIO;
	if (invalid_nodeid(outarg.nodeid))
		goto out_put_forget_req;

	/* The file type in the reply must match the requested one */
	if ((outarg.attr.mode ^ mode) & S_IFMT)
		goto out_put_forget_req;

	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
			  &outarg.attr, entry_attr_timeout(&outarg), 0);
	if (!inode) {
		/* forget is handed to fuse_queue_forget(); not freed here */
		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
		return -ENOMEM;
	}
	kfree(forget);

	err = d_instantiate_no_diralias(entry, inode);
	if (err)
		return err;

	fuse_change_entry_timeout(entry, &outarg);
	/* The directory's cached attributes are now stale */
	fuse_invalidate_attr(dir);
	return 0;

 out_put_forget_req:
	kfree(forget);
	return err;
}
568
569static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
570		      dev_t rdev)
571{
572	struct fuse_mknod_in inarg;
573	struct fuse_conn *fc = get_fuse_conn(dir);
574	FUSE_ARGS(args);
575
576	if (!fc->dont_mask)
577		mode &= ~current_umask();
578
579	memset(&inarg, 0, sizeof(inarg));
580	inarg.mode = mode;
581	inarg.rdev = new_encode_dev(rdev);
582	inarg.umask = current_umask();
583	args.in.h.opcode = FUSE_MKNOD;
584	args.in.numargs = 2;
585	args.in.args[0].size = sizeof(inarg);
586	args.in.args[0].value = &inarg;
587	args.in.args[1].size = entry->d_name.len + 1;
588	args.in.args[1].value = entry->d_name.name;
589	return create_new_entry(fc, &args, dir, entry, mode);
590}
591
/*
 * Create a file by delegating to fuse_mknod() with a device number of 0.
 * @excl is not used.
 */
static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
		       bool excl)
{
	return fuse_mknod(dir, entry, mode, 0);
}
597
598static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
599{
600	struct fuse_mkdir_in inarg;
601	struct fuse_conn *fc = get_fuse_conn(dir);
602	FUSE_ARGS(args);
603
604	if (!fc->dont_mask)
605		mode &= ~current_umask();
606
607	memset(&inarg, 0, sizeof(inarg));
608	inarg.mode = mode;
609	inarg.umask = current_umask();
610	args.in.h.opcode = FUSE_MKDIR;
611	args.in.numargs = 2;
612	args.in.args[0].size = sizeof(inarg);
613	args.in.args[0].value = &inarg;
614	args.in.args[1].size = entry->d_name.len + 1;
615	args.in.args[1].value = entry->d_name.name;
616	return create_new_entry(fc, &args, dir, entry, S_IFDIR);
617}
618
619static int fuse_symlink(struct inode *dir, struct dentry *entry,
620			const char *link)
621{
622	struct fuse_conn *fc = get_fuse_conn(dir);
623	unsigned len = strlen(link) + 1;
624	FUSE_ARGS(args);
625
626	args.in.h.opcode = FUSE_SYMLINK;
627	args.in.numargs = 2;
628	args.in.args[0].size = entry->d_name.len + 1;
629	args.in.args[0].value = entry->d_name.name;
630	args.in.args[1].size = len;
631	args.in.args[1].value = link;
632	return create_new_entry(fc, &args, dir, entry, S_IFLNK);
633}
634
635static inline void fuse_update_ctime(struct inode *inode)
636{
637	if (!IS_NOCMTIME(inode)) {
638		inode->i_ctime = current_fs_time(inode->i_sb);
639		mark_inode_dirty_sync(inode);
640	}
641}
642
643static int fuse_unlink(struct inode *dir, struct dentry *entry)
644{
645	int err;
646	struct fuse_conn *fc = get_fuse_conn(dir);
647	FUSE_ARGS(args);
648
649	args.in.h.opcode = FUSE_UNLINK;
650	args.in.h.nodeid = get_node_id(dir);
651	args.in.numargs = 1;
652	args.in.args[0].size = entry->d_name.len + 1;
653	args.in.args[0].value = entry->d_name.name;
654	err = fuse_simple_request(fc, &args);
655	if (!err) {
656		struct inode *inode = d_inode(entry);
657		struct fuse_inode *fi = get_fuse_inode(inode);
658
659		spin_lock(&fc->lock);
660		fi->attr_version = ++fc->attr_version;
661		/*
662		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
663		 * happen if userspace filesystem is careless.  It would be
664		 * difficult to enforce correct nlink usage so just ignore this
665		 * condition here
666		 */
667		if (inode->i_nlink > 0)
668			drop_nlink(inode);
669		spin_unlock(&fc->lock);
670		fuse_invalidate_attr(inode);
671		fuse_invalidate_attr(dir);
672		fuse_invalidate_entry_cache(entry);
673		fuse_update_ctime(inode);
674	} else if (err == -EINTR)
675		fuse_invalidate_entry(entry);
676	return err;
677}
678
679static int fuse_rmdir(struct inode *dir, struct dentry *entry)
680{
681	int err;
682	struct fuse_conn *fc = get_fuse_conn(dir);
683	FUSE_ARGS(args);
684
685	args.in.h.opcode = FUSE_RMDIR;
686	args.in.h.nodeid = get_node_id(dir);
687	args.in.numargs = 1;
688	args.in.args[0].size = entry->d_name.len + 1;
689	args.in.args[0].value = entry->d_name.name;
690	err = fuse_simple_request(fc, &args);
691	if (!err) {
692		clear_nlink(d_inode(entry));
693		fuse_invalidate_attr(dir);
694		fuse_invalidate_entry_cache(entry);
695	} else if (err == -EINTR)
696		fuse_invalidate_entry(entry);
697	return err;
698}
699
/*
 * Common part of the rename operations.
 *
 * @opcode selects the request and @argsize is the number of bytes of
 * inarg that are cleared and sent as the first argument; fuse_rename2()
 * passes the size of either struct fuse_rename_in or struct
 * fuse_rename2_in.  The old and the new name follow as second and third
 * argument.
 *
 * Returns the result of the request.
 */
static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
			      struct inode *newdir, struct dentry *newent,
			      unsigned int flags, int opcode, size_t argsize)
{
	int err;
	struct fuse_rename2_in inarg;
	struct fuse_conn *fc = get_fuse_conn(olddir);
	FUSE_ARGS(args);

	/* Only the first argsize bytes are cleared; the same number is sent */
	memset(&inarg, 0, argsize);
	inarg.newdir = get_node_id(newdir);
	inarg.flags = flags;
	args.in.h.opcode = opcode;
	args.in.h.nodeid = get_node_id(olddir);
	args.in.numargs = 3;
	args.in.args[0].size = argsize;
	args.in.args[0].value = &inarg;
	args.in.args[1].size = oldent->d_name.len + 1;
	args.in.args[1].value = oldent->d_name.name;
	args.in.args[2].size = newent->d_name.len + 1;
	args.in.args[2].value = newent->d_name.name;
	err = fuse_simple_request(fc, &args);
	if (!err) {
		/* ctime changes */
		fuse_invalidate_attr(d_inode(oldent));
		fuse_update_ctime(d_inode(oldent));

		/* With RENAME_EXCHANGE the target inode is handled likewise */
		if (flags & RENAME_EXCHANGE) {
			fuse_invalidate_attr(d_inode(newent));
			fuse_update_ctime(d_inode(newent));
		}

		/* Both directories get their cached attributes invalidated */
		fuse_invalidate_attr(olddir);
		if (olddir != newdir)
			fuse_invalidate_attr(newdir);

		/* newent will end up negative */
		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
			fuse_invalidate_attr(d_inode(newent));
			fuse_invalidate_entry_cache(newent);
			fuse_update_ctime(d_inode(newent));
		}
	} else if (err == -EINTR) {
		/* If request was interrupted, DEITY only knows if the
		   rename actually took place.  If the invalidation
		   fails (e.g. some process has CWD under the renamed
		   directory), then there can be inconsistency between
		   the dcache and the real filesystem.  Tough luck. */
		fuse_invalidate_entry(oldent);
		if (d_really_is_positive(newent))
			fuse_invalidate_entry(newent);
	}

	return err;
}
755
756static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
757			struct inode *newdir, struct dentry *newent,
758			unsigned int flags)
759{
760	struct fuse_conn *fc = get_fuse_conn(olddir);
761	int err;
762
763	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
764		return -EINVAL;
765
766	if (flags) {
767		if (fc->no_rename2 || fc->minor < 23)
768			return -EINVAL;
769
770		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
771					 FUSE_RENAME2,
772					 sizeof(struct fuse_rename2_in));
773		if (err == -ENOSYS) {
774			fc->no_rename2 = 1;
775			err = -EINVAL;
776		}
777	} else {
778		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
779					 FUSE_RENAME,
780					 sizeof(struct fuse_rename_in));
781	}
782
783	return err;
784}
785
/*
 * Create the name @newent in @newdir as a link to the inode of @entry
 * with a FUSE_LINK request.
 *
 * Returns the result of create_new_entry().
 */
static int fuse_link(struct dentry *entry, struct inode *newdir,
		     struct dentry *newent)
{
	int err;
	struct fuse_link_in inarg;
	struct inode *inode = d_inode(entry);
	struct fuse_conn *fc = get_fuse_conn(inode);
	FUSE_ARGS(args);

	memset(&inarg, 0, sizeof(inarg));
	inarg.oldnodeid = get_node_id(inode);
	args.in.h.opcode = FUSE_LINK;
	args.in.numargs = 2;
	args.in.args[0].size = sizeof(inarg);
	args.in.args[0].value = &inarg;
	args.in.args[1].size = newent->d_name.len + 1;
	args.in.args[1].value = newent->d_name.name;
	/* The reply must have the same file type as the existing inode */
	err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
	/* Contrary to "normal" filesystems it can happen that link
	   makes two "logical" inodes point to the same "physical"
	   inode.  We invalidate the attributes of the old one, so it
	   will reflect changes in the backing inode (link count,
	   etc.)
	*/
	if (!err) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		/* Version bump and link count update happen under fc->lock */
		spin_lock(&fc->lock);
		fi->attr_version = ++fc->attr_version;
		inc_nlink(inode);
		spin_unlock(&fc->lock);
		fuse_invalidate_attr(inode);
		fuse_update_ctime(inode);
	} else if (err == -EINTR) {
		/* Interrupted request: only invalidate the attributes */
		fuse_invalidate_attr(inode);
	}
	return err;
}
824
825static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
826			  struct kstat *stat)
827{
828	unsigned int blkbits;
829	struct fuse_conn *fc = get_fuse_conn(inode);
830
831	/* see the comment in fuse_change_attributes() */
832	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
833		attr->size = i_size_read(inode);
834		attr->mtime = inode->i_mtime.tv_sec;
835		attr->mtimensec = inode->i_mtime.tv_nsec;
836		attr->ctime = inode->i_ctime.tv_sec;
837		attr->ctimensec = inode->i_ctime.tv_nsec;
838	}
839
840	stat->dev = inode->i_sb->s_dev;
841	stat->ino = attr->ino;
842	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
843	stat->nlink = attr->nlink;
844	stat->uid = make_kuid(&init_user_ns, attr->uid);
845	stat->gid = make_kgid(&init_user_ns, attr->gid);
846	stat->rdev = inode->i_rdev;
847	stat->atime.tv_sec = attr->atime;
848	stat->atime.tv_nsec = attr->atimensec;
849	stat->mtime.tv_sec = attr->mtime;
850	stat->mtime.tv_nsec = attr->mtimensec;
851	stat->ctime.tv_sec = attr->ctime;
852	stat->ctime.tv_nsec = attr->ctimensec;
853	stat->size = attr->size;
854	stat->blocks = attr->blocks;
855
856	if (attr->blksize != 0)
857		blkbits = ilog2(attr->blksize);
858	else
859		blkbits = inode->i_sb->s_blocksize_bits;
860
861	stat->blksize = 1 << blkbits;
862}
863
864static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
865			   struct file *file)
866{
867	int err;
868	struct fuse_getattr_in inarg;
869	struct fuse_attr_out outarg;
870	struct fuse_conn *fc = get_fuse_conn(inode);
871	FUSE_ARGS(args);
872	u64 attr_version;
873
874	attr_version = fuse_get_attr_version(fc);
875
876	memset(&inarg, 0, sizeof(inarg));
877	memset(&outarg, 0, sizeof(outarg));
878	/* Directories have separate file-handle space */
879	if (file && S_ISREG(inode->i_mode)) {
880		struct fuse_file *ff = file->private_data;
881
882		inarg.getattr_flags |= FUSE_GETATTR_FH;
883		inarg.fh = ff->fh;
884	}
885	args.in.h.opcode = FUSE_GETATTR;
886	args.in.h.nodeid = get_node_id(inode);
887	args.in.numargs = 1;
888	args.in.args[0].size = sizeof(inarg);
889	args.in.args[0].value = &inarg;
890	args.out.numargs = 1;
891	args.out.args[0].size = sizeof(outarg);
892	args.out.args[0].value = &outarg;
893	err = fuse_simple_request(fc, &args);
894	if (!err) {
895		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
896			make_bad_inode(inode);
897			err = -EIO;
898		} else {
899			fuse_change_attributes(inode, &outarg.attr,
900					       attr_timeout(&outarg),
901					       attr_version);
902			if (stat)
903				fuse_fillattr(inode, &outarg.attr, stat);
904		}
905	}
906	return err;
907}
908
909int fuse_update_attributes(struct inode *inode, struct kstat *stat,
910			   struct file *file, bool *refreshed)
911{
912	struct fuse_inode *fi = get_fuse_inode(inode);
913	int err;
914	bool r;
915
916	if (time_before64(fi->i_time, get_jiffies_64())) {
917		r = true;
918		err = fuse_do_getattr(inode, stat, file);
919	} else {
920		r = false;
921		err = 0;
922		if (stat) {
923			generic_fillattr(inode, stat);
924			stat->mode = fi->orig_i_mode;
925			stat->ino = fi->orig_ino;
926		}
927	}
928
929	if (refreshed != NULL)
930		*refreshed = r;
931
932	return err;
933}
934
/*
 * Invalidate the dentry @name in the directory with node ID
 * @parent_nodeid.
 *
 * If @child_nodeid is non-zero and the dentry is positive, the dentry is
 * additionally deleted, provided that it refers to @child_nodeid, is not
 * a mountpoint and, for a directory, is empty.
 *
 * Returns 0 on success, -ENOENT if the parent inode, an alias of it, or
 * the entry is not found or the entry refers to a different node,
 * -ENOTDIR if the parent is not a directory, -EBUSY if the entry is a
 * mountpoint, or -ENOTEMPTY if it is a non-empty directory.
 */
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name)
{
	int err = -ENOTDIR;
	struct inode *parent;
	struct dentry *dir;
	struct dentry *entry;

	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
	if (!parent)
		return -ENOENT;

	/* The parent's i_mutex is held until the unlock label */
	mutex_lock(&parent->i_mutex);
	if (!S_ISDIR(parent->i_mode))
		goto unlock;

	err = -ENOENT;
	dir = d_find_alias(parent);
	if (!dir)
		goto unlock;

	entry = d_lookup(dir, name);
	dput(dir);
	if (!entry)
		goto unlock;

	fuse_invalidate_attr(parent);
	fuse_invalidate_entry(entry);

	if (child_nodeid != 0 && d_really_is_positive(entry)) {
		/* The checks and the deletion setup run under the child's i_mutex */
		mutex_lock(&d_inode(entry)->i_mutex);
		if (get_node_id(d_inode(entry)) != child_nodeid) {
			err = -ENOENT;
			goto badentry;
		}
		if (d_mountpoint(entry)) {
			err = -EBUSY;
			goto badentry;
		}
		if (d_is_dir(entry)) {
			shrink_dcache_parent(entry);
			if (!simple_empty(entry)) {
				err = -ENOTEMPTY;
				goto badentry;
			}
			d_inode(entry)->i_flags |= S_DEAD;
		}
		dont_mount(entry);
		clear_nlink(d_inode(entry));
		err = 0;
 badentry:
		mutex_unlock(&d_inode(entry)->i_mutex);
		/* d_delete() is called after the child's mutex is dropped */
		if (!err)
			d_delete(entry);
	} else {
		err = 0;
	}
	dput(entry);

 unlock:
	mutex_unlock(&parent->i_mutex);
	iput(parent);
	return err;
}
999
1000/*
1001 * Calling into a user-controlled filesystem gives the filesystem
1002 * daemon ptrace-like capabilities over the current process.  This
1003 * means, that the filesystem daemon is able to record the exact
1004 * filesystem operations performed, and can also control the behavior
1005 * of the requester process in otherwise impossible ways.  For example
1006 * it can delay the operation for arbitrary length of time allowing
1007 * DoS against the requester.
1008 *
1009 * For this reason only those processes can call into the filesystem,
1010 * for which the owner of the mount has ptrace privilege.  This
1011 * excludes processes started by other users, suid or sgid processes.
1012 */
1013int fuse_allow_current_process(struct fuse_conn *fc)
1014{
1015	const struct cred *cred;
1016
1017	if (fc->flags & FUSE_ALLOW_OTHER)
1018		return 1;
1019
1020	cred = current_cred();
1021	if (uid_eq(cred->euid, fc->user_id) &&
1022	    uid_eq(cred->suid, fc->user_id) &&
1023	    uid_eq(cred->uid,  fc->user_id) &&
1024	    gid_eq(cred->egid, fc->group_id) &&
1025	    gid_eq(cred->sgid, fc->group_id) &&
1026	    gid_eq(cred->gid,  fc->group_id))
1027		return 1;
1028
1029	return 0;
1030}
1031
1032static int fuse_access(struct inode *inode, int mask)
1033{
1034	struct fuse_conn *fc = get_fuse_conn(inode);
1035	FUSE_ARGS(args);
1036	struct fuse_access_in inarg;
1037	int err;
1038
1039	BUG_ON(mask & MAY_NOT_BLOCK);
1040
1041	if (fc->no_access)
1042		return 0;
1043
1044	memset(&inarg, 0, sizeof(inarg));
1045	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1046	args.in.h.opcode = FUSE_ACCESS;
1047	args.in.h.nodeid = get_node_id(inode);
1048	args.in.numargs = 1;
1049	args.in.args[0].size = sizeof(inarg);
1050	args.in.args[0].value = &inarg;
1051	err = fuse_simple_request(fc, &args);
1052	if (err == -ENOSYS) {
1053		fc->no_access = 1;
1054		err = 0;
1055	}
1056	return err;
1057}
1058
1059static int fuse_perm_getattr(struct inode *inode, int mask)
1060{
1061	if (mask & MAY_NOT_BLOCK)
1062		return -ECHILD;
1063
1064	return fuse_do_getattr(inode, NULL, NULL);
1065}
1066
/*
 * Check permission.  The two basic access models of FUSE are:
 *
 * 1) Local access checking ('default_permissions' mount option) based
 * on file mode.  This is the plain old disk filesystem permission
 * model.
 *
 * 2) "Remote" access checking, where server is responsible for
 * checking permission in each inode operation.  An exception to this
 * is if ->permission() was invoked from sys_access() in which case an
 * access request is sent.  Execute permission is still checked
 * locally based on file mode.
 *
 * Returns 0 if access is granted, -EACCES if it is denied or the current
 * process is not allowed to use the mount, or another negative error
 * from the attribute refresh or the access request.
 */
static int fuse_permission(struct inode *inode, int mask)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	bool refreshed = false;
	int err = 0;

	if (!fuse_allow_current_process(fc))
		return -EACCES;

	/*
	 * If attributes are needed, refresh them before proceeding
	 */
	if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		/* Only refresh when the attribute timeout has passed */
		if (time_before64(fi->i_time, get_jiffies_64())) {
			refreshed = true;

			err = fuse_perm_getattr(inode, mask);
			if (err)
				return err;
		}
	}

	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
		err = generic_permission(inode, mask);

		/* If permission is denied, try to refresh file
		   attributes.  This is also needed, because the root
		   node will at first have no permissions */
		if (err == -EACCES && !refreshed) {
			err = fuse_perm_getattr(inode, mask);
			if (!err)
				err = generic_permission(inode, mask);
		}

		/* Note: the opposite of the above test does not
		   exist.  So if permissions are revoked this won't be
		   noticed immediately, only after the attribute
		   timeout has expired */
	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
		err = fuse_access(inode, mask);
	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
		/* Execute needs at least one x bit set in the file mode */
		if (!(inode->i_mode & S_IXUGO)) {
			if (refreshed)
				return -EACCES;

			/* The mode may be stale: refresh once and recheck */
			err = fuse_perm_getattr(inode, mask);
			if (!err && !(inode->i_mode & S_IXUGO))
				return -EACCES;
		}
	}
	return err;
}
1135
1136static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1137			 struct dir_context *ctx)
1138{
1139	while (nbytes >= FUSE_NAME_OFFSET) {
1140		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
1141		size_t reclen = FUSE_DIRENT_SIZE(dirent);
1142		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1143			return -EIO;
1144		if (reclen > nbytes)
1145			break;
1146		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1147			return -EIO;
1148
1149		if (!dir_emit(ctx, dirent->name, dirent->namelen,
1150			       dirent->ino, dirent->type))
1151			break;
1152
1153		buf += reclen;
1154		nbytes -= reclen;
1155		ctx->pos = dirent->off;
1156	}
1157
1158	return 0;
1159}
1160
1161static int fuse_direntplus_link(struct file *file,
1162				struct fuse_direntplus *direntplus,
1163				u64 attr_version)
1164{
1165	int err;
1166	struct fuse_entry_out *o = &direntplus->entry_out;
1167	struct fuse_dirent *dirent = &direntplus->dirent;
1168	struct dentry *parent = file->f_path.dentry;
1169	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1170	struct dentry *dentry;
1171	struct dentry *alias;
1172	struct inode *dir = d_inode(parent);
1173	struct fuse_conn *fc;
1174	struct inode *inode;
1175
1176	if (!o->nodeid) {
1177		/*
1178		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1179		 * ENOENT. Instead, it only means the userspace filesystem did
1180		 * not want to return attributes/handle for this entry.
1181		 *
1182		 * So do nothing.
1183		 */
1184		return 0;
1185	}
1186
1187	if (name.name[0] == '.') {
1188		/*
1189		 * We could potentially refresh the attributes of the directory
1190		 * and its parent?
1191		 */
1192		if (name.len == 1)
1193			return 0;
1194		if (name.name[1] == '.' && name.len == 2)
1195			return 0;
1196	}
1197
1198	if (invalid_nodeid(o->nodeid))
1199		return -EIO;
1200	if (!fuse_valid_type(o->attr.mode))
1201		return -EIO;
1202
1203	fc = get_fuse_conn(dir);
1204
1205	name.hash = full_name_hash(name.name, name.len);
1206	dentry = d_lookup(parent, &name);
1207	if (dentry) {
1208		inode = d_inode(dentry);
1209		if (!inode) {
1210			d_drop(dentry);
1211		} else if (get_node_id(inode) != o->nodeid ||
1212			   ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
1213			d_invalidate(dentry);
1214		} else if (is_bad_inode(inode)) {
1215			err = -EIO;
1216			goto out;
1217		} else {
1218			struct fuse_inode *fi;
1219			fi = get_fuse_inode(inode);
1220			spin_lock(&fc->lock);
1221			fi->nlookup++;
1222			spin_unlock(&fc->lock);
1223
1224			fuse_change_attributes(inode, &o->attr,
1225					       entry_attr_timeout(o),
1226					       attr_version);
1227
1228			/*
1229			 * The other branch to 'found' comes via fuse_iget()
1230			 * which bumps nlookup inside
1231			 */
1232			goto found;
1233		}
1234		dput(dentry);
1235	}
1236
1237	dentry = d_alloc(parent, &name);
1238	err = -ENOMEM;
1239	if (!dentry)
1240		goto out;
1241
1242	inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1243			  &o->attr, entry_attr_timeout(o), attr_version);
1244	if (!inode)
1245		goto out;
1246
1247	alias = d_splice_alias(inode, dentry);
1248	err = PTR_ERR(alias);
1249	if (IS_ERR(alias))
1250		goto out;
1251
1252	if (alias) {
1253		dput(dentry);
1254		dentry = alias;
1255	}
1256
1257found:
1258	if (fc->readdirplus_auto)
1259		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
1260	fuse_change_entry_timeout(dentry, o);
1261
1262	err = 0;
1263out:
1264	dput(dentry);
1265	return err;
1266}
1267
1268static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1269			     struct dir_context *ctx, u64 attr_version)
1270{
1271	struct fuse_direntplus *direntplus;
1272	struct fuse_dirent *dirent;
1273	size_t reclen;
1274	int over = 0;
1275	int ret;
1276
1277	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1278		direntplus = (struct fuse_direntplus *) buf;
1279		dirent = &direntplus->dirent;
1280		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1281
1282		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1283			return -EIO;
1284		if (reclen > nbytes)
1285			break;
1286		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
1287			return -EIO;
1288
1289		if (!over) {
1290			/* We fill entries into dstbuf only as much as
1291			   it can hold. But we still continue iterating
1292			   over remaining entries to link them. If not,
1293			   we need to send a FORGET for each of those
1294			   which we did not link.
1295			*/
1296			over = !dir_emit(ctx, dirent->name, dirent->namelen,
1297				       dirent->ino, dirent->type);
1298			ctx->pos = dirent->off;
1299		}
1300
1301		buf += reclen;
1302		nbytes -= reclen;
1303
1304		ret = fuse_direntplus_link(file, direntplus, attr_version);
1305		if (ret)
1306			fuse_force_forget(file, direntplus->entry_out.nodeid);
1307	}
1308
1309	return 0;
1310}
1311
1312static int fuse_readdir(struct file *file, struct dir_context *ctx)
1313{
1314	int plus, err;
1315	size_t nbytes;
1316	struct page *page;
1317	struct inode *inode = file_inode(file);
1318	struct fuse_conn *fc = get_fuse_conn(inode);
1319	struct fuse_req *req;
1320	u64 attr_version = 0;
1321
1322	if (is_bad_inode(inode))
1323		return -EIO;
1324
1325	req = fuse_get_req(fc, 1);
1326	if (IS_ERR(req))
1327		return PTR_ERR(req);
1328
1329	page = alloc_page(GFP_KERNEL);
1330	if (!page) {
1331		fuse_put_request(fc, req);
1332		return -ENOMEM;
1333	}
1334
1335	plus = fuse_use_readdirplus(inode, ctx);
1336	req->out.argpages = 1;
1337	req->num_pages = 1;
1338	req->pages[0] = page;
1339	req->page_descs[0].length = PAGE_SIZE;
1340	if (plus) {
1341		attr_version = fuse_get_attr_version(fc);
1342		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1343			       FUSE_READDIRPLUS);
1344	} else {
1345		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
1346			       FUSE_READDIR);
1347	}
1348	fuse_request_send(fc, req);
1349	nbytes = req->out.args[0].size;
1350	err = req->out.h.error;
1351	fuse_put_request(fc, req);
1352	if (!err) {
1353		if (plus) {
1354			err = parse_dirplusfile(page_address(page), nbytes,
1355						file, ctx,
1356						attr_version);
1357		} else {
1358			err = parse_dirfile(page_address(page), nbytes, file,
1359					    ctx);
1360		}
1361	}
1362
1363	__free_page(page);
1364	fuse_invalidate_atime(inode);
1365	return err;
1366}
1367
1368static char *read_link(struct dentry *dentry)
1369{
1370	struct inode *inode = d_inode(dentry);
1371	struct fuse_conn *fc = get_fuse_conn(inode);
1372	FUSE_ARGS(args);
1373	char *link;
1374	ssize_t ret;
1375
1376	link = (char *) __get_free_page(GFP_KERNEL);
1377	if (!link)
1378		return ERR_PTR(-ENOMEM);
1379
1380	args.in.h.opcode = FUSE_READLINK;
1381	args.in.h.nodeid = get_node_id(inode);
1382	args.out.argvar = 1;
1383	args.out.numargs = 1;
1384	args.out.args[0].size = PAGE_SIZE - 1;
1385	args.out.args[0].value = link;
1386	ret = fuse_simple_request(fc, &args);
1387	if (ret < 0) {
1388		free_page((unsigned long) link);
1389		link = ERR_PTR(ret);
1390	} else {
1391		link[ret] = '\0';
1392	}
1393	fuse_invalidate_atime(inode);
1394	return link;
1395}
1396
/*
 * Release a buffer obtained from read_link().  An ERR_PTR() value, as
 * returned by read_link() on failure, is ignored.
 */
static void free_link(char *link)
{
	if (IS_ERR(link))
		return;

	free_page((unsigned long) link);
}
1402
1403static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
1404{
1405	nd_set_link(nd, read_link(dentry));
1406	return NULL;
1407}
1408
/*
 * ->put_link(): release the link stored in the nameidata by
 * fuse_follow_link().
 */
static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
{
	char *target = nd_get_link(nd);

	free_link(target);
}
1413
/*
 * ->open() for directories: fuse_open_common() with its last argument
 * set to true.
 */
static int fuse_dir_open(struct inode *inode, struct file *file)
{
	const bool isdir = true;

	return fuse_open_common(inode, file, isdir);
}
1418
1419static int fuse_dir_release(struct inode *inode, struct file *file)
1420{
1421	fuse_release_common(file, FUSE_RELEASEDIR);
1422
1423	return 0;
1424}
1425
1426static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1427			  int datasync)
1428{
1429	return fuse_fsync_common(file, start, end, datasync, 1);
1430}
1431
1432static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1433			    unsigned long arg)
1434{
1435	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1436
1437	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1438	if (fc->minor < 18)
1439		return -ENOTTY;
1440
1441	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1442}
1443
1444static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1445				   unsigned long arg)
1446{
1447	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1448
1449	if (fc->minor < 18)
1450		return -ENOTTY;
1451
1452	return fuse_ioctl_common(file, cmd, arg,
1453				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1454}
1455
1456static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1457{
1458	/* Always update if mtime is explicitly set  */
1459	if (ivalid & ATTR_MTIME_SET)
1460		return true;
1461
1462	/* Or if kernel i_mtime is the official one */
1463	if (trust_local_mtime)
1464		return true;
1465
1466	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1467	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1468		return false;
1469
1470	/* In all other cases update */
1471	return true;
1472}
1473
1474static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
1475			   bool trust_local_cmtime)
1476{
1477	unsigned ivalid = iattr->ia_valid;
1478
1479	if (ivalid & ATTR_MODE)
1480		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1481	if (ivalid & ATTR_UID)
1482		arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
1483	if (ivalid & ATTR_GID)
1484		arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
1485	if (ivalid & ATTR_SIZE)
1486		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1487	if (ivalid & ATTR_ATIME) {
1488		arg->valid |= FATTR_ATIME;
1489		arg->atime = iattr->ia_atime.tv_sec;
1490		arg->atimensec = iattr->ia_atime.tv_nsec;
1491		if (!(ivalid & ATTR_ATIME_SET))
1492			arg->valid |= FATTR_ATIME_NOW;
1493	}
1494	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1495		arg->valid |= FATTR_MTIME;
1496		arg->mtime = iattr->ia_mtime.tv_sec;
1497		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1498		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1499			arg->valid |= FATTR_MTIME_NOW;
1500	}
1501	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1502		arg->valid |= FATTR_CTIME;
1503		arg->ctime = iattr->ia_ctime.tv_sec;
1504		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1505	}
1506}
1507
1508/*
1509 * Prevent concurrent writepages on inode
1510 *
1511 * This is done by adding a negative bias to the inode write counter
1512 * and waiting for all pending writes to finish.
1513 */
1514void fuse_set_nowrite(struct inode *inode)
1515{
1516	struct fuse_conn *fc = get_fuse_conn(inode);
1517	struct fuse_inode *fi = get_fuse_inode(inode);
1518
1519	BUG_ON(!mutex_is_locked(&inode->i_mutex));
1520
1521	spin_lock(&fc->lock);
1522	BUG_ON(fi->writectr < 0);
1523	fi->writectr += FUSE_NOWRITE;
1524	spin_unlock(&fc->lock);
1525	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1526}
1527
1528/*
1529 * Allow writepages on inode
1530 *
1531 * Remove the bias from the writecounter and send any queued
1532 * writepages.
1533 */
1534static void __fuse_release_nowrite(struct inode *inode)
1535{
1536	struct fuse_inode *fi = get_fuse_inode(inode);
1537
1538	BUG_ON(fi->writectr != FUSE_NOWRITE);
1539	fi->writectr = 0;
1540	fuse_flush_writepages(inode);
1541}
1542
1543void fuse_release_nowrite(struct inode *inode)
1544{
1545	struct fuse_conn *fc = get_fuse_conn(inode);
1546
1547	spin_lock(&fc->lock);
1548	__fuse_release_nowrite(inode);
1549	spin_unlock(&fc->lock);
1550}
1551
1552static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1553			      struct inode *inode,
1554			      struct fuse_setattr_in *inarg_p,
1555			      struct fuse_attr_out *outarg_p)
1556{
1557	args->in.h.opcode = FUSE_SETATTR;
1558	args->in.h.nodeid = get_node_id(inode);
1559	args->in.numargs = 1;
1560	args->in.args[0].size = sizeof(*inarg_p);
1561	args->in.args[0].value = inarg_p;
1562	args->out.numargs = 1;
1563	args->out.args[0].size = sizeof(*outarg_p);
1564	args->out.args[0].value = outarg_p;
1565}
1566
1567/*
1568 * Flush inode->i_mtime to the server
1569 */
1570int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1571{
1572	struct fuse_conn *fc = get_fuse_conn(inode);
1573	FUSE_ARGS(args);
1574	struct fuse_setattr_in inarg;
1575	struct fuse_attr_out outarg;
1576
1577	memset(&inarg, 0, sizeof(inarg));
1578	memset(&outarg, 0, sizeof(outarg));
1579
1580	inarg.valid = FATTR_MTIME;
1581	inarg.mtime = inode->i_mtime.tv_sec;
1582	inarg.mtimensec = inode->i_mtime.tv_nsec;
1583	if (fc->minor >= 23) {
1584		inarg.valid |= FATTR_CTIME;
1585		inarg.ctime = inode->i_ctime.tv_sec;
1586		inarg.ctimensec = inode->i_ctime.tv_nsec;
1587	}
1588	if (ff) {
1589		inarg.valid |= FATTR_FH;
1590		inarg.fh = ff->fh;
1591	}
1592	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1593
1594	return fuse_simple_request(fc, &args);
1595}
1596
1597/*
1598 * Set attributes, and at the same time refresh them.
1599 *
1600 * Truncation is slightly complicated, because the 'truncate' request
1601 * may fail, in which case we don't want to touch the mapping.
1602 * vmtruncate() doesn't allow for this case, so do the rlimit checking
1603 * and the actual truncation by hand.
1604 */
1605int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1606		    struct file *file)
1607{
1608	struct fuse_conn *fc = get_fuse_conn(inode);
1609	struct fuse_inode *fi = get_fuse_inode(inode);
1610	FUSE_ARGS(args);
1611	struct fuse_setattr_in inarg;
1612	struct fuse_attr_out outarg;
1613	bool is_truncate = false;
1614	bool is_wb = fc->writeback_cache;
1615	loff_t oldsize;
1616	int err;
1617	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1618
1619	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
1620		attr->ia_valid |= ATTR_FORCE;
1621
1622	err = inode_change_ok(inode, attr);
1623	if (err)
1624		return err;
1625
1626	if (attr->ia_valid & ATTR_OPEN) {
1627		if (fc->atomic_o_trunc)
1628			return 0;
1629		file = NULL;
1630	}
1631
1632	if (attr->ia_valid & ATTR_SIZE)
1633		is_truncate = true;
1634
1635	if (is_truncate) {
1636		fuse_set_nowrite(inode);
1637		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1638		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1639			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1640	}
1641
1642	memset(&inarg, 0, sizeof(inarg));
1643	memset(&outarg, 0, sizeof(outarg));
1644	iattr_to_fattr(attr, &inarg, trust_local_cmtime);
1645	if (file) {
1646		struct fuse_file *ff = file->private_data;
1647		inarg.valid |= FATTR_FH;
1648		inarg.fh = ff->fh;
1649	}
1650	if (attr->ia_valid & ATTR_SIZE) {
1651		/* For mandatory locking in truncate */
1652		inarg.valid |= FATTR_LOCKOWNER;
1653		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1654	}
1655	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1656	err = fuse_simple_request(fc, &args);
1657	if (err) {
1658		if (err == -EINTR)
1659			fuse_invalidate_attr(inode);
1660		goto error;
1661	}
1662
1663	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1664		make_bad_inode(inode);
1665		err = -EIO;
1666		goto error;
1667	}
1668
1669	spin_lock(&fc->lock);
1670	/* the kernel maintains i_mtime locally */
1671	if (trust_local_cmtime) {
1672		if (attr->ia_valid & ATTR_MTIME)
1673			inode->i_mtime = attr->ia_mtime;
1674		if (attr->ia_valid & ATTR_CTIME)
1675			inode->i_ctime = attr->ia_ctime;
1676		/* FIXME: clear I_DIRTY_SYNC? */
1677	}
1678
1679	fuse_change_attributes_common(inode, &outarg.attr,
1680				      attr_timeout(&outarg));
1681	oldsize = inode->i_size;
1682	/* see the comment in fuse_change_attributes() */
1683	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1684		i_size_write(inode, outarg.attr.size);
1685
1686	if (is_truncate) {
1687		/* NOTE: this may release/reacquire fc->lock */
1688		__fuse_release_nowrite(inode);
1689	}
1690	spin_unlock(&fc->lock);
1691
1692	/*
1693	 * Only call invalidate_inode_pages2() after removing
1694	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1695	 */
1696	if ((is_truncate || !is_wb) &&
1697	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1698		truncate_pagecache(inode, outarg.attr.size);
1699		invalidate_inode_pages2(inode->i_mapping);
1700	}
1701
1702	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1703	return 0;
1704
1705error:
1706	if (is_truncate)
1707		fuse_release_nowrite(inode);
1708
1709	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1710	return err;
1711}
1712
1713static int fuse_setattr(struct dentry *entry, struct iattr *attr)
1714{
1715	struct inode *inode = d_inode(entry);
1716
1717	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1718		return -EACCES;
1719
1720	if (attr->ia_valid & ATTR_FILE)
1721		return fuse_do_setattr(inode, attr, attr->ia_file);
1722	else
1723		return fuse_do_setattr(inode, attr, NULL);
1724}
1725
1726static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1727			struct kstat *stat)
1728{
1729	struct inode *inode = d_inode(entry);
1730	struct fuse_conn *fc = get_fuse_conn(inode);
1731
1732	if (!fuse_allow_current_process(fc))
1733		return -EACCES;
1734
1735	return fuse_update_attributes(inode, stat, NULL, NULL);
1736}
1737
1738static int fuse_setxattr(struct dentry *entry, const char *name,
1739			 const void *value, size_t size, int flags)
1740{
1741	struct inode *inode = d_inode(entry);
1742	struct fuse_conn *fc = get_fuse_conn(inode);
1743	FUSE_ARGS(args);
1744	struct fuse_setxattr_in inarg;
1745	int err;
1746
1747	if (fc->no_setxattr)
1748		return -EOPNOTSUPP;
1749
1750	memset(&inarg, 0, sizeof(inarg));
1751	inarg.size = size;
1752	inarg.flags = flags;
1753	args.in.h.opcode = FUSE_SETXATTR;
1754	args.in.h.nodeid = get_node_id(inode);
1755	args.in.numargs = 3;
1756	args.in.args[0].size = sizeof(inarg);
1757	args.in.args[0].value = &inarg;
1758	args.in.args[1].size = strlen(name) + 1;
1759	args.in.args[1].value = name;
1760	args.in.args[2].size = size;
1761	args.in.args[2].value = value;
1762	err = fuse_simple_request(fc, &args);
1763	if (err == -ENOSYS) {
1764		fc->no_setxattr = 1;
1765		err = -EOPNOTSUPP;
1766	}
1767	if (!err) {
1768		fuse_invalidate_attr(inode);
1769		fuse_update_ctime(inode);
1770	}
1771	return err;
1772}
1773
1774static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1775			     void *value, size_t size)
1776{
1777	struct inode *inode = d_inode(entry);
1778	struct fuse_conn *fc = get_fuse_conn(inode);
1779	FUSE_ARGS(args);
1780	struct fuse_getxattr_in inarg;
1781	struct fuse_getxattr_out outarg;
1782	ssize_t ret;
1783
1784	if (fc->no_getxattr)
1785		return -EOPNOTSUPP;
1786
1787	memset(&inarg, 0, sizeof(inarg));
1788	inarg.size = size;
1789	args.in.h.opcode = FUSE_GETXATTR;
1790	args.in.h.nodeid = get_node_id(inode);
1791	args.in.numargs = 2;
1792	args.in.args[0].size = sizeof(inarg);
1793	args.in.args[0].value = &inarg;
1794	args.in.args[1].size = strlen(name) + 1;
1795	args.in.args[1].value = name;
1796	/* This is really two different operations rolled into one */
1797	args.out.numargs = 1;
1798	if (size) {
1799		args.out.argvar = 1;
1800		args.out.args[0].size = size;
1801		args.out.args[0].value = value;
1802	} else {
1803		args.out.args[0].size = sizeof(outarg);
1804		args.out.args[0].value = &outarg;
1805	}
1806	ret = fuse_simple_request(fc, &args);
1807	if (!ret && !size)
1808		ret = outarg.size;
1809	if (ret == -ENOSYS) {
1810		fc->no_getxattr = 1;
1811		ret = -EOPNOTSUPP;
1812	}
1813	return ret;
1814}
1815
1816static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1817{
1818	struct inode *inode = d_inode(entry);
1819	struct fuse_conn *fc = get_fuse_conn(inode);
1820	FUSE_ARGS(args);
1821	struct fuse_getxattr_in inarg;
1822	struct fuse_getxattr_out outarg;
1823	ssize_t ret;
1824
1825	if (!fuse_allow_current_process(fc))
1826		return -EACCES;
1827
1828	if (fc->no_listxattr)
1829		return -EOPNOTSUPP;
1830
1831	memset(&inarg, 0, sizeof(inarg));
1832	inarg.size = size;
1833	args.in.h.opcode = FUSE_LISTXATTR;
1834	args.in.h.nodeid = get_node_id(inode);
1835	args.in.numargs = 1;
1836	args.in.args[0].size = sizeof(inarg);
1837	args.in.args[0].value = &inarg;
1838	/* This is really two different operations rolled into one */
1839	args.out.numargs = 1;
1840	if (size) {
1841		args.out.argvar = 1;
1842		args.out.args[0].size = size;
1843		args.out.args[0].value = list;
1844	} else {
1845		args.out.args[0].size = sizeof(outarg);
1846		args.out.args[0].value = &outarg;
1847	}
1848	ret = fuse_simple_request(fc, &args);
1849	if (!ret && !size)
1850		ret = outarg.size;
1851	if (ret == -ENOSYS) {
1852		fc->no_listxattr = 1;
1853		ret = -EOPNOTSUPP;
1854	}
1855	return ret;
1856}
1857
1858static int fuse_removexattr(struct dentry *entry, const char *name)
1859{
1860	struct inode *inode = d_inode(entry);
1861	struct fuse_conn *fc = get_fuse_conn(inode);
1862	FUSE_ARGS(args);
1863	int err;
1864
1865	if (fc->no_removexattr)
1866		return -EOPNOTSUPP;
1867
1868	args.in.h.opcode = FUSE_REMOVEXATTR;
1869	args.in.h.nodeid = get_node_id(inode);
1870	args.in.numargs = 1;
1871	args.in.args[0].size = strlen(name) + 1;
1872	args.in.args[0].value = name;
1873	err = fuse_simple_request(fc, &args);
1874	if (err == -ENOSYS) {
1875		fc->no_removexattr = 1;
1876		err = -EOPNOTSUPP;
1877	}
1878	if (!err) {
1879		fuse_invalidate_attr(inode);
1880		fuse_update_ctime(inode);
1881	}
1882	return err;
1883}
1884
1885static const struct inode_operations fuse_dir_inode_operations = {
1886	.lookup		= fuse_lookup,
1887	.mkdir		= fuse_mkdir,
1888	.symlink	= fuse_symlink,
1889	.unlink		= fuse_unlink,
1890	.rmdir		= fuse_rmdir,
1891	.rename2	= fuse_rename2,
1892	.link		= fuse_link,
1893	.setattr	= fuse_setattr,
1894	.create		= fuse_create,
1895	.atomic_open	= fuse_atomic_open,
1896	.mknod		= fuse_mknod,
1897	.permission	= fuse_permission,
1898	.getattr	= fuse_getattr,
1899	.setxattr	= fuse_setxattr,
1900	.getxattr	= fuse_getxattr,
1901	.listxattr	= fuse_listxattr,
1902	.removexattr	= fuse_removexattr,
1903};
1904
1905static const struct file_operations fuse_dir_operations = {
1906	.llseek		= generic_file_llseek,
1907	.read		= generic_read_dir,
1908	.iterate	= fuse_readdir,
1909	.open		= fuse_dir_open,
1910	.release	= fuse_dir_release,
1911	.fsync		= fuse_dir_fsync,
1912	.unlocked_ioctl	= fuse_dir_ioctl,
1913	.compat_ioctl	= fuse_dir_compat_ioctl,
1914};
1915
1916static const struct inode_operations fuse_common_inode_operations = {
1917	.setattr	= fuse_setattr,
1918	.permission	= fuse_permission,
1919	.getattr	= fuse_getattr,
1920	.setxattr	= fuse_setxattr,
1921	.getxattr	= fuse_getxattr,
1922	.listxattr	= fuse_listxattr,
1923	.removexattr	= fuse_removexattr,
1924};
1925
1926static const struct inode_operations fuse_symlink_inode_operations = {
1927	.setattr	= fuse_setattr,
1928	.follow_link	= fuse_follow_link,
1929	.put_link	= fuse_put_link,
1930	.readlink	= generic_readlink,
1931	.getattr	= fuse_getattr,
1932	.setxattr	= fuse_setxattr,
1933	.getxattr	= fuse_getxattr,
1934	.listxattr	= fuse_listxattr,
1935	.removexattr	= fuse_removexattr,
1936};
1937
1938void fuse_init_common(struct inode *inode)
1939{
1940	inode->i_op = &fuse_common_inode_operations;
1941}
1942
1943void fuse_init_dir(struct inode *inode)
1944{
1945	inode->i_op = &fuse_dir_inode_operations;
1946	inode->i_fop = &fuse_dir_operations;
1947}
1948
1949void fuse_init_symlink(struct inode *inode)
1950{
1951	inode->i_op = &fuse_symlink_inode_operations;
1952}
1953