/*
 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <jroedel@suse.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#define pr_fmt(fmt)    "%s: " fmt, __func__

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/iommu.h>
#include <linux/idr.h>
#include <linux/notifier.h>
#include <linux/err.h>
#include <linux/pci.h>
#include <linux/bitops.h>
#include <trace/events/iommu.h>

static struct kset *iommu_group_kset;
static struct ida iommu_group_ida;
static struct mutex iommu_group_mutex;

struct iommu_callback_data {
	const struct iommu_ops *ops;
};

struct iommu_group {
	struct kobject kobj;
	struct kobject *devices_kobj;
	struct list_head devices;
	struct mutex mutex;
	struct blocking_notifier_head notifier;
	void *iommu_data;
	void (*iommu_data_release)(void *iommu_data);
	char *name;
	int id;
};

struct iommu_device {
	struct list_head list;
	struct device *dev;
	char *name;
};

struct iommu_group_attribute {
	struct attribute attr;
	ssize_t (*show)(struct iommu_group *group, char *buf);
	ssize_t (*store)(struct iommu_group *group,
			 const char *buf, size_t count);
};

#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
struct iommu_group_attribute iommu_group_attr_##_name =		\
	__ATTR(_name, _mode, _show, _store)

#define to_iommu_group_attr(_attr)	\
	container_of(_attr, struct iommu_group_attribute, attr)
#define to_iommu_group(_kobj)		\
	container_of(_kobj, struct iommu_group, kobj)

static ssize_t iommu_group_attr_show(struct kobject *kobj,
				     struct attribute *__attr, char *buf)
{
	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
	struct iommu_group *group = to_iommu_group(kobj);
	ssize_t ret = -EIO;

	if (attr->show)
		ret = attr->show(group, buf);
	return ret;
}

static ssize_t iommu_group_attr_store(struct kobject *kobj,
				      struct attribute *__attr,
				      const char *buf, size_t count)
{
	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
	struct iommu_group *group = to_iommu_group(kobj);
	ssize_t ret = -EIO;

	if (attr->store)
		ret = attr->store(group, buf, count);
	return ret;
}

static const struct sysfs_ops iommu_group_sysfs_ops = {
	.show = iommu_group_attr_show,
	.store = iommu_group_attr_store,
};

static int iommu_group_create_file(struct iommu_group *group,
				   struct iommu_group_attribute *attr)
{
	return sysfs_create_file(&group->kobj, &attr->attr);
}

static void iommu_group_remove_file(struct iommu_group *group,
				    struct iommu_group_attribute *attr)
{
	sysfs_remove_file(&group->kobj, &attr->attr);
}

static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
{
	return sprintf(buf, "%s\n", group->name);
}

static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);

static void iommu_group_release(struct kobject *kobj)
{
	struct iommu_group *group = to_iommu_group(kobj);

	if (group->iommu_data_release)
		group->iommu_data_release(group->iommu_data);

	mutex_lock(&iommu_group_mutex);
	ida_remove(&iommu_group_ida, group->id);
	mutex_unlock(&iommu_group_mutex);

	kfree(group->name);
	kfree(group);
}

static struct kobj_type iommu_group_ktype = {
	.sysfs_ops = &iommu_group_sysfs_ops,
	.release = iommu_group_release,
};

/**
 * iommu_group_alloc - Allocate a new group
 *
 * This function is called by an iommu driver to allocate a new iommu
 * group.  The iommu group represents the minimum granularity of the iommu.
 * Upon successful return, the caller holds a reference to the newly
 * allocated group in order to hold the group until devices are added.  Use
 * iommu_group_put() to release this extra reference count, allowing the
 * group to be automatically reclaimed once it has no devices or external
 * references.
 */
struct iommu_group *iommu_group_alloc(void)
{
	struct iommu_group *group;
	int ret;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->kobj.kset = iommu_group_kset;
	mutex_init(&group->mutex);
	INIT_LIST_HEAD(&group->devices);
	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

	mutex_lock(&iommu_group_mutex);

again:
	if (unlikely(0 == ida_pre_get(&iommu_group_ida, GFP_KERNEL))) {
		kfree(group);
		mutex_unlock(&iommu_group_mutex);
		return ERR_PTR(-ENOMEM);
	}

	if (-EAGAIN == ida_get_new(&iommu_group_ida, &group->id))
		goto again;

	mutex_unlock(&iommu_group_mutex);

	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
				   NULL, "%d", group->id);
	if (ret) {
		mutex_lock(&iommu_group_mutex);
		ida_remove(&iommu_group_ida, group->id);
		mutex_unlock(&iommu_group_mutex);
		kfree(group);
		return ERR_PTR(ret);
	}

	group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
	if (!group->devices_kobj) {
		kobject_put(&group->kobj); /* triggers .release & free */
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * The devices_kobj holds a reference on the group kobject, so
	 * as long as that exists so will the group.  We can therefore
	 * use the devices_kobj for reference counting.
	 */
	kobject_put(&group->kobj);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_alloc);
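
/*
 * Usage sketch (illustrative only, hypothetical driver code): an IOMMU
 * driver allocates a group, adds the device it has determined to need
 * grouping, and then drops the initial reference, since
 * iommu_group_add_device() takes its own reference on the group:
 *
 *	struct iommu_group *group = iommu_group_alloc();
 *	int ret;
 *
 *	if (IS_ERR(group))
 *		return PTR_ERR(group);
 *	ret = iommu_group_add_device(group, dev);
 *	iommu_group_put(group);
 *	if (ret)
 *		return ret;
 */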

struct iommu_group *iommu_group_get_by_id(int id)
{
	struct kobject *group_kobj;
	struct iommu_group *group;
	const char *name;

	if (!iommu_group_kset)
		return NULL;

	name = kasprintf(GFP_KERNEL, "%d", id);
	if (!name)
		return NULL;

	group_kobj = kset_find_obj(iommu_group_kset, name);
	kfree(name);

	if (!group_kobj)
		return NULL;

	group = container_of(group_kobj, struct iommu_group, kobj);
	BUG_ON(group->id != id);

	kobject_get(group->devices_kobj);
	kobject_put(&group->kobj);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_get_by_id);

/**
 * iommu_group_get_iommudata - retrieve iommu_data registered for a group
 * @group: the group
 *
 * iommu drivers can store data in the group for use when doing iommu
 * operations.  This function provides a way to retrieve it.  Caller
 * should hold a group reference.
 */
void *iommu_group_get_iommudata(struct iommu_group *group)
{
	return group->iommu_data;
}
EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);

/**
 * iommu_group_set_iommudata - set iommu_data for a group
 * @group: the group
 * @iommu_data: new data
 * @release: release function for iommu_data
 *
 * iommu drivers can store data in the group for use when doing iommu
 * operations.  This function provides a way to set the data after
 * the group has been allocated.  Caller should hold a group reference.
 */
void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
			       void (*release)(void *iommu_data))
{
	group->iommu_data = iommu_data;
	group->iommu_data_release = release;
}
EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
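
/*
 * Usage sketch (hypothetical names, not from this file): a driver can hang
 * its own per-group state off the group and have it freed automatically
 * when the last group reference is dropped:
 *
 *	static void my_group_release(void *iommu_data)
 *	{
 *		kfree(iommu_data);
 *	}
 *
 *	struct my_group_data *data = kzalloc(sizeof(*data), GFP_KERNEL);
 *
 *	if (!data)
 *		return -ENOMEM;
 *	iommu_group_set_iommudata(group, data, my_group_release);
 *
 *	later:	data = iommu_group_get_iommudata(group);
 */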

/**
 * iommu_group_set_name - set name for a group
 * @group: the group
 * @name: name
 *
 * Allow iommu driver to set a name for a group.  When set it will
 * appear in a name attribute file under the group in sysfs.
 */
int iommu_group_set_name(struct iommu_group *group, const char *name)
{
	int ret;

	if (group->name) {
		iommu_group_remove_file(group, &iommu_group_attr_name);
		kfree(group->name);
		group->name = NULL;
		if (!name)
			return 0;
	}

	group->name = kstrdup(name, GFP_KERNEL);
	if (!group->name)
		return -ENOMEM;

	ret = iommu_group_create_file(group, &iommu_group_attr_name);
	if (ret) {
		kfree(group->name);
		group->name = NULL;
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(iommu_group_set_name);

/**
 * iommu_group_add_device - add a device to an iommu group
 * @group: the group into which to add the device (reference should be held)
 * @dev: the device
 *
 * This function is called by an iommu driver to add a device into a
 * group.  Adding a device increments the group reference count.
 */
int iommu_group_add_device(struct iommu_group *group, struct device *dev)
{
	int ret, i = 0;
	struct iommu_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return -ENOMEM;

	device->dev = dev;

	ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
	if (ret) {
		kfree(device);
		return ret;
	}

	device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
rename:
	if (!device->name) {
		sysfs_remove_link(&dev->kobj, "iommu_group");
		kfree(device);
		return -ENOMEM;
	}

	ret = sysfs_create_link_nowarn(group->devices_kobj,
				       &dev->kobj, device->name);
	if (ret) {
		kfree(device->name);
		if (ret == -EEXIST && i >= 0) {
			/*
			 * Account for the slim chance of collision
			 * and append an instance to the name.
			 */
			device->name = kasprintf(GFP_KERNEL, "%s.%d",
						 kobject_name(&dev->kobj), i++);
			goto rename;
		}

		sysfs_remove_link(&dev->kobj, "iommu_group");
		kfree(device);
		return ret;
	}

	kobject_get(group->devices_kobj);

	dev->iommu_group = group;

	mutex_lock(&group->mutex);
	list_add_tail(&device->list, &group->devices);
	mutex_unlock(&group->mutex);

	/* Notify any listeners about change to group. */
	blocking_notifier_call_chain(&group->notifier,
				     IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev);

	trace_add_device_to_group(group->id, dev);
	return 0;
}
EXPORT_SYMBOL_GPL(iommu_group_add_device);

/**
 * iommu_group_remove_device - remove a device from its current group
 * @dev: device to be removed
 *
 * This function is called by an iommu driver to remove the device from
 * its current group.  This decrements the iommu group reference count.
 */
void iommu_group_remove_device(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;
	struct iommu_device *tmp_device, *device = NULL;

	/* Pre-notify listeners that a device is being removed. */
	blocking_notifier_call_chain(&group->notifier,
				     IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev);

	mutex_lock(&group->mutex);
	list_for_each_entry(tmp_device, &group->devices, list) {
		if (tmp_device->dev == dev) {
			device = tmp_device;
			list_del(&device->list);
			break;
		}
	}
	mutex_unlock(&group->mutex);

	if (!device)
		return;

	sysfs_remove_link(group->devices_kobj, device->name);
	sysfs_remove_link(&dev->kobj, "iommu_group");

	trace_remove_device_from_group(group->id, dev);

	kfree(device->name);
	kfree(device);
	dev->iommu_group = NULL;
	kobject_put(group->devices_kobj);
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);

/**
 * iommu_group_for_each_dev - iterate over each device in the group
 * @group: the group
 * @data: caller opaque data to be passed to callback function
 * @fn: caller supplied callback function
 *
 * This function is called by group users to iterate over group devices.
 * Callers should hold a reference count to the group during callback.
 * The group->mutex is held across callbacks, which will block calls to
 * iommu_group_add/remove_device.
 */
int iommu_group_for_each_dev(struct iommu_group *group, void *data,
			     int (*fn)(struct device *, void *))
{
	struct iommu_device *device;
	int ret = 0;

	mutex_lock(&group->mutex);
	list_for_each_entry(device, &group->devices, list) {
		ret = fn(device->dev, data);
		if (ret)
			break;
	}
	mutex_unlock(&group->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
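
/*
 * Usage sketch (hypothetical callback): callers pass a function that is
 * invoked for every device in the group; a non-zero return stops the walk
 * and is propagated back to the caller:
 *
 *	static int count_dev(struct device *dev, void *data)
 *	{
 *		int *count = data;
 *
 *		(*count)++;
 *		return 0;
 *	}
 *
 *	int count = 0;
 *
 *	iommu_group_for_each_dev(group, &count, count_dev);
 */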

/**
 * iommu_group_get - Return the group for a device and increment reference
 * @dev: get the group that this device belongs to
 *
 * This function is called by iommu drivers and users to get the group
 * for the specified device.  If found, the group is returned with its
 * reference count incremented, else NULL is returned.
 */
struct iommu_group *iommu_group_get(struct device *dev)
{
	struct iommu_group *group = dev->iommu_group;

	if (group)
		kobject_get(group->devices_kobj);

	return group;
}
EXPORT_SYMBOL_GPL(iommu_group_get);

/**
 * iommu_group_put - Decrement group reference
 * @group: the group to use
 *
 * This function is called by iommu drivers and users to release the
 * iommu group.  Once the reference count is zero, the group is released.
 */
void iommu_group_put(struct iommu_group *group)
{
	if (group)
		kobject_put(group->devices_kobj);
}
EXPORT_SYMBOL_GPL(iommu_group_put);

/**
 * iommu_group_register_notifier - Register a notifier for group changes
 * @group: the group to watch
 * @nb: notifier block to signal
 *
 * This function allows iommu group users to track changes in a group.
 * See include/linux/iommu.h for actions sent via this notifier.  Caller
 * should hold a reference to the group throughout notifier registration.
 */
int iommu_group_register_notifier(struct iommu_group *group,
				  struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&group->notifier, nb);
}
EXPORT_SYMBOL_GPL(iommu_group_register_notifier);

/**
 * iommu_group_unregister_notifier - Unregister a notifier
 * @group: the group to watch
 * @nb: notifier block to signal
 *
 * Unregister a previously registered group notifier block.
 */
int iommu_group_unregister_notifier(struct iommu_group *group,
				    struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&group->notifier, nb);
}
EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);
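
/*
 * Usage sketch (hypothetical listener): a group user registers a notifier
 * block and switches on the IOMMU_GROUP_NOTIFY_* action; the notifier data
 * pointer is the struct device the event refers to:
 *
 *	static int my_group_notify(struct notifier_block *nb,
 *				   unsigned long action, void *data)
 *	{
 *		struct device *dev = data;
 *
 *		if (action == IOMMU_GROUP_NOTIFY_ADD_DEVICE)
 *			dev_info(dev, "added to iommu group\n");
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_group_notify,
 *	};
 *
 *	iommu_group_register_notifier(group, &my_nb);
 */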

/**
 * iommu_group_id - Return ID for a group
 * @group: the group to ID
 *
 * Return the unique ID for the group matching the sysfs group number.
 */
int iommu_group_id(struct iommu_group *group)
{
	return group->id;
}
EXPORT_SYMBOL_GPL(iommu_group_id);

static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns);

/*
 * To consider a PCI device isolated, we require ACS to support Source
 * Validation, Request Redirection, Completer Redirection, and Upstream
 * Forwarding.  This effectively means that devices cannot spoof their
 * requester ID, requests and completions cannot be redirected, and all
 * transactions are forwarded upstream, even when they pass through a
 * bridge where the target device is downstream.
 */
#define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)

/*
 * For multifunction devices which are not isolated from each other, find
 * all the other non-isolated functions and look for existing groups.  For
 * each function, we also need to look for aliases to or from other devices
 * that may already have a group.
 */
static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
							unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
		return NULL;

	for_each_pci_dev(tmp) {
		if (tmp == pdev || tmp->bus != pdev->bus ||
		    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
		    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
			continue;

		group = get_pci_alias_group(tmp, devfns);
		if (group) {
			pci_dev_put(tmp);
			return group;
		}
	}

	return NULL;
}

/*
 * Look for aliases to or from the given device for existing groups.  The
 * dma_alias_devfn only supports aliases on the same bus, therefore the search
 * space is quite small (especially since we're really only looking at PCIe
 * devices, and therefore only expect multiple slots on the root complex or
 * downstream switch ports).  It's conceivable though that a pair of
 * multifunction devices could have aliases between them that would cause a
 * loop.  To prevent this, we use a bitmap to track where we've been.
 */
static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
					       unsigned long *devfns)
{
	struct pci_dev *tmp = NULL;
	struct iommu_group *group;

	if (test_and_set_bit(pdev->devfn & 0xff, devfns))
		return NULL;

	group = iommu_group_get(&pdev->dev);
	if (group)
		return group;

	for_each_pci_dev(tmp) {
		if (tmp == pdev || tmp->bus != pdev->bus)
			continue;

		/* We alias them or they alias us */
		if (((pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) &&
		     pdev->dma_alias_devfn == tmp->devfn) ||
		    ((tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) &&
		     tmp->dma_alias_devfn == pdev->devfn)) {

			group = get_pci_alias_group(tmp, devfns);
			if (group) {
				pci_dev_put(tmp);
				return group;
			}

			group = get_pci_function_alias_group(tmp, devfns);
			if (group) {
				pci_dev_put(tmp);
				return group;
			}
		}
	}

	return NULL;
}

struct group_for_pci_data {
	struct pci_dev *pdev;
	struct iommu_group *group;
};

/*
 * DMA alias iterator callback, return the last seen device.  Stop and return
 * the IOMMU group if we find one along the way.
 */
static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct group_for_pci_data *data = opaque;

	data->pdev = pdev;
	data->group = iommu_group_get(&pdev->dev);

	return data->group != NULL;
}

/*
 * Use standard PCI bus topology, isolation features, and DMA alias quirks
 * to find or create an IOMMU group for a device.
 */
static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
{
	struct group_for_pci_data data;
	struct pci_bus *bus;
	struct iommu_group *group = NULL;
	u64 devfns[4] = { 0 };

	/*
	 * Find the upstream DMA alias for the device.  A device must not
	 * be aliased due to topology in order to have its own IOMMU group.
	 * If we find an alias along the way that already belongs to a
	 * group, use it.
	 */
	if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
		return data.group;

	pdev = data.pdev;

	/*
	 * Continue upstream from the point of minimum IOMMU granularity
	 * due to aliases to the point where devices are protected from
	 * peer-to-peer DMA by PCI ACS.  Again, if we find an existing
	 * group, use it.
	 */
	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
		if (!bus->self)
			continue;

		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
			break;

		pdev = bus->self;

		group = iommu_group_get(&pdev->dev);
		if (group)
			return group;
	}

	/*
	 * Look for existing groups on device aliases.  If we alias another
	 * device or another device aliases us, use the same group.
	 */
	group = get_pci_alias_group(pdev, (unsigned long *)devfns);
	if (group)
		return group;

	/*
	 * Look for existing groups on non-isolated functions on the same
	 * slot and aliases of those functions, if any.  No need to clear
	 * the search bitmap, the tested devfns are still valid.
	 */
	group = get_pci_function_alias_group(pdev, (unsigned long *)devfns);
	if (group)
		return group;

	/* No shared group found, allocate new */
	return iommu_group_alloc();
}

/**
 * iommu_group_get_for_dev - Find or create the IOMMU group for a device
 * @dev: target device
 *
 * This function is intended to be called by IOMMU drivers and extended to
 * support common, bus-defined algorithms when determining or creating the
 * IOMMU group for a device.  On success, the caller will hold a reference
 * to the returned IOMMU group, which will already include the provided
 * device.  The reference should be released with iommu_group_put().
 */
struct iommu_group *iommu_group_get_for_dev(struct device *dev)
{
	struct iommu_group *group;
	int ret;

	group = iommu_group_get(dev);
	if (group)
		return group;

	if (!dev_is_pci(dev))
		return ERR_PTR(-EINVAL);

	group = iommu_group_get_for_pci_dev(to_pci_dev(dev));

	if (IS_ERR(group))
		return group;

	ret = iommu_group_add_device(group, dev);
	if (ret) {
		iommu_group_put(group);
		return ERR_PTR(ret);
	}

	return group;
}
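
/*
 * Usage sketch (hypothetical iommu_ops callback): a PCI IOMMU driver's
 * .add_device() callback can defer group construction to the helper above
 * and only keep the group reference for as long as it needs it:
 *
 *	static int my_iommu_add_device(struct device *dev)
 *	{
 *		struct iommu_group *group;
 *
 *		group = iommu_group_get_for_dev(dev);
 *		if (IS_ERR(group))
 *			return PTR_ERR(group);
 *
 *		iommu_group_put(group);
 *		return 0;
 *	}
 */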

static int add_iommu_group(struct device *dev, void *data)
{
	struct iommu_callback_data *cb = data;
	const struct iommu_ops *ops = cb->ops;

	if (!ops->add_device)
		return 0;

	WARN_ON(dev->iommu_group);

	ops->add_device(dev);

	return 0;
}

static int iommu_bus_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct device *dev = data;
	const struct iommu_ops *ops = dev->bus->iommu_ops;
	struct iommu_group *group;
	unsigned long group_action = 0;

	/*
	 * ADD/DEL call into iommu driver ops if provided, which may
	 * result in ADD/DEL notifiers to group->notifier
	 */
	if (action == BUS_NOTIFY_ADD_DEVICE) {
		if (ops->add_device)
			return ops->add_device(dev);
	} else if (action == BUS_NOTIFY_DEL_DEVICE) {
		if (ops->remove_device && dev->iommu_group) {
			ops->remove_device(dev);
			return 0;
		}
	}

	/*
	 * Remaining BUS_NOTIFYs get filtered and republished to the
	 * group, if anyone is listening
	 */
	group = iommu_group_get(dev);
	if (!group)
		return 0;

	switch (action) {
	case BUS_NOTIFY_BIND_DRIVER:
		group_action = IOMMU_GROUP_NOTIFY_BIND_DRIVER;
		break;
	case BUS_NOTIFY_BOUND_DRIVER:
		group_action = IOMMU_GROUP_NOTIFY_BOUND_DRIVER;
		break;
	case BUS_NOTIFY_UNBIND_DRIVER:
		group_action = IOMMU_GROUP_NOTIFY_UNBIND_DRIVER;
		break;
	case BUS_NOTIFY_UNBOUND_DRIVER:
		group_action = IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER;
		break;
	}

	if (group_action)
		blocking_notifier_call_chain(&group->notifier,
					     group_action, dev);

	iommu_group_put(group);
	return 0;
}

static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
{
	int err;
	struct notifier_block *nb;
	struct iommu_callback_data cb = {
		.ops = ops,
	};

	nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
	if (!nb)
		return -ENOMEM;

	nb->notifier_call = iommu_bus_notifier;

	err = bus_register_notifier(bus, nb);
	if (err) {
		kfree(nb);
		return err;
	}

	err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
	if (err) {
		bus_unregister_notifier(bus, nb);
		kfree(nb);
		return err;
	}

	return 0;
}

/**
 * bus_set_iommu - set iommu-callbacks for the bus
 * @bus: bus.
 * @ops: the callbacks provided by the iommu-driver
 *
 * This function is called by an iommu driver to set the iommu methods
 * used for a particular bus. Drivers for devices on that bus can use
 * the iommu-api after these ops are registered.
 * This special function is needed because IOMMUs are usually devices on
 * the bus itself, so the iommu drivers are not initialized when the bus
 * is set up. With this function the iommu-driver can set the iommu-ops
 * afterwards.
 */
int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops)
{
	int err;

	if (bus->iommu_ops != NULL)
		return -EBUSY;

	bus->iommu_ops = ops;

	/* Do IOMMU specific setup for this bus-type */
	err = iommu_bus_init(bus, ops);
	if (err)
		bus->iommu_ops = NULL;

	return err;
}
EXPORT_SYMBOL_GPL(bus_set_iommu);
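
/*
 * Usage sketch (hypothetical ops and callbacks): an IOMMU driver registers
 * its callbacks for a whole bus type once its own probing is done, after
 * which the IOMMU API becomes usable for devices on that bus:
 *
 *	static const struct iommu_ops my_iommu_ops = {
 *		.domain_alloc	= my_domain_alloc,
 *		.domain_free	= my_domain_free,
 *		.attach_dev	= my_attach_dev,
 *		.detach_dev	= my_detach_dev,
 *		.map		= my_map,
 *		.unmap		= my_unmap,
 *		.iova_to_phys	= my_iova_to_phys,
 *		.add_device	= my_add_device,
 *		.remove_device	= my_remove_device,
 *		.pgsize_bitmap	= SZ_4K | SZ_2M,
 *	};
 *
 *	bus_set_iommu(&pci_bus_type, &my_iommu_ops);
 */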

bool iommu_present(struct bus_type *bus)
{
	return bus->iommu_ops != NULL;
}
EXPORT_SYMBOL_GPL(iommu_present);

bool iommu_capable(struct bus_type *bus, enum iommu_cap cap)
{
	if (!bus->iommu_ops || !bus->iommu_ops->capable)
		return false;

	return bus->iommu_ops->capable(cap);
}
EXPORT_SYMBOL_GPL(iommu_capable);

/**
 * iommu_set_fault_handler() - set a fault handler for an iommu domain
 * @domain: iommu domain
 * @handler: fault handler
 * @token: user data, will be passed back to the fault handler
 *
 * This function should be used by IOMMU users which want to be notified
 * whenever an IOMMU fault happens.
 *
 * The fault handler itself should return 0 on success, and an appropriate
 * error code otherwise.
 */
void iommu_set_fault_handler(struct iommu_domain *domain,
					iommu_fault_handler_t handler,
					void *token)
{
	BUG_ON(!domain);

	domain->handler = handler;
	domain->handler_token = token;
}
EXPORT_SYMBOL_GPL(iommu_set_fault_handler);

struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
{
	struct iommu_domain *domain;

	if (bus == NULL || bus->iommu_ops == NULL)
		return NULL;

	domain = bus->iommu_ops->domain_alloc(IOMMU_DOMAIN_UNMANAGED);
	if (!domain)
		return NULL;

	domain->ops  = bus->iommu_ops;
	domain->type = IOMMU_DOMAIN_UNMANAGED;

	return domain;
}
EXPORT_SYMBOL_GPL(iommu_domain_alloc);

void iommu_domain_free(struct iommu_domain *domain)
{
	domain->ops->domain_free(domain);
}
EXPORT_SYMBOL_GPL(iommu_domain_free);

int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
{
	int ret;

	if (unlikely(domain->ops->attach_dev == NULL))
		return -ENODEV;

	ret = domain->ops->attach_dev(domain, dev);
	if (!ret)
		trace_attach_device_to_domain(dev);
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_attach_device);

void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
{
	if (unlikely(domain->ops->detach_dev == NULL))
		return;

	domain->ops->detach_dev(domain, dev);
	trace_detach_device_from_domain(dev);
}
EXPORT_SYMBOL_GPL(iommu_detach_device);
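
/*
 * Usage sketch: a typical consumer allocates an unmanaged domain for a bus,
 * attaches a device (or a whole group, see iommu_attach_group() below) and
 * frees the domain again when it is done with it:
 *
 *	struct iommu_domain *domain = iommu_domain_alloc(&pci_bus_type);
 *	int ret;
 *
 *	if (!domain)
 *		return -ENODEV;
 *	ret = iommu_attach_device(domain, dev);
 *	if (ret) {
 *		iommu_domain_free(domain);
 *		return ret;
 *	}
 *
 *	use the domain, then:
 *
 *	iommu_detach_device(domain, dev);
 *	iommu_domain_free(domain);
 */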

/*
 * IOMMU groups are really the natural working unit of the IOMMU, but
 * the IOMMU API works on domains and devices.  Bridge that gap by
 * iterating over the devices in a group.  Ideally we'd have a single
 * device which represents the requester ID of the group, but we also
 * allow IOMMU drivers to create policy-defined minimum sets, where
 * the physical hardware may be able to distinguish members, but we
 * wish to group them at a higher level (ex. untrusted multi-function
 * PCI devices).  Thus we attach each device.
 */
static int iommu_group_do_attach_device(struct device *dev, void *data)
{
	struct iommu_domain *domain = data;

	return iommu_attach_device(domain, dev);
}

int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
{
	return iommu_group_for_each_dev(group, domain,
					iommu_group_do_attach_device);
}
EXPORT_SYMBOL_GPL(iommu_attach_group);

static int iommu_group_do_detach_device(struct device *dev, void *data)
{
	struct iommu_domain *domain = data;

	iommu_detach_device(domain, dev);

	return 0;
}

void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
{
	iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device);
}
EXPORT_SYMBOL_GPL(iommu_detach_group);

phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{
	if (unlikely(domain->ops->iova_to_phys == NULL))
		return 0;

	return domain->ops->iova_to_phys(domain, iova);
}
EXPORT_SYMBOL_GPL(iommu_iova_to_phys);

static size_t iommu_pgsize(struct iommu_domain *domain,
			   unsigned long addr_merge, size_t size)
{
	unsigned int pgsize_idx;
	size_t pgsize;

	/* Max page size that still fits into 'size' */
	pgsize_idx = __fls(size);

	/* need to consider alignment requirements ? */
	if (likely(addr_merge)) {
		/* Max page size allowed by address */
		unsigned int align_pgsize_idx = __ffs(addr_merge);

		pgsize_idx = min(pgsize_idx, align_pgsize_idx);
	}

	/* build a mask of acceptable page sizes */
	pgsize = (1UL << (pgsize_idx + 1)) - 1;

	/* throw away page sizes not supported by the hardware */
	pgsize &= domain->ops->pgsize_bitmap;

	/* make sure we're still sane */
	BUG_ON(!pgsize);

	/* pick the biggest page */
	pgsize_idx = __fls(pgsize);
	pgsize = 1UL << pgsize_idx;

	return pgsize;
}
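
/*
 * Worked example for the helper above (illustrative numbers): with
 * addr_merge = 0x201000, size = 0x10000 and a pgsize_bitmap of
 * SZ_4K | SZ_2M, the lowest set bit of the address limits the page size
 * index to 12, the mask becomes 0x1fff, and ANDing with the bitmap leaves
 * only SZ_4K, so a 4K page is picked even though 0x10000 bytes remain
 * to be mapped.
 */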

int iommu_map(struct iommu_domain *domain, unsigned long iova,
	      phys_addr_t paddr, size_t size, int prot)
{
	unsigned long orig_iova = iova;
	unsigned int min_pagesz;
	size_t orig_size = size;
	int ret = 0;

	if (unlikely(domain->ops->map == NULL ||
		     domain->ops->pgsize_bitmap == 0UL))
		return -ENODEV;

	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
		return -EINVAL;

	/* find out the minimum page size supported */
	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);

	/*
	 * both the virtual address and the physical one, as well as
	 * the size of the mapping, must be aligned (at least) to the
	 * size of the smallest page supported by the hardware
	 */
	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
		       iova, &paddr, size, min_pagesz);
		return -EINVAL;
	}

	pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);

	while (size) {
		size_t pgsize = iommu_pgsize(domain, iova | paddr, size);

		pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
			 iova, &paddr, pgsize);

		ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
		if (ret)
			break;

		iova += pgsize;
		paddr += pgsize;
		size -= pgsize;
	}

	/* unroll mapping in case something went wrong */
	if (ret)
		iommu_unmap(domain, orig_iova, orig_size - size);
	else
		trace_map(orig_iova, paddr, orig_size);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_map);

size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
{
	size_t unmapped_page, unmapped = 0;
	unsigned int min_pagesz;
	unsigned long orig_iova = iova;

	if (unlikely(domain->ops->unmap == NULL ||
		     domain->ops->pgsize_bitmap == 0UL))
		return -ENODEV;

	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
		return -EINVAL;

	/* find out the minimum page size supported */
	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);

	/*
	 * The virtual address, as well as the size of the mapping, must be
	 * aligned (at least) to the size of the smallest page supported
	 * by the hardware
	 */
	if (!IS_ALIGNED(iova | size, min_pagesz)) {
		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
		       iova, size, min_pagesz);
		return -EINVAL;
	}

	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);

	/*
	 * Keep iterating until we either unmap 'size' bytes (or more)
	 * or we hit an area that isn't mapped.
	 */
	while (unmapped < size) {
		size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);

		unmapped_page = domain->ops->unmap(domain, iova, pgsize);
		if (!unmapped_page)
			break;

		pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
			 iova, unmapped_page);

		iova += unmapped_page;
		unmapped += unmapped_page;
	}

	trace_unmap(orig_iova, size, unmapped);
	return unmapped;
}
EXPORT_SYMBOL_GPL(iommu_unmap);
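
/*
 * Usage sketch (illustrative values): map one page of physically contiguous
 * memory at a bus address chosen by the caller, and tear the mapping down
 * again; the size returned by iommu_unmap() should match what was mapped:
 *
 *	dma_addr_t iova = 0x100000;
 *	void *vaddr = (void *)__get_free_page(GFP_KERNEL);
 *	int ret;
 *
 *	ret = iommu_map(domain, iova, virt_to_phys(vaddr), PAGE_SIZE,
 *			IOMMU_READ | IOMMU_WRITE);
 *
 *	and later:
 *
 *	iommu_unmap(domain, iova, PAGE_SIZE);
 */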

size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
			 struct scatterlist *sg, unsigned int nents, int prot)
{
	struct scatterlist *s;
	size_t mapped = 0;
	unsigned int i, min_pagesz;
	int ret;

	if (unlikely(domain->ops->pgsize_bitmap == 0UL))
		return 0;

	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);

	for_each_sg(sg, s, nents, i) {
		phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;

		/*
		 * We are mapping on IOMMU page boundaries, so offset within
		 * the page must be 0. However, the IOMMU may support pages
		 * smaller than PAGE_SIZE, so s->offset may still represent
		 * an offset of that boundary within the CPU page.
		 */
		if (!IS_ALIGNED(s->offset, min_pagesz))
			goto out_err;

		ret = iommu_map(domain, iova + mapped, phys, s->length, prot);
		if (ret)
			goto out_err;

		mapped += s->length;
	}

	return mapped;

out_err:
	/* undo mappings already done */
	iommu_unmap(domain, iova, mapped);

	return 0;
}
EXPORT_SYMBOL_GPL(default_iommu_map_sg);

int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
			       phys_addr_t paddr, u64 size, int prot)
{
	if (unlikely(domain->ops->domain_window_enable == NULL))
		return -ENODEV;

	return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size,
						 prot);
}
EXPORT_SYMBOL_GPL(iommu_domain_window_enable);

void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr)
{
	if (unlikely(domain->ops->domain_window_disable == NULL))
		return;

	return domain->ops->domain_window_disable(domain, wnd_nr);
}
EXPORT_SYMBOL_GPL(iommu_domain_window_disable);

static int __init iommu_init(void)
{
	iommu_group_kset = kset_create_and_add("iommu_groups",
					       NULL, kernel_kobj);
	ida_init(&iommu_group_ida);
	mutex_init(&iommu_group_mutex);

	BUG_ON(!iommu_group_kset);

	return 0;
}
arch_initcall(iommu_init);

int iommu_domain_get_attr(struct iommu_domain *domain,
			  enum iommu_attr attr, void *data)
{
	struct iommu_domain_geometry *geometry;
	bool *paging;
	int ret = 0;
	u32 *count;

	switch (attr) {
	case DOMAIN_ATTR_GEOMETRY:
		geometry  = data;
		*geometry = domain->geometry;

		break;
	case DOMAIN_ATTR_PAGING:
		paging  = data;
		*paging = (domain->ops->pgsize_bitmap != 0UL);
		break;
	case DOMAIN_ATTR_WINDOWS:
		count = data;

		if (domain->ops->domain_get_windows != NULL)
			*count = domain->ops->domain_get_windows(domain);
		else
			ret = -ENODEV;

		break;
	default:
		if (!domain->ops->domain_get_attr)
			return -EINVAL;

		ret = domain->ops->domain_get_attr(domain, attr, data);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_domain_get_attr);
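
/*
 * Usage sketch: query the addressable IOVA window of a domain via the
 * generic geometry attribute:
 *
 *	struct iommu_domain_geometry geo;
 *
 *	if (!iommu_domain_get_attr(domain, DOMAIN_ATTR_GEOMETRY, &geo))
 *		pr_info("aperture 0x%llx-0x%llx\n",
 *			(u64)geo.aperture_start, (u64)geo.aperture_end);
 */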

int iommu_domain_set_attr(struct iommu_domain *domain,
			  enum iommu_attr attr, void *data)
{
	int ret = 0;
	u32 *count;

	switch (attr) {
	case DOMAIN_ATTR_WINDOWS:
		count = data;

		if (domain->ops->domain_set_windows != NULL)
			ret = domain->ops->domain_set_windows(domain, *count);
		else
			ret = -ENODEV;

		break;
	default:
		if (domain->ops->domain_set_attr == NULL)
			return -EINVAL;

		ret = domain->ops->domain_set_attr(domain, attr, data);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_domain_set_attr);