1/*
2 * TI VPE mem2mem driver, based on the virtual v4l2-mem2mem example driver
3 *
4 * Copyright (c) 2013 Texas Instruments Inc.
5 * David Griego, <dagriego@biglakesoftware.com>
6 * Dale Farnsworth, <dale@farnsworth.org>
7 * Archit Taneja, <archit@ti.com>
8 *
9 * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd.
10 * Pawel Osciak, <pawel@osciak.com>
11 * Marek Szyprowski, <m.szyprowski@samsung.com>
12 *
13 * Based on the virtual v4l2-mem2mem example device
14 *
15 * This program is free software; you can redistribute it and/or modify it
16 * under the terms of the GNU General Public License version 2 as published by
17 * the Free Software Foundation
18 */
19
20#include <linux/delay.h>
21#include <linux/dma-mapping.h>
22#include <linux/err.h>
23#include <linux/fs.h>
24#include <linux/interrupt.h>
25#include <linux/io.h>
26#include <linux/ioctl.h>
27#include <linux/module.h>
28#include <linux/of.h>
29#include <linux/platform_device.h>
30#include <linux/pm_runtime.h>
31#include <linux/sched.h>
32#include <linux/slab.h>
33#include <linux/videodev2.h>
34#include <linux/log2.h>
35#include <linux/sizes.h>
36
37#include <media/v4l2-common.h>
38#include <media/v4l2-ctrls.h>
39#include <media/v4l2-device.h>
40#include <media/v4l2-event.h>
41#include <media/v4l2-ioctl.h>
42#include <media/v4l2-mem2mem.h>
43#include <media/videobuf2-core.h>
44#include <media/videobuf2-dma-contig.h>
45
46#include "vpdma.h"
47#include "vpe_regs.h"
48#include "sc.h"
49#include "csc.h"
50
51#define VPE_MODULE_NAME "vpe"
52
53/* minimum and maximum frame sizes */
54#define MIN_W		32
55#define MIN_H		32
56#define MAX_W		1920
57#define MAX_H		1080
58
59/* required alignments */
60#define S_ALIGN		0	/* multiple of 1 */
61#define H_ALIGN		1	/* multiple of 2 */
62
63/* flags that indicate a format can be used for capture/output */
64#define VPE_FMT_TYPE_CAPTURE	(1 << 0)
65#define VPE_FMT_TYPE_OUTPUT	(1 << 1)
66
67/* used as plane indices */
68#define VPE_MAX_PLANES	2
69#define VPE_LUMA	0
70#define VPE_CHROMA	1
71
72/* per m2m context info */
73#define VPE_MAX_SRC_BUFS	3	/* need 3 src fields to de-interlace */
74
75#define VPE_DEF_BUFS_PER_JOB	1	/* default one buffer per batch job */
76
77/*
78 * each VPE context can need up to 3 config descriptors, 7 input descriptors,
79 * 3 output descriptors, and 10 control descriptors
80 */
81#define VPE_DESC_LIST_SIZE	(10 * VPDMA_DTD_DESC_SIZE +	\
82					13 * VPDMA_CFD_CTD_DESC_SIZE)
83
84#define vpe_dbg(vpedev, fmt, arg...)	\
85		dev_dbg((vpedev)->v4l2_dev.dev, fmt, ##arg)
86#define vpe_err(vpedev, fmt, arg...)	\
87		dev_err((vpedev)->v4l2_dev.dev, fmt, ##arg)
88
89struct vpe_us_coeffs {
90	unsigned short	anchor_fid0_c0;
91	unsigned short	anchor_fid0_c1;
92	unsigned short	anchor_fid0_c2;
93	unsigned short	anchor_fid0_c3;
94	unsigned short	interp_fid0_c0;
95	unsigned short	interp_fid0_c1;
96	unsigned short	interp_fid0_c2;
97	unsigned short	interp_fid0_c3;
98	unsigned short	anchor_fid1_c0;
99	unsigned short	anchor_fid1_c1;
100	unsigned short	anchor_fid1_c2;
101	unsigned short	anchor_fid1_c3;
102	unsigned short	interp_fid1_c0;
103	unsigned short	interp_fid1_c1;
104	unsigned short	interp_fid1_c2;
105	unsigned short	interp_fid1_c3;
106};
107
108/*
109 * Default upsampler coefficients
110 */
111static const struct vpe_us_coeffs us_coeffs[] = {
112	{
113		/* Coefficients for progressive input */
114		0x00C8, 0x0348, 0x0018, 0x3FD8, 0x3FB8, 0x0378, 0x00E8, 0x3FE8,
115		0x00C8, 0x0348, 0x0018, 0x3FD8, 0x3FB8, 0x0378, 0x00E8, 0x3FE8,
116	},
117	{
118		/* Coefficients for Top Field Interlaced input */
119		0x0051, 0x03D5, 0x3FE3, 0x3FF7, 0x3FB5, 0x02E9, 0x018F, 0x3FD3,
120		/* Coefficients for Bottom Field Interlaced input */
121		0x016B, 0x0247, 0x00B1, 0x3F9D, 0x3FCF, 0x03DB, 0x005D, 0x3FF9,
122	},
123};
124
125/*
126 * the following registers are for configuring some of the parameters of the
127 * motion and edge detection blocks inside DEI, these generally remain the same,
128 * these could be passed later via userspace if some one needs to tweak these.
129 */
130struct vpe_dei_regs {
131	unsigned long mdt_spacial_freq_thr_reg;		/* VPE_DEI_REG2 */
132	unsigned long edi_config_reg;			/* VPE_DEI_REG3 */
133	unsigned long edi_lut_reg0;			/* VPE_DEI_REG4 */
134	unsigned long edi_lut_reg1;			/* VPE_DEI_REG5 */
135	unsigned long edi_lut_reg2;			/* VPE_DEI_REG6 */
136	unsigned long edi_lut_reg3;			/* VPE_DEI_REG7 */
137};
138
139/*
140 * default expert DEI register values, unlikely to be modified.
141 */
142static const struct vpe_dei_regs dei_regs = {
143	.mdt_spacial_freq_thr_reg = 0x020C0804u,
144	.edi_config_reg = 0x0118100Fu,
145	.edi_lut_reg0 = 0x08040200u,
146	.edi_lut_reg1 = 0x1010100Cu,
147	.edi_lut_reg2 = 0x10101010u,
148	.edi_lut_reg3 = 0x10101010u,
149};
150
151/*
152 * The port_data structure contains per-port data.
153 */
154struct vpe_port_data {
155	enum vpdma_channel channel;	/* VPDMA channel */
156	u8	vb_index;		/* input frame f, f-1, f-2 index */
157	u8	vb_part;		/* plane index for co-panar formats */
158};
159
160/*
161 * Define indices into the port_data tables
162 */
163#define VPE_PORT_LUMA1_IN	0
164#define VPE_PORT_CHROMA1_IN	1
165#define VPE_PORT_LUMA2_IN	2
166#define VPE_PORT_CHROMA2_IN	3
167#define VPE_PORT_LUMA3_IN	4
168#define VPE_PORT_CHROMA3_IN	5
169#define VPE_PORT_MV_IN		6
170#define VPE_PORT_MV_OUT		7
171#define VPE_PORT_LUMA_OUT	8
172#define VPE_PORT_CHROMA_OUT	9
173#define VPE_PORT_RGB_OUT	10
174
175static const struct vpe_port_data port_data[11] = {
176	[VPE_PORT_LUMA1_IN] = {
177		.channel	= VPE_CHAN_LUMA1_IN,
178		.vb_index	= 0,
179		.vb_part	= VPE_LUMA,
180	},
181	[VPE_PORT_CHROMA1_IN] = {
182		.channel	= VPE_CHAN_CHROMA1_IN,
183		.vb_index	= 0,
184		.vb_part	= VPE_CHROMA,
185	},
186	[VPE_PORT_LUMA2_IN] = {
187		.channel	= VPE_CHAN_LUMA2_IN,
188		.vb_index	= 1,
189		.vb_part	= VPE_LUMA,
190	},
191	[VPE_PORT_CHROMA2_IN] = {
192		.channel	= VPE_CHAN_CHROMA2_IN,
193		.vb_index	= 1,
194		.vb_part	= VPE_CHROMA,
195	},
196	[VPE_PORT_LUMA3_IN] = {
197		.channel	= VPE_CHAN_LUMA3_IN,
198		.vb_index	= 2,
199		.vb_part	= VPE_LUMA,
200	},
201	[VPE_PORT_CHROMA3_IN] = {
202		.channel	= VPE_CHAN_CHROMA3_IN,
203		.vb_index	= 2,
204		.vb_part	= VPE_CHROMA,
205	},
206	[VPE_PORT_MV_IN] = {
207		.channel	= VPE_CHAN_MV_IN,
208	},
209	[VPE_PORT_MV_OUT] = {
210		.channel	= VPE_CHAN_MV_OUT,
211	},
212	[VPE_PORT_LUMA_OUT] = {
213		.channel	= VPE_CHAN_LUMA_OUT,
214		.vb_part	= VPE_LUMA,
215	},
216	[VPE_PORT_CHROMA_OUT] = {
217		.channel	= VPE_CHAN_CHROMA_OUT,
218		.vb_part	= VPE_CHROMA,
219	},
220	[VPE_PORT_RGB_OUT] = {
221		.channel	= VPE_CHAN_RGB_OUT,
222		.vb_part	= VPE_LUMA,
223	},
224};
225
226
227/* driver info for each of the supported video formats */
228struct vpe_fmt {
229	char	*name;			/* human-readable name */
230	u32	fourcc;			/* standard format identifier */
231	u8	types;			/* CAPTURE and/or OUTPUT */
232	u8	coplanar;		/* set for unpacked Luma and Chroma */
233	/* vpdma format info for each plane */
234	struct vpdma_data_format const *vpdma_fmt[VPE_MAX_PLANES];
235};
236
237static struct vpe_fmt vpe_formats[] = {
238	{
239		.name		= "YUV 422 co-planar",
240		.fourcc		= V4L2_PIX_FMT_NV16,
241		.types		= VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
242		.coplanar	= 1,
243		.vpdma_fmt	= { &vpdma_yuv_fmts[VPDMA_DATA_FMT_Y444],
244				    &vpdma_yuv_fmts[VPDMA_DATA_FMT_C444],
245				  },
246	},
247	{
248		.name		= "YUV 420 co-planar",
249		.fourcc		= V4L2_PIX_FMT_NV12,
250		.types		= VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
251		.coplanar	= 1,
252		.vpdma_fmt	= { &vpdma_yuv_fmts[VPDMA_DATA_FMT_Y420],
253				    &vpdma_yuv_fmts[VPDMA_DATA_FMT_C420],
254				  },
255	},
256	{
257		.name		= "YUYV 422 packed",
258		.fourcc		= V4L2_PIX_FMT_YUYV,
259		.types		= VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
260		.coplanar	= 0,
261		.vpdma_fmt	= { &vpdma_yuv_fmts[VPDMA_DATA_FMT_YC422],
262				  },
263	},
264	{
265		.name		= "UYVY 422 packed",
266		.fourcc		= V4L2_PIX_FMT_UYVY,
267		.types		= VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
268		.coplanar	= 0,
269		.vpdma_fmt	= { &vpdma_yuv_fmts[VPDMA_DATA_FMT_CY422],
270				  },
271	},
272	{
273		.name		= "RGB888 packed",
274		.fourcc		= V4L2_PIX_FMT_RGB24,
275		.types		= VPE_FMT_TYPE_CAPTURE,
276		.coplanar	= 0,
277		.vpdma_fmt	= { &vpdma_rgb_fmts[VPDMA_DATA_FMT_RGB24],
278				  },
279	},
280	{
281		.name		= "ARGB32",
282		.fourcc		= V4L2_PIX_FMT_RGB32,
283		.types		= VPE_FMT_TYPE_CAPTURE,
284		.coplanar	= 0,
285		.vpdma_fmt	= { &vpdma_rgb_fmts[VPDMA_DATA_FMT_ARGB32],
286				  },
287	},
288	{
289		.name		= "BGR888 packed",
290		.fourcc		= V4L2_PIX_FMT_BGR24,
291		.types		= VPE_FMT_TYPE_CAPTURE,
292		.coplanar	= 0,
293		.vpdma_fmt	= { &vpdma_rgb_fmts[VPDMA_DATA_FMT_BGR24],
294				  },
295	},
296	{
297		.name		= "ABGR32",
298		.fourcc		= V4L2_PIX_FMT_BGR32,
299		.types		= VPE_FMT_TYPE_CAPTURE,
300		.coplanar	= 0,
301		.vpdma_fmt	= { &vpdma_rgb_fmts[VPDMA_DATA_FMT_ABGR32],
302				  },
303	},
304};
305
306/*
307 * per-queue, driver-specific private data.
308 * there is one source queue and one destination queue for each m2m context.
309 */
310struct vpe_q_data {
311	unsigned int		width;				/* frame width */
312	unsigned int		height;				/* frame height */
313	unsigned int		bytesperline[VPE_MAX_PLANES];	/* bytes per line in memory */
314	enum v4l2_colorspace	colorspace;
315	enum v4l2_field		field;				/* supported field value */
316	unsigned int		flags;
317	unsigned int		sizeimage[VPE_MAX_PLANES];	/* image size in memory */
318	struct v4l2_rect	c_rect;				/* crop/compose rectangle */
319	struct vpe_fmt		*fmt;				/* format info */
320};
321
322/* vpe_q_data flag bits */
323#define	Q_DATA_FRAME_1D		(1 << 0)
324#define	Q_DATA_MODE_TILED	(1 << 1)
325#define	Q_DATA_INTERLACED	(1 << 2)
326
327enum {
328	Q_DATA_SRC = 0,
329	Q_DATA_DST = 1,
330};
331
332/* find our format description corresponding to the passed v4l2_format */
333static struct vpe_fmt *find_format(struct v4l2_format *f)
334{
335	struct vpe_fmt *fmt;
336	unsigned int k;
337
338	for (k = 0; k < ARRAY_SIZE(vpe_formats); k++) {
339		fmt = &vpe_formats[k];
340		if (fmt->fourcc == f->fmt.pix.pixelformat)
341			return fmt;
342	}
343
344	return NULL;
345}
346
347/*
348 * there is one vpe_dev structure in the driver, it is shared by
349 * all instances.
350 */
351struct vpe_dev {
352	struct v4l2_device	v4l2_dev;
353	struct video_device	vfd;
354	struct v4l2_m2m_dev	*m2m_dev;
355
356	atomic_t		num_instances;	/* count of driver instances */
357	dma_addr_t		loaded_mmrs;	/* shadow mmrs in device */
358	struct mutex		dev_mutex;
359	spinlock_t		lock;
360
361	int			irq;
362	void __iomem		*base;
363	struct resource		*res;
364
365	struct vb2_alloc_ctx	*alloc_ctx;
366	struct vpdma_data	*vpdma;		/* vpdma data handle */
367	struct sc_data		*sc;		/* scaler data handle */
368	struct csc_data		*csc;		/* csc data handle */
369};
370
371/*
372 * There is one vpe_ctx structure for each m2m context.
373 */
374struct vpe_ctx {
375	struct v4l2_fh		fh;
376	struct vpe_dev		*dev;
377	struct v4l2_ctrl_handler hdl;
378
379	unsigned int		field;			/* current field */
380	unsigned int		sequence;		/* current frame/field seq */
381	unsigned int		aborting;		/* abort after next irq */
382
383	unsigned int		bufs_per_job;		/* input buffers per batch */
384	unsigned int		bufs_completed;		/* bufs done in this batch */
385
386	struct vpe_q_data	q_data[2];		/* src & dst queue data */
387	struct vb2_buffer	*src_vbs[VPE_MAX_SRC_BUFS];
388	struct vb2_buffer	*dst_vb;
389
390	dma_addr_t		mv_buf_dma[2];		/* dma addrs of motion vector in/out bufs */
391	void			*mv_buf[2];		/* virtual addrs of motion vector bufs */
392	size_t			mv_buf_size;		/* current motion vector buffer size */
393	struct vpdma_buf	mmr_adb;		/* shadow reg addr/data block */
394	struct vpdma_buf	sc_coeff_h;		/* h coeff buffer */
395	struct vpdma_buf	sc_coeff_v;		/* v coeff buffer */
396	struct vpdma_desc_list	desc_list;		/* DMA descriptor list */
397
398	bool			deinterlacing;		/* using de-interlacer */
399	bool			load_mmrs;		/* have new shadow reg values */
400
401	unsigned int		src_mv_buf_selector;
402};
403
404
405/*
406 * M2M devices get 2 queues.
407 * Return the queue given the type.
408 */
409static struct vpe_q_data *get_q_data(struct vpe_ctx *ctx,
410				     enum v4l2_buf_type type)
411{
412	switch (type) {
413	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
414	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
415		return &ctx->q_data[Q_DATA_SRC];
416	case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
417	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
418		return &ctx->q_data[Q_DATA_DST];
419	default:
420		BUG();
421	}
422	return NULL;
423}
424
425static u32 read_reg(struct vpe_dev *dev, int offset)
426{
427	return ioread32(dev->base + offset);
428}
429
430static void write_reg(struct vpe_dev *dev, int offset, u32 value)
431{
432	iowrite32(value, dev->base + offset);
433}
434
435/* register field read/write helpers */
436static int get_field(u32 value, u32 mask, int shift)
437{
438	return (value & (mask << shift)) >> shift;
439}
440
441static int read_field_reg(struct vpe_dev *dev, int offset, u32 mask, int shift)
442{
443	return get_field(read_reg(dev, offset), mask, shift);
444}
445
446static void write_field(u32 *valp, u32 field, u32 mask, int shift)
447{
448	u32 val = *valp;
449
450	val &= ~(mask << shift);
451	val |= (field & mask) << shift;
452	*valp = val;
453}
454
455static void write_field_reg(struct vpe_dev *dev, int offset, u32 field,
456		u32 mask, int shift)
457{
458	u32 val = read_reg(dev, offset);
459
460	write_field(&val, field, mask, shift);
461
462	write_reg(dev, offset, val);
463}
464
465/*
466 * DMA address/data block for the shadow registers
467 */
468struct vpe_mmr_adb {
469	struct vpdma_adb_hdr	out_fmt_hdr;
470	u32			out_fmt_reg[1];
471	u32			out_fmt_pad[3];
472	struct vpdma_adb_hdr	us1_hdr;
473	u32			us1_regs[8];
474	struct vpdma_adb_hdr	us2_hdr;
475	u32			us2_regs[8];
476	struct vpdma_adb_hdr	us3_hdr;
477	u32			us3_regs[8];
478	struct vpdma_adb_hdr	dei_hdr;
479	u32			dei_regs[8];
480	struct vpdma_adb_hdr	sc_hdr0;
481	u32			sc_regs0[7];
482	u32			sc_pad0[1];
483	struct vpdma_adb_hdr	sc_hdr8;
484	u32			sc_regs8[6];
485	u32			sc_pad8[2];
486	struct vpdma_adb_hdr	sc_hdr17;
487	u32			sc_regs17[9];
488	u32			sc_pad17[3];
489	struct vpdma_adb_hdr	csc_hdr;
490	u32			csc_regs[6];
491	u32			csc_pad[2];
492};
493
494#define GET_OFFSET_TOP(ctx, obj, reg)	\
495	((obj)->res->start - ctx->dev->res->start + reg)
496
497#define VPE_SET_MMR_ADB_HDR(ctx, hdr, regs, offset_a)	\
498	VPDMA_SET_MMR_ADB_HDR(ctx->mmr_adb, vpe_mmr_adb, hdr, regs, offset_a)
499/*
500 * Set the headers for all of the address/data block structures.
501 */
502static void init_adb_hdrs(struct vpe_ctx *ctx)
503{
504	VPE_SET_MMR_ADB_HDR(ctx, out_fmt_hdr, out_fmt_reg, VPE_CLK_FORMAT_SELECT);
505	VPE_SET_MMR_ADB_HDR(ctx, us1_hdr, us1_regs, VPE_US1_R0);
506	VPE_SET_MMR_ADB_HDR(ctx, us2_hdr, us2_regs, VPE_US2_R0);
507	VPE_SET_MMR_ADB_HDR(ctx, us3_hdr, us3_regs, VPE_US3_R0);
508	VPE_SET_MMR_ADB_HDR(ctx, dei_hdr, dei_regs, VPE_DEI_FRAME_SIZE);
509	VPE_SET_MMR_ADB_HDR(ctx, sc_hdr0, sc_regs0,
510		GET_OFFSET_TOP(ctx, ctx->dev->sc, CFG_SC0));
511	VPE_SET_MMR_ADB_HDR(ctx, sc_hdr8, sc_regs8,
512		GET_OFFSET_TOP(ctx, ctx->dev->sc, CFG_SC8));
513	VPE_SET_MMR_ADB_HDR(ctx, sc_hdr17, sc_regs17,
514		GET_OFFSET_TOP(ctx, ctx->dev->sc, CFG_SC17));
515	VPE_SET_MMR_ADB_HDR(ctx, csc_hdr, csc_regs,
516		GET_OFFSET_TOP(ctx, ctx->dev->csc, CSC_CSC00));
517};
518
519/*
520 * Allocate or re-allocate the motion vector DMA buffers
521 * There are two buffers, one for input and one for output.
522 * However, the roles are reversed after each field is processed.
523 * In other words, after each field is processed, the previous
524 * output (dst) MV buffer becomes the new input (src) MV buffer.
525 */
526static int realloc_mv_buffers(struct vpe_ctx *ctx, size_t size)
527{
528	struct device *dev = ctx->dev->v4l2_dev.dev;
529
530	if (ctx->mv_buf_size == size)
531		return 0;
532
533	if (ctx->mv_buf[0])
534		dma_free_coherent(dev, ctx->mv_buf_size, ctx->mv_buf[0],
535			ctx->mv_buf_dma[0]);
536
537	if (ctx->mv_buf[1])
538		dma_free_coherent(dev, ctx->mv_buf_size, ctx->mv_buf[1],
539			ctx->mv_buf_dma[1]);
540
541	if (size == 0)
542		return 0;
543
544	ctx->mv_buf[0] = dma_alloc_coherent(dev, size, &ctx->mv_buf_dma[0],
545				GFP_KERNEL);
546	if (!ctx->mv_buf[0]) {
547		vpe_err(ctx->dev, "failed to allocate motion vector buffer\n");
548		return -ENOMEM;
549	}
550
551	ctx->mv_buf[1] = dma_alloc_coherent(dev, size, &ctx->mv_buf_dma[1],
552				GFP_KERNEL);
553	if (!ctx->mv_buf[1]) {
554		vpe_err(ctx->dev, "failed to allocate motion vector buffer\n");
555		dma_free_coherent(dev, size, ctx->mv_buf[0],
556			ctx->mv_buf_dma[0]);
557
558		return -ENOMEM;
559	}
560
561	ctx->mv_buf_size = size;
562	ctx->src_mv_buf_selector = 0;
563
564	return 0;
565}
566
567static void free_mv_buffers(struct vpe_ctx *ctx)
568{
569	realloc_mv_buffers(ctx, 0);
570}
571
572/*
573 * While de-interlacing, we keep the two most recent input buffers
574 * around.  This function frees those two buffers when we have
575 * finished processing the current stream.
576 */
577static void free_vbs(struct vpe_ctx *ctx)
578{
579	struct vpe_dev *dev = ctx->dev;
580	unsigned long flags;
581
582	if (ctx->src_vbs[2] == NULL)
583		return;
584
585	spin_lock_irqsave(&dev->lock, flags);
586	if (ctx->src_vbs[2]) {
587		v4l2_m2m_buf_done(ctx->src_vbs[2], VB2_BUF_STATE_DONE);
588		v4l2_m2m_buf_done(ctx->src_vbs[1], VB2_BUF_STATE_DONE);
589	}
590	spin_unlock_irqrestore(&dev->lock, flags);
591}
592
593/*
594 * Enable or disable the VPE clocks
595 */
596static void vpe_set_clock_enable(struct vpe_dev *dev, bool on)
597{
598	u32 val = 0;
599
600	if (on)
601		val = VPE_DATA_PATH_CLK_ENABLE | VPE_VPEDMA_CLK_ENABLE;
602	write_reg(dev, VPE_CLK_ENABLE, val);
603}
604
605static void vpe_top_reset(struct vpe_dev *dev)
606{
607
608	write_field_reg(dev, VPE_CLK_RESET, 1, VPE_DATA_PATH_CLK_RESET_MASK,
609		VPE_DATA_PATH_CLK_RESET_SHIFT);
610
611	usleep_range(100, 150);
612
613	write_field_reg(dev, VPE_CLK_RESET, 0, VPE_DATA_PATH_CLK_RESET_MASK,
614		VPE_DATA_PATH_CLK_RESET_SHIFT);
615}
616
617static void vpe_top_vpdma_reset(struct vpe_dev *dev)
618{
619	write_field_reg(dev, VPE_CLK_RESET, 1, VPE_VPDMA_CLK_RESET_MASK,
620		VPE_VPDMA_CLK_RESET_SHIFT);
621
622	usleep_range(100, 150);
623
624	write_field_reg(dev, VPE_CLK_RESET, 0, VPE_VPDMA_CLK_RESET_MASK,
625		VPE_VPDMA_CLK_RESET_SHIFT);
626}
627
628/*
629 * Load the correct of upsampler coefficients into the shadow MMRs
630 */
631static void set_us_coefficients(struct vpe_ctx *ctx)
632{
633	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
634	struct vpe_q_data *s_q_data = &ctx->q_data[Q_DATA_SRC];
635	u32 *us1_reg = &mmr_adb->us1_regs[0];
636	u32 *us2_reg = &mmr_adb->us2_regs[0];
637	u32 *us3_reg = &mmr_adb->us3_regs[0];
638	const unsigned short *cp, *end_cp;
639
640	cp = &us_coeffs[0].anchor_fid0_c0;
641
642	if (s_q_data->flags & Q_DATA_INTERLACED)	/* interlaced */
643		cp += sizeof(us_coeffs[0]) / sizeof(*cp);
644
645	end_cp = cp + sizeof(us_coeffs[0]) / sizeof(*cp);
646
647	while (cp < end_cp) {
648		write_field(us1_reg, *cp++, VPE_US_C0_MASK, VPE_US_C0_SHIFT);
649		write_field(us1_reg, *cp++, VPE_US_C1_MASK, VPE_US_C1_SHIFT);
650		*us2_reg++ = *us1_reg;
651		*us3_reg++ = *us1_reg++;
652	}
653	ctx->load_mmrs = true;
654}
655
656/*
657 * Set the upsampler config mode and the VPDMA line mode in the shadow MMRs.
658 */
659static void set_cfg_and_line_modes(struct vpe_ctx *ctx)
660{
661	struct vpe_fmt *fmt = ctx->q_data[Q_DATA_SRC].fmt;
662	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
663	u32 *us1_reg0 = &mmr_adb->us1_regs[0];
664	u32 *us2_reg0 = &mmr_adb->us2_regs[0];
665	u32 *us3_reg0 = &mmr_adb->us3_regs[0];
666	int line_mode = 1;
667	int cfg_mode = 1;
668
669	/*
670	 * Cfg Mode 0: YUV420 source, enable upsampler, DEI is de-interlacing.
671	 * Cfg Mode 1: YUV422 source, disable upsampler, DEI is de-interlacing.
672	 */
673
674	if (fmt->fourcc == V4L2_PIX_FMT_NV12) {
675		cfg_mode = 0;
676		line_mode = 0;		/* double lines to line buffer */
677	}
678
679	write_field(us1_reg0, cfg_mode, VPE_US_MODE_MASK, VPE_US_MODE_SHIFT);
680	write_field(us2_reg0, cfg_mode, VPE_US_MODE_MASK, VPE_US_MODE_SHIFT);
681	write_field(us3_reg0, cfg_mode, VPE_US_MODE_MASK, VPE_US_MODE_SHIFT);
682
683	/* regs for now */
684	vpdma_set_line_mode(ctx->dev->vpdma, line_mode, VPE_CHAN_CHROMA1_IN);
685	vpdma_set_line_mode(ctx->dev->vpdma, line_mode, VPE_CHAN_CHROMA2_IN);
686	vpdma_set_line_mode(ctx->dev->vpdma, line_mode, VPE_CHAN_CHROMA3_IN);
687
688	/* frame start for input luma */
689	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
690		VPE_CHAN_LUMA1_IN);
691	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
692		VPE_CHAN_LUMA2_IN);
693	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
694		VPE_CHAN_LUMA3_IN);
695
696	/* frame start for input chroma */
697	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
698		VPE_CHAN_CHROMA1_IN);
699	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
700		VPE_CHAN_CHROMA2_IN);
701	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
702		VPE_CHAN_CHROMA3_IN);
703
704	/* frame start for MV in client */
705	vpdma_set_frame_start_event(ctx->dev->vpdma, VPDMA_FSEVENT_CHANNEL_ACTIVE,
706		VPE_CHAN_MV_IN);
707
708	ctx->load_mmrs = true;
709}
710
711/*
712 * Set the shadow registers that are modified when the source
713 * format changes.
714 */
715static void set_src_registers(struct vpe_ctx *ctx)
716{
717	set_us_coefficients(ctx);
718}
719
720/*
721 * Set the shadow registers that are modified when the destination
722 * format changes.
723 */
724static void set_dst_registers(struct vpe_ctx *ctx)
725{
726	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
727	enum v4l2_colorspace clrspc = ctx->q_data[Q_DATA_DST].colorspace;
728	struct vpe_fmt *fmt = ctx->q_data[Q_DATA_DST].fmt;
729	u32 val = 0;
730
731	if (clrspc == V4L2_COLORSPACE_SRGB)
732		val |= VPE_RGB_OUT_SELECT;
733	else if (fmt->fourcc == V4L2_PIX_FMT_NV16)
734		val |= VPE_COLOR_SEPARATE_422;
735
736	/*
737	 * the source of CHR_DS and CSC is always the scaler, irrespective of
738	 * whether it's used or not
739	 */
740	val |= VPE_DS_SRC_DEI_SCALER | VPE_CSC_SRC_DEI_SCALER;
741
742	if (fmt->fourcc != V4L2_PIX_FMT_NV12)
743		val |= VPE_DS_BYPASS;
744
745	mmr_adb->out_fmt_reg[0] = val;
746
747	ctx->load_mmrs = true;
748}
749
750/*
751 * Set the de-interlacer shadow register values
752 */
753static void set_dei_regs(struct vpe_ctx *ctx)
754{
755	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
756	struct vpe_q_data *s_q_data = &ctx->q_data[Q_DATA_SRC];
757	unsigned int src_h = s_q_data->c_rect.height;
758	unsigned int src_w = s_q_data->c_rect.width;
759	u32 *dei_mmr0 = &mmr_adb->dei_regs[0];
760	bool deinterlace = true;
761	u32 val = 0;
762
763	/*
764	 * according to TRM, we should set DEI in progressive bypass mode when
765	 * the input content is progressive, however, DEI is bypassed correctly
766	 * for both progressive and interlace content in interlace bypass mode.
767	 * It has been recommended not to use progressive bypass mode.
768	 */
769	if ((!ctx->deinterlacing && (s_q_data->flags & Q_DATA_INTERLACED)) ||
770			!(s_q_data->flags & Q_DATA_INTERLACED)) {
771		deinterlace = false;
772		val = VPE_DEI_INTERLACE_BYPASS;
773	}
774
775	src_h = deinterlace ? src_h * 2 : src_h;
776
777	val |= (src_h << VPE_DEI_HEIGHT_SHIFT) |
778		(src_w << VPE_DEI_WIDTH_SHIFT) |
779		VPE_DEI_FIELD_FLUSH;
780
781	*dei_mmr0 = val;
782
783	ctx->load_mmrs = true;
784}
785
786static void set_dei_shadow_registers(struct vpe_ctx *ctx)
787{
788	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
789	u32 *dei_mmr = &mmr_adb->dei_regs[0];
790	const struct vpe_dei_regs *cur = &dei_regs;
791
792	dei_mmr[2]  = cur->mdt_spacial_freq_thr_reg;
793	dei_mmr[3]  = cur->edi_config_reg;
794	dei_mmr[4]  = cur->edi_lut_reg0;
795	dei_mmr[5]  = cur->edi_lut_reg1;
796	dei_mmr[6]  = cur->edi_lut_reg2;
797	dei_mmr[7]  = cur->edi_lut_reg3;
798
799	ctx->load_mmrs = true;
800}
801
802/*
803 * Set the shadow registers whose values are modified when either the
804 * source or destination format is changed.
805 */
806static int set_srcdst_params(struct vpe_ctx *ctx)
807{
808	struct vpe_q_data *s_q_data =  &ctx->q_data[Q_DATA_SRC];
809	struct vpe_q_data *d_q_data =  &ctx->q_data[Q_DATA_DST];
810	struct vpe_mmr_adb *mmr_adb = ctx->mmr_adb.addr;
811	unsigned int src_w = s_q_data->c_rect.width;
812	unsigned int src_h = s_q_data->c_rect.height;
813	unsigned int dst_w = d_q_data->c_rect.width;
814	unsigned int dst_h = d_q_data->c_rect.height;
815	size_t mv_buf_size;
816	int ret;
817
818	ctx->sequence = 0;
819	ctx->field = V4L2_FIELD_TOP;
820
821	if ((s_q_data->flags & Q_DATA_INTERLACED) &&
822			!(d_q_data->flags & Q_DATA_INTERLACED)) {
823		int bytes_per_line;
824		const struct vpdma_data_format *mv =
825			&vpdma_misc_fmts[VPDMA_DATA_FMT_MV];
826
827		/*
828		 * we make sure that the source image has a 16 byte aligned
829		 * stride, we need to do the same for the motion vector buffer
830		 * by aligning it's stride to the next 16 byte boundry. this
831		 * extra space will not be used by the de-interlacer, but will
832		 * ensure that vpdma operates correctly
833		 */
834		bytes_per_line = ALIGN((s_q_data->width * mv->depth) >> 3,
835					VPDMA_STRIDE_ALIGN);
836		mv_buf_size = bytes_per_line * s_q_data->height;
837
838		ctx->deinterlacing = true;
839		src_h <<= 1;
840	} else {
841		ctx->deinterlacing = false;
842		mv_buf_size = 0;
843	}
844
845	free_vbs(ctx);
846
847	ret = realloc_mv_buffers(ctx, mv_buf_size);
848	if (ret)
849		return ret;
850
851	set_cfg_and_line_modes(ctx);
852	set_dei_regs(ctx);
853
854	csc_set_coeff(ctx->dev->csc, &mmr_adb->csc_regs[0],
855		s_q_data->colorspace, d_q_data->colorspace);
856
857	sc_set_hs_coeffs(ctx->dev->sc, ctx->sc_coeff_h.addr, src_w, dst_w);
858	sc_set_vs_coeffs(ctx->dev->sc, ctx->sc_coeff_v.addr, src_h, dst_h);
859
860	sc_config_scaler(ctx->dev->sc, &mmr_adb->sc_regs0[0],
861		&mmr_adb->sc_regs8[0], &mmr_adb->sc_regs17[0],
862		src_w, src_h, dst_w, dst_h);
863
864	return 0;
865}
866
867/*
868 * Return the vpe_ctx structure for a given struct file
869 */
870static struct vpe_ctx *file2ctx(struct file *file)
871{
872	return container_of(file->private_data, struct vpe_ctx, fh);
873}
874
875/*
876 * mem2mem callbacks
877 */
878
879/**
880 * job_ready() - check whether an instance is ready to be scheduled to run
881 */
882static int job_ready(void *priv)
883{
884	struct vpe_ctx *ctx = priv;
885	int needed = ctx->bufs_per_job;
886
887	if (ctx->deinterlacing && ctx->src_vbs[2] == NULL)
888		needed += 2;	/* need additional two most recent fields */
889
890	if (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) < needed)
891		return 0;
892
893	if (v4l2_m2m_num_dst_bufs_ready(ctx->fh.m2m_ctx) < needed)
894		return 0;
895
896	return 1;
897}
898
899static void job_abort(void *priv)
900{
901	struct vpe_ctx *ctx = priv;
902
903	/* Will cancel the transaction in the next interrupt handler */
904	ctx->aborting = 1;
905}
906
907/*
908 * Lock access to the device
909 */
910static void vpe_lock(void *priv)
911{
912	struct vpe_ctx *ctx = priv;
913	struct vpe_dev *dev = ctx->dev;
914	mutex_lock(&dev->dev_mutex);
915}
916
917static void vpe_unlock(void *priv)
918{
919	struct vpe_ctx *ctx = priv;
920	struct vpe_dev *dev = ctx->dev;
921	mutex_unlock(&dev->dev_mutex);
922}
923
924static void vpe_dump_regs(struct vpe_dev *dev)
925{
926#define DUMPREG(r) vpe_dbg(dev, "%-35s %08x\n", #r, read_reg(dev, VPE_##r))
927
928	vpe_dbg(dev, "VPE Registers:\n");
929
930	DUMPREG(PID);
931	DUMPREG(SYSCONFIG);
932	DUMPREG(INT0_STATUS0_RAW);
933	DUMPREG(INT0_STATUS0);
934	DUMPREG(INT0_ENABLE0);
935	DUMPREG(INT0_STATUS1_RAW);
936	DUMPREG(INT0_STATUS1);
937	DUMPREG(INT0_ENABLE1);
938	DUMPREG(CLK_ENABLE);
939	DUMPREG(CLK_RESET);
940	DUMPREG(CLK_FORMAT_SELECT);
941	DUMPREG(CLK_RANGE_MAP);
942	DUMPREG(US1_R0);
943	DUMPREG(US1_R1);
944	DUMPREG(US1_R2);
945	DUMPREG(US1_R3);
946	DUMPREG(US1_R4);
947	DUMPREG(US1_R5);
948	DUMPREG(US1_R6);
949	DUMPREG(US1_R7);
950	DUMPREG(US2_R0);
951	DUMPREG(US2_R1);
952	DUMPREG(US2_R2);
953	DUMPREG(US2_R3);
954	DUMPREG(US2_R4);
955	DUMPREG(US2_R5);
956	DUMPREG(US2_R6);
957	DUMPREG(US2_R7);
958	DUMPREG(US3_R0);
959	DUMPREG(US3_R1);
960	DUMPREG(US3_R2);
961	DUMPREG(US3_R3);
962	DUMPREG(US3_R4);
963	DUMPREG(US3_R5);
964	DUMPREG(US3_R6);
965	DUMPREG(US3_R7);
966	DUMPREG(DEI_FRAME_SIZE);
967	DUMPREG(MDT_BYPASS);
968	DUMPREG(MDT_SF_THRESHOLD);
969	DUMPREG(EDI_CONFIG);
970	DUMPREG(DEI_EDI_LUT_R0);
971	DUMPREG(DEI_EDI_LUT_R1);
972	DUMPREG(DEI_EDI_LUT_R2);
973	DUMPREG(DEI_EDI_LUT_R3);
974	DUMPREG(DEI_FMD_WINDOW_R0);
975	DUMPREG(DEI_FMD_WINDOW_R1);
976	DUMPREG(DEI_FMD_CONTROL_R0);
977	DUMPREG(DEI_FMD_CONTROL_R1);
978	DUMPREG(DEI_FMD_STATUS_R0);
979	DUMPREG(DEI_FMD_STATUS_R1);
980	DUMPREG(DEI_FMD_STATUS_R2);
981#undef DUMPREG
982
983	sc_dump_regs(dev->sc);
984	csc_dump_regs(dev->csc);
985}
986
987static void add_out_dtd(struct vpe_ctx *ctx, int port)
988{
989	struct vpe_q_data *q_data = &ctx->q_data[Q_DATA_DST];
990	const struct vpe_port_data *p_data = &port_data[port];
991	struct vb2_buffer *vb = ctx->dst_vb;
992	struct vpe_fmt *fmt = q_data->fmt;
993	const struct vpdma_data_format *vpdma_fmt;
994	int mv_buf_selector = !ctx->src_mv_buf_selector;
995	dma_addr_t dma_addr;
996	u32 flags = 0;
997
998	if (port == VPE_PORT_MV_OUT) {
999		vpdma_fmt = &vpdma_misc_fmts[VPDMA_DATA_FMT_MV];
1000		dma_addr = ctx->mv_buf_dma[mv_buf_selector];
1001	} else {
1002		/* to incorporate interleaved formats */
1003		int plane = fmt->coplanar ? p_data->vb_part : 0;
1004
1005		vpdma_fmt = fmt->vpdma_fmt[plane];
1006		dma_addr = vb2_dma_contig_plane_dma_addr(vb, plane);
1007		if (!dma_addr) {
1008			vpe_err(ctx->dev,
1009				"acquiring output buffer(%d) dma_addr failed\n",
1010				port);
1011			return;
1012		}
1013	}
1014
1015	if (q_data->flags & Q_DATA_FRAME_1D)
1016		flags |= VPDMA_DATA_FRAME_1D;
1017	if (q_data->flags & Q_DATA_MODE_TILED)
1018		flags |= VPDMA_DATA_MODE_TILED;
1019
1020	vpdma_add_out_dtd(&ctx->desc_list, q_data->width, &q_data->c_rect,
1021		vpdma_fmt, dma_addr, p_data->channel, flags);
1022}
1023
1024static void add_in_dtd(struct vpe_ctx *ctx, int port)
1025{
1026	struct vpe_q_data *q_data = &ctx->q_data[Q_DATA_SRC];
1027	const struct vpe_port_data *p_data = &port_data[port];
1028	struct vb2_buffer *vb = ctx->src_vbs[p_data->vb_index];
1029	struct vpe_fmt *fmt = q_data->fmt;
1030	const struct vpdma_data_format *vpdma_fmt;
1031	int mv_buf_selector = ctx->src_mv_buf_selector;
1032	int field = vb->v4l2_buf.field == V4L2_FIELD_BOTTOM;
1033	int frame_width, frame_height;
1034	dma_addr_t dma_addr;
1035	u32 flags = 0;
1036
1037	if (port == VPE_PORT_MV_IN) {
1038		vpdma_fmt = &vpdma_misc_fmts[VPDMA_DATA_FMT_MV];
1039		dma_addr = ctx->mv_buf_dma[mv_buf_selector];
1040	} else {
1041		/* to incorporate interleaved formats */
1042		int plane = fmt->coplanar ? p_data->vb_part : 0;
1043
1044		vpdma_fmt = fmt->vpdma_fmt[plane];
1045
1046		dma_addr = vb2_dma_contig_plane_dma_addr(vb, plane);
1047		if (!dma_addr) {
1048			vpe_err(ctx->dev,
1049				"acquiring input buffer(%d) dma_addr failed\n",
1050				port);
1051			return;
1052		}
1053	}
1054
1055	if (q_data->flags & Q_DATA_FRAME_1D)
1056		flags |= VPDMA_DATA_FRAME_1D;
1057	if (q_data->flags & Q_DATA_MODE_TILED)
1058		flags |= VPDMA_DATA_MODE_TILED;
1059
1060	frame_width = q_data->c_rect.width;
1061	frame_height = q_data->c_rect.height;
1062
1063	if (p_data->vb_part && fmt->fourcc == V4L2_PIX_FMT_NV12)
1064		frame_height /= 2;
1065
1066	vpdma_add_in_dtd(&ctx->desc_list, q_data->width, &q_data->c_rect,
1067		vpdma_fmt, dma_addr, p_data->channel, field, flags, frame_width,
1068		frame_height, 0, 0);
1069}
1070
1071/*
1072 * Enable the expected IRQ sources
1073 */
1074static void enable_irqs(struct vpe_ctx *ctx)
1075{
1076	write_reg(ctx->dev, VPE_INT0_ENABLE0_SET, VPE_INT0_LIST0_COMPLETE);
1077	write_reg(ctx->dev, VPE_INT0_ENABLE1_SET, VPE_DEI_ERROR_INT |
1078				VPE_DS1_UV_ERROR_INT);
1079
1080	vpdma_enable_list_complete_irq(ctx->dev->vpdma, 0, true);
1081}
1082
1083static void disable_irqs(struct vpe_ctx *ctx)
1084{
1085	write_reg(ctx->dev, VPE_INT0_ENABLE0_CLR, 0xffffffff);
1086	write_reg(ctx->dev, VPE_INT0_ENABLE1_CLR, 0xffffffff);
1087
1088	vpdma_enable_list_complete_irq(ctx->dev->vpdma, 0, false);
1089}
1090
1091/* device_run() - prepares and starts the device
1092 *
1093 * This function is only called when both the source and destination
1094 * buffers are in place.
1095 */
1096static void device_run(void *priv)
1097{
1098	struct vpe_ctx *ctx = priv;
1099	struct sc_data *sc = ctx->dev->sc;
1100	struct vpe_q_data *d_q_data = &ctx->q_data[Q_DATA_DST];
1101
1102	if (ctx->deinterlacing && ctx->src_vbs[2] == NULL) {
1103		ctx->src_vbs[2] = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1104		WARN_ON(ctx->src_vbs[2] == NULL);
1105		ctx->src_vbs[1] = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1106		WARN_ON(ctx->src_vbs[1] == NULL);
1107	}
1108
1109	ctx->src_vbs[0] = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1110	WARN_ON(ctx->src_vbs[0] == NULL);
1111	ctx->dst_vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1112	WARN_ON(ctx->dst_vb == NULL);
1113
1114	/* config descriptors */
1115	if (ctx->dev->loaded_mmrs != ctx->mmr_adb.dma_addr || ctx->load_mmrs) {
1116		vpdma_map_desc_buf(ctx->dev->vpdma, &ctx->mmr_adb);
1117		vpdma_add_cfd_adb(&ctx->desc_list, CFD_MMR_CLIENT, &ctx->mmr_adb);
1118		ctx->dev->loaded_mmrs = ctx->mmr_adb.dma_addr;
1119		ctx->load_mmrs = false;
1120	}
1121
1122	if (sc->loaded_coeff_h != ctx->sc_coeff_h.dma_addr ||
1123			sc->load_coeff_h) {
1124		vpdma_map_desc_buf(ctx->dev->vpdma, &ctx->sc_coeff_h);
1125		vpdma_add_cfd_block(&ctx->desc_list, CFD_SC_CLIENT,
1126			&ctx->sc_coeff_h, 0);
1127
1128		sc->loaded_coeff_h = ctx->sc_coeff_h.dma_addr;
1129		sc->load_coeff_h = false;
1130	}
1131
1132	if (sc->loaded_coeff_v != ctx->sc_coeff_v.dma_addr ||
1133			sc->load_coeff_v) {
1134		vpdma_map_desc_buf(ctx->dev->vpdma, &ctx->sc_coeff_v);
1135		vpdma_add_cfd_block(&ctx->desc_list, CFD_SC_CLIENT,
1136			&ctx->sc_coeff_v, SC_COEF_SRAM_SIZE >> 4);
1137
1138		sc->loaded_coeff_v = ctx->sc_coeff_v.dma_addr;
1139		sc->load_coeff_v = false;
1140	}
1141
1142	/* output data descriptors */
1143	if (ctx->deinterlacing)
1144		add_out_dtd(ctx, VPE_PORT_MV_OUT);
1145
1146	if (d_q_data->colorspace == V4L2_COLORSPACE_SRGB) {
1147		add_out_dtd(ctx, VPE_PORT_RGB_OUT);
1148	} else {
1149		add_out_dtd(ctx, VPE_PORT_LUMA_OUT);
1150		if (d_q_data->fmt->coplanar)
1151			add_out_dtd(ctx, VPE_PORT_CHROMA_OUT);
1152	}
1153
1154	/* input data descriptors */
1155	if (ctx->deinterlacing) {
1156		add_in_dtd(ctx, VPE_PORT_LUMA3_IN);
1157		add_in_dtd(ctx, VPE_PORT_CHROMA3_IN);
1158
1159		add_in_dtd(ctx, VPE_PORT_LUMA2_IN);
1160		add_in_dtd(ctx, VPE_PORT_CHROMA2_IN);
1161	}
1162
1163	add_in_dtd(ctx, VPE_PORT_LUMA1_IN);
1164	add_in_dtd(ctx, VPE_PORT_CHROMA1_IN);
1165
1166	if (ctx->deinterlacing)
1167		add_in_dtd(ctx, VPE_PORT_MV_IN);
1168
1169	/* sync on channel control descriptors for input ports */
1170	vpdma_add_sync_on_channel_ctd(&ctx->desc_list, VPE_CHAN_LUMA1_IN);
1171	vpdma_add_sync_on_channel_ctd(&ctx->desc_list, VPE_CHAN_CHROMA1_IN);
1172
1173	if (ctx->deinterlacing) {
1174		vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1175			VPE_CHAN_LUMA2_IN);
1176		vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1177			VPE_CHAN_CHROMA2_IN);
1178
1179		vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1180			VPE_CHAN_LUMA3_IN);
1181		vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1182			VPE_CHAN_CHROMA3_IN);
1183
1184		vpdma_add_sync_on_channel_ctd(&ctx->desc_list, VPE_CHAN_MV_IN);
1185	}
1186
1187	/* sync on channel control descriptors for output ports */
1188	if (d_q_data->colorspace == V4L2_COLORSPACE_SRGB) {
1189		vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1190			VPE_CHAN_RGB_OUT);
1191	} else {
1192		vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1193			VPE_CHAN_LUMA_OUT);
1194		if (d_q_data->fmt->coplanar)
1195			vpdma_add_sync_on_channel_ctd(&ctx->desc_list,
1196				VPE_CHAN_CHROMA_OUT);
1197	}
1198
1199	if (ctx->deinterlacing)
1200		vpdma_add_sync_on_channel_ctd(&ctx->desc_list, VPE_CHAN_MV_OUT);
1201
1202	enable_irqs(ctx);
1203
1204	vpdma_map_desc_buf(ctx->dev->vpdma, &ctx->desc_list.buf);
1205	vpdma_submit_descs(ctx->dev->vpdma, &ctx->desc_list);
1206}
1207
1208static void dei_error(struct vpe_ctx *ctx)
1209{
1210	dev_warn(ctx->dev->v4l2_dev.dev,
1211		"received DEI error interrupt\n");
1212}
1213
1214static void ds1_uv_error(struct vpe_ctx *ctx)
1215{
1216	dev_warn(ctx->dev->v4l2_dev.dev,
1217		"received downsampler error interrupt\n");
1218}
1219
1220static irqreturn_t vpe_irq(int irq_vpe, void *data)
1221{
1222	struct vpe_dev *dev = (struct vpe_dev *)data;
1223	struct vpe_ctx *ctx;
1224	struct vpe_q_data *d_q_data;
1225	struct vb2_buffer *s_vb, *d_vb;
1226	struct v4l2_buffer *s_buf, *d_buf;
1227	unsigned long flags;
1228	u32 irqst0, irqst1;
1229
1230	irqst0 = read_reg(dev, VPE_INT0_STATUS0);
1231	if (irqst0) {
1232		write_reg(dev, VPE_INT0_STATUS0_CLR, irqst0);
1233		vpe_dbg(dev, "INT0_STATUS0 = 0x%08x\n", irqst0);
1234	}
1235
1236	irqst1 = read_reg(dev, VPE_INT0_STATUS1);
1237	if (irqst1) {
1238		write_reg(dev, VPE_INT0_STATUS1_CLR, irqst1);
1239		vpe_dbg(dev, "INT0_STATUS1 = 0x%08x\n", irqst1);
1240	}
1241
1242	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
1243	if (!ctx) {
1244		vpe_err(dev, "instance released before end of transaction\n");
1245		goto handled;
1246	}
1247
1248	if (irqst1) {
1249		if (irqst1 & VPE_DEI_ERROR_INT) {
1250			irqst1 &= ~VPE_DEI_ERROR_INT;
1251			dei_error(ctx);
1252		}
1253		if (irqst1 & VPE_DS1_UV_ERROR_INT) {
1254			irqst1 &= ~VPE_DS1_UV_ERROR_INT;
1255			ds1_uv_error(ctx);
1256		}
1257	}
1258
1259	if (irqst0) {
1260		if (irqst0 & VPE_INT0_LIST0_COMPLETE)
1261			vpdma_clear_list_stat(ctx->dev->vpdma);
1262
1263		irqst0 &= ~(VPE_INT0_LIST0_COMPLETE);
1264	}
1265
1266	if (irqst0 | irqst1) {
1267		dev_warn(dev->v4l2_dev.dev, "Unexpected interrupt: "
1268			"INT0_STATUS0 = 0x%08x, INT0_STATUS1 = 0x%08x\n",
1269			irqst0, irqst1);
1270	}
1271
1272	disable_irqs(ctx);
1273
1274	vpdma_unmap_desc_buf(dev->vpdma, &ctx->desc_list.buf);
1275	vpdma_unmap_desc_buf(dev->vpdma, &ctx->mmr_adb);
1276	vpdma_unmap_desc_buf(dev->vpdma, &ctx->sc_coeff_h);
1277	vpdma_unmap_desc_buf(dev->vpdma, &ctx->sc_coeff_v);
1278
1279	vpdma_reset_desc_list(&ctx->desc_list);
1280
1281	 /* the previous dst mv buffer becomes the next src mv buffer */
1282	ctx->src_mv_buf_selector = !ctx->src_mv_buf_selector;
1283
1284	if (ctx->aborting)
1285		goto finished;
1286
1287	s_vb = ctx->src_vbs[0];
1288	d_vb = ctx->dst_vb;
1289	s_buf = &s_vb->v4l2_buf;
1290	d_buf = &d_vb->v4l2_buf;
1291
1292	d_buf->flags = s_buf->flags;
1293
1294	d_buf->timestamp = s_buf->timestamp;
1295	if (s_buf->flags & V4L2_BUF_FLAG_TIMECODE)
1296		d_buf->timecode = s_buf->timecode;
1297
1298	d_buf->sequence = ctx->sequence;
1299
1300	d_q_data = &ctx->q_data[Q_DATA_DST];
1301	if (d_q_data->flags & Q_DATA_INTERLACED) {
1302		d_buf->field = ctx->field;
1303		if (ctx->field == V4L2_FIELD_BOTTOM) {
1304			ctx->sequence++;
1305			ctx->field = V4L2_FIELD_TOP;
1306		} else {
1307			WARN_ON(ctx->field != V4L2_FIELD_TOP);
1308			ctx->field = V4L2_FIELD_BOTTOM;
1309		}
1310	} else {
1311		d_buf->field = V4L2_FIELD_NONE;
1312		ctx->sequence++;
1313	}
1314
1315	if (ctx->deinterlacing)
1316		s_vb = ctx->src_vbs[2];
1317
1318	spin_lock_irqsave(&dev->lock, flags);
1319	v4l2_m2m_buf_done(s_vb, VB2_BUF_STATE_DONE);
1320	v4l2_m2m_buf_done(d_vb, VB2_BUF_STATE_DONE);
1321	spin_unlock_irqrestore(&dev->lock, flags);
1322
1323	if (ctx->deinterlacing) {
1324		ctx->src_vbs[2] = ctx->src_vbs[1];
1325		ctx->src_vbs[1] = ctx->src_vbs[0];
1326	}
1327
1328	ctx->bufs_completed++;
1329	if (ctx->bufs_completed < ctx->bufs_per_job) {
1330		device_run(ctx);
1331		goto handled;
1332	}
1333
1334finished:
1335	vpe_dbg(ctx->dev, "finishing transaction\n");
1336	ctx->bufs_completed = 0;
1337	v4l2_m2m_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx);
1338handled:
1339	return IRQ_HANDLED;
1340}
1341
1342/*
1343 * video ioctls
1344 */
1345static int vpe_querycap(struct file *file, void *priv,
1346			struct v4l2_capability *cap)
1347{
1348	strncpy(cap->driver, VPE_MODULE_NAME, sizeof(cap->driver) - 1);
1349	strncpy(cap->card, VPE_MODULE_NAME, sizeof(cap->card) - 1);
1350	snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s",
1351		VPE_MODULE_NAME);
1352	cap->device_caps  = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING;
1353	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
1354	return 0;
1355}
1356
1357static int __enum_fmt(struct v4l2_fmtdesc *f, u32 type)
1358{
1359	int i, index;
1360	struct vpe_fmt *fmt = NULL;
1361
1362	index = 0;
1363	for (i = 0; i < ARRAY_SIZE(vpe_formats); ++i) {
1364		if (vpe_formats[i].types & type) {
1365			if (index == f->index) {
1366				fmt = &vpe_formats[i];
1367				break;
1368			}
1369			index++;
1370		}
1371	}
1372
1373	if (!fmt)
1374		return -EINVAL;
1375
1376	strncpy(f->description, fmt->name, sizeof(f->description) - 1);
1377	f->pixelformat = fmt->fourcc;
1378	return 0;
1379}
1380
1381static int vpe_enum_fmt(struct file *file, void *priv,
1382				struct v4l2_fmtdesc *f)
1383{
1384	if (V4L2_TYPE_IS_OUTPUT(f->type))
1385		return __enum_fmt(f, VPE_FMT_TYPE_OUTPUT);
1386
1387	return __enum_fmt(f, VPE_FMT_TYPE_CAPTURE);
1388}
1389
1390static int vpe_g_fmt(struct file *file, void *priv, struct v4l2_format *f)
1391{
1392	struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
1393	struct vpe_ctx *ctx = file2ctx(file);
1394	struct vb2_queue *vq;
1395	struct vpe_q_data *q_data;
1396	int i;
1397
1398	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
1399	if (!vq)
1400		return -EINVAL;
1401
1402	q_data = get_q_data(ctx, f->type);
1403
1404	pix->width = q_data->width;
1405	pix->height = q_data->height;
1406	pix->pixelformat = q_data->fmt->fourcc;
1407	pix->field = q_data->field;
1408
1409	if (V4L2_TYPE_IS_OUTPUT(f->type)) {
1410		pix->colorspace = q_data->colorspace;
1411	} else {
1412		struct vpe_q_data *s_q_data;
1413
1414		/* get colorspace from the source queue */
1415		s_q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
1416
1417		pix->colorspace = s_q_data->colorspace;
1418	}
1419
1420	pix->num_planes = q_data->fmt->coplanar ? 2 : 1;
1421
1422	for (i = 0; i < pix->num_planes; i++) {
1423		pix->plane_fmt[i].bytesperline = q_data->bytesperline[i];
1424		pix->plane_fmt[i].sizeimage = q_data->sizeimage[i];
1425	}
1426
1427	return 0;
1428}
1429
1430static int __vpe_try_fmt(struct vpe_ctx *ctx, struct v4l2_format *f,
1431		       struct vpe_fmt *fmt, int type)
1432{
1433	struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
1434	struct v4l2_plane_pix_format *plane_fmt;
1435	unsigned int w_align;
1436	int i, depth, depth_bytes;
1437
1438	if (!fmt || !(fmt->types & type)) {
1439		vpe_err(ctx->dev, "Fourcc format (0x%08x) invalid.\n",
1440			pix->pixelformat);
1441		return -EINVAL;
1442	}
1443
1444	if (pix->field != V4L2_FIELD_NONE && pix->field != V4L2_FIELD_ALTERNATE)
1445		pix->field = V4L2_FIELD_NONE;
1446
1447	depth = fmt->vpdma_fmt[VPE_LUMA]->depth;
1448
1449	/*
1450	 * the line stride should 16 byte aligned for VPDMA to work, based on
1451	 * the bytes per pixel, figure out how much the width should be aligned
1452	 * to make sure line stride is 16 byte aligned
1453	 */
1454	depth_bytes = depth >> 3;
1455
1456	if (depth_bytes == 3)
1457		/*
1458		 * if bpp is 3(as in some RGB formats), the pixel width doesn't
1459		 * really help in ensuring line stride is 16 byte aligned
1460		 */
1461		w_align = 4;
1462	else
1463		/*
1464		 * for the remainder bpp(4, 2 and 1), the pixel width alignment
1465		 * can ensure a line stride alignment of 16 bytes. For example,
1466		 * if bpp is 2, then the line stride can be 16 byte aligned if
1467		 * the width is 8 byte aligned
1468		 */
1469		w_align = order_base_2(VPDMA_DESC_ALIGN / depth_bytes);
1470
1471	v4l_bound_align_image(&pix->width, MIN_W, MAX_W, w_align,
1472			      &pix->height, MIN_H, MAX_H, H_ALIGN,
1473			      S_ALIGN);
1474
1475	pix->num_planes = fmt->coplanar ? 2 : 1;
1476	pix->pixelformat = fmt->fourcc;
1477
1478	if (!pix->colorspace) {
1479		if (fmt->fourcc == V4L2_PIX_FMT_RGB24 ||
1480				fmt->fourcc == V4L2_PIX_FMT_BGR24 ||
1481				fmt->fourcc == V4L2_PIX_FMT_RGB32 ||
1482				fmt->fourcc == V4L2_PIX_FMT_BGR32) {
1483			pix->colorspace = V4L2_COLORSPACE_SRGB;
1484		} else {
1485			if (pix->height > 1280)	/* HD */
1486				pix->colorspace = V4L2_COLORSPACE_REC709;
1487			else			/* SD */
1488				pix->colorspace = V4L2_COLORSPACE_SMPTE170M;
1489		}
1490	}
1491
1492	memset(pix->reserved, 0, sizeof(pix->reserved));
1493	for (i = 0; i < pix->num_planes; i++) {
1494		plane_fmt = &pix->plane_fmt[i];
1495		depth = fmt->vpdma_fmt[i]->depth;
1496
1497		if (i == VPE_LUMA)
1498			plane_fmt->bytesperline = (pix->width * depth) >> 3;
1499		else
1500			plane_fmt->bytesperline = pix->width;
1501
1502		plane_fmt->sizeimage =
1503				(pix->height * pix->width * depth) >> 3;
1504
1505		memset(plane_fmt->reserved, 0, sizeof(plane_fmt->reserved));
1506	}
1507
1508	return 0;
1509}
1510
1511static int vpe_try_fmt(struct file *file, void *priv, struct v4l2_format *f)
1512{
1513	struct vpe_ctx *ctx = file2ctx(file);
1514	struct vpe_fmt *fmt = find_format(f);
1515
1516	if (V4L2_TYPE_IS_OUTPUT(f->type))
1517		return __vpe_try_fmt(ctx, f, fmt, VPE_FMT_TYPE_OUTPUT);
1518	else
1519		return __vpe_try_fmt(ctx, f, fmt, VPE_FMT_TYPE_CAPTURE);
1520}
1521
1522static int __vpe_s_fmt(struct vpe_ctx *ctx, struct v4l2_format *f)
1523{
1524	struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
1525	struct v4l2_plane_pix_format *plane_fmt;
1526	struct vpe_q_data *q_data;
1527	struct vb2_queue *vq;
1528	int i;
1529
1530	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
1531	if (!vq)
1532		return -EINVAL;
1533
1534	if (vb2_is_busy(vq)) {
1535		vpe_err(ctx->dev, "queue busy\n");
1536		return -EBUSY;
1537	}
1538
1539	q_data = get_q_data(ctx, f->type);
1540	if (!q_data)
1541		return -EINVAL;
1542
1543	q_data->fmt		= find_format(f);
1544	q_data->width		= pix->width;
1545	q_data->height		= pix->height;
1546	q_data->colorspace	= pix->colorspace;
1547	q_data->field		= pix->field;
1548
1549	for (i = 0; i < pix->num_planes; i++) {
1550		plane_fmt = &pix->plane_fmt[i];
1551
1552		q_data->bytesperline[i]	= plane_fmt->bytesperline;
1553		q_data->sizeimage[i]	= plane_fmt->sizeimage;
1554	}
1555
1556	q_data->c_rect.left	= 0;
1557	q_data->c_rect.top	= 0;
1558	q_data->c_rect.width	= q_data->width;
1559	q_data->c_rect.height	= q_data->height;
1560
1561	if (q_data->field == V4L2_FIELD_ALTERNATE)
1562		q_data->flags |= Q_DATA_INTERLACED;
1563	else
1564		q_data->flags &= ~Q_DATA_INTERLACED;
1565
1566	vpe_dbg(ctx->dev, "Setting format for type %d, wxh: %dx%d, fmt: %d bpl_y %d",
1567		f->type, q_data->width, q_data->height, q_data->fmt->fourcc,
1568		q_data->bytesperline[VPE_LUMA]);
1569	if (q_data->fmt->coplanar)
1570		vpe_dbg(ctx->dev, " bpl_uv %d\n",
1571			q_data->bytesperline[VPE_CHROMA]);
1572
1573	return 0;
1574}
1575
1576static int vpe_s_fmt(struct file *file, void *priv, struct v4l2_format *f)
1577{
1578	int ret;
1579	struct vpe_ctx *ctx = file2ctx(file);
1580
1581	ret = vpe_try_fmt(file, priv, f);
1582	if (ret)
1583		return ret;
1584
1585	ret = __vpe_s_fmt(ctx, f);
1586	if (ret)
1587		return ret;
1588
1589	if (V4L2_TYPE_IS_OUTPUT(f->type))
1590		set_src_registers(ctx);
1591	else
1592		set_dst_registers(ctx);
1593
1594	return set_srcdst_params(ctx);
1595}
1596
1597static int __vpe_try_selection(struct vpe_ctx *ctx, struct v4l2_selection *s)
1598{
1599	struct vpe_q_data *q_data;
1600
1601	if ((s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) &&
1602	    (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT))
1603		return -EINVAL;
1604
1605	q_data = get_q_data(ctx, s->type);
1606	if (!q_data)
1607		return -EINVAL;
1608
1609	switch (s->target) {
1610	case V4L2_SEL_TGT_COMPOSE:
1611		/*
1612		 * COMPOSE target is only valid for capture buffer type, return
1613		 * error for output buffer type
1614		 */
1615		if (s->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
1616			return -EINVAL;
1617		break;
1618	case V4L2_SEL_TGT_CROP:
1619		/*
1620		 * CROP target is only valid for output buffer type, return
1621		 * error for capture buffer type
1622		 */
1623		if (s->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1624			return -EINVAL;
1625		break;
1626	/*
1627	 * bound and default crop/compose targets are invalid targets to
1628	 * try/set
1629	 */
1630	default:
1631		return -EINVAL;
1632	}
1633
1634	if (s->r.top < 0 || s->r.left < 0) {
1635		vpe_err(ctx->dev, "negative values for top and left\n");
1636		s->r.top = s->r.left = 0;
1637	}
1638
1639	v4l_bound_align_image(&s->r.width, MIN_W, q_data->width, 1,
1640		&s->r.height, MIN_H, q_data->height, H_ALIGN, S_ALIGN);
1641
1642	/* adjust left/top if cropping rectangle is out of bounds */
1643	if (s->r.left + s->r.width > q_data->width)
1644		s->r.left = q_data->width - s->r.width;
1645	if (s->r.top + s->r.height > q_data->height)
1646		s->r.top = q_data->height - s->r.height;
1647
1648	return 0;
1649}
1650
1651static int vpe_g_selection(struct file *file, void *fh,
1652		struct v4l2_selection *s)
1653{
1654	struct vpe_ctx *ctx = file2ctx(file);
1655	struct vpe_q_data *q_data;
1656	bool use_c_rect = false;
1657
1658	if ((s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) &&
1659	    (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT))
1660		return -EINVAL;
1661
1662	q_data = get_q_data(ctx, s->type);
1663	if (!q_data)
1664		return -EINVAL;
1665
1666	switch (s->target) {
1667	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
1668	case V4L2_SEL_TGT_COMPOSE_BOUNDS:
1669		if (s->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
1670			return -EINVAL;
1671		break;
1672	case V4L2_SEL_TGT_CROP_BOUNDS:
1673	case V4L2_SEL_TGT_CROP_DEFAULT:
1674		if (s->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1675			return -EINVAL;
1676		break;
1677	case V4L2_SEL_TGT_COMPOSE:
1678		if (s->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
1679			return -EINVAL;
1680		use_c_rect = true;
1681		break;
1682	case V4L2_SEL_TGT_CROP:
1683		if (s->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1684			return -EINVAL;
1685		use_c_rect = true;
1686		break;
1687	default:
1688		return -EINVAL;
1689	}
1690
1691	if (use_c_rect) {
1692		/*
1693		 * for CROP/COMPOSE target type, return c_rect params from the
1694		 * respective buffer type
1695		 */
1696		s->r = q_data->c_rect;
1697	} else {
1698		/*
1699		 * for DEFAULT/BOUNDS target type, return width and height from
1700		 * S_FMT of the respective buffer type
1701		 */
1702		s->r.left = 0;
1703		s->r.top = 0;
1704		s->r.width = q_data->width;
1705		s->r.height = q_data->height;
1706	}
1707
1708	return 0;
1709}
1710
1711
1712static int vpe_s_selection(struct file *file, void *fh,
1713		struct v4l2_selection *s)
1714{
1715	struct vpe_ctx *ctx = file2ctx(file);
1716	struct vpe_q_data *q_data;
1717	struct v4l2_selection sel = *s;
1718	int ret;
1719
1720	ret = __vpe_try_selection(ctx, &sel);
1721	if (ret)
1722		return ret;
1723
1724	q_data = get_q_data(ctx, sel.type);
1725	if (!q_data)
1726		return -EINVAL;
1727
1728	if ((q_data->c_rect.left == sel.r.left) &&
1729			(q_data->c_rect.top == sel.r.top) &&
1730			(q_data->c_rect.width == sel.r.width) &&
1731			(q_data->c_rect.height == sel.r.height)) {
1732		vpe_dbg(ctx->dev,
1733			"requested crop/compose values are already set\n");
1734		return 0;
1735	}
1736
1737	q_data->c_rect = sel.r;
1738
1739	return set_srcdst_params(ctx);
1740}
1741
1742/*
1743 * defines number of buffers/frames a context can process with VPE before
1744 * switching to a different context. default value is 1 buffer per context
1745 */
1746#define V4L2_CID_VPE_BUFS_PER_JOB		(V4L2_CID_USER_TI_VPE_BASE + 0)
1747
1748static int vpe_s_ctrl(struct v4l2_ctrl *ctrl)
1749{
1750	struct vpe_ctx *ctx =
1751		container_of(ctrl->handler, struct vpe_ctx, hdl);
1752
1753	switch (ctrl->id) {
1754	case V4L2_CID_VPE_BUFS_PER_JOB:
1755		ctx->bufs_per_job = ctrl->val;
1756		break;
1757
1758	default:
1759		vpe_err(ctx->dev, "Invalid control\n");
1760		return -EINVAL;
1761	}
1762
1763	return 0;
1764}
1765
1766static const struct v4l2_ctrl_ops vpe_ctrl_ops = {
1767	.s_ctrl = vpe_s_ctrl,
1768};
1769
1770static const struct v4l2_ioctl_ops vpe_ioctl_ops = {
1771	.vidioc_querycap		= vpe_querycap,
1772
1773	.vidioc_enum_fmt_vid_cap_mplane	= vpe_enum_fmt,
1774	.vidioc_g_fmt_vid_cap_mplane	= vpe_g_fmt,
1775	.vidioc_try_fmt_vid_cap_mplane	= vpe_try_fmt,
1776	.vidioc_s_fmt_vid_cap_mplane	= vpe_s_fmt,
1777
1778	.vidioc_enum_fmt_vid_out_mplane	= vpe_enum_fmt,
1779	.vidioc_g_fmt_vid_out_mplane	= vpe_g_fmt,
1780	.vidioc_try_fmt_vid_out_mplane	= vpe_try_fmt,
1781	.vidioc_s_fmt_vid_out_mplane	= vpe_s_fmt,
1782
1783	.vidioc_g_selection		= vpe_g_selection,
1784	.vidioc_s_selection		= vpe_s_selection,
1785
1786	.vidioc_reqbufs			= v4l2_m2m_ioctl_reqbufs,
1787	.vidioc_querybuf		= v4l2_m2m_ioctl_querybuf,
1788	.vidioc_qbuf			= v4l2_m2m_ioctl_qbuf,
1789	.vidioc_dqbuf			= v4l2_m2m_ioctl_dqbuf,
1790	.vidioc_streamon		= v4l2_m2m_ioctl_streamon,
1791	.vidioc_streamoff		= v4l2_m2m_ioctl_streamoff,
1792
1793	.vidioc_subscribe_event		= v4l2_ctrl_subscribe_event,
1794	.vidioc_unsubscribe_event	= v4l2_event_unsubscribe,
1795};
1796
1797/*
1798 * Queue operations
1799 */
1800static int vpe_queue_setup(struct vb2_queue *vq,
1801			   const struct v4l2_format *fmt,
1802			   unsigned int *nbuffers, unsigned int *nplanes,
1803			   unsigned int sizes[], void *alloc_ctxs[])
1804{
1805	int i;
1806	struct vpe_ctx *ctx = vb2_get_drv_priv(vq);
1807	struct vpe_q_data *q_data;
1808
1809	q_data = get_q_data(ctx, vq->type);
1810
1811	*nplanes = q_data->fmt->coplanar ? 2 : 1;
1812
1813	for (i = 0; i < *nplanes; i++) {
1814		sizes[i] = q_data->sizeimage[i];
1815		alloc_ctxs[i] = ctx->dev->alloc_ctx;
1816	}
1817
1818	vpe_dbg(ctx->dev, "get %d buffer(s) of size %d", *nbuffers,
1819		sizes[VPE_LUMA]);
1820	if (q_data->fmt->coplanar)
1821		vpe_dbg(ctx->dev, " and %d\n", sizes[VPE_CHROMA]);
1822
1823	return 0;
1824}
1825
1826static int vpe_buf_prepare(struct vb2_buffer *vb)
1827{
1828	struct vpe_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
1829	struct vpe_q_data *q_data;
1830	int i, num_planes;
1831
1832	vpe_dbg(ctx->dev, "type: %d\n", vb->vb2_queue->type);
1833
1834	q_data = get_q_data(ctx, vb->vb2_queue->type);
1835	num_planes = q_data->fmt->coplanar ? 2 : 1;
1836
1837	if (vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
1838		if (!(q_data->flags & Q_DATA_INTERLACED)) {
1839			vb->v4l2_buf.field = V4L2_FIELD_NONE;
1840		} else {
1841			if (vb->v4l2_buf.field != V4L2_FIELD_TOP &&
1842					vb->v4l2_buf.field != V4L2_FIELD_BOTTOM)
1843				return -EINVAL;
1844		}
1845	}
1846
1847	for (i = 0; i < num_planes; i++) {
1848		if (vb2_plane_size(vb, i) < q_data->sizeimage[i]) {
1849			vpe_err(ctx->dev,
1850				"data will not fit into plane (%lu < %lu)\n",
1851				vb2_plane_size(vb, i),
1852				(long) q_data->sizeimage[i]);
1853			return -EINVAL;
1854		}
1855	}
1856
1857	for (i = 0; i < num_planes; i++)
1858		vb2_set_plane_payload(vb, i, q_data->sizeimage[i]);
1859
1860	return 0;
1861}
1862
1863static void vpe_buf_queue(struct vb2_buffer *vb)
1864{
1865	struct vpe_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
1866
1867	v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vb);
1868}
1869
1870static int vpe_start_streaming(struct vb2_queue *q, unsigned int count)
1871{
1872	/* currently we do nothing here */
1873
1874	return 0;
1875}
1876
1877static void vpe_stop_streaming(struct vb2_queue *q)
1878{
1879	struct vpe_ctx *ctx = vb2_get_drv_priv(q);
1880
1881	vpe_dump_regs(ctx->dev);
1882	vpdma_dump_regs(ctx->dev->vpdma);
1883}
1884
1885static struct vb2_ops vpe_qops = {
1886	.queue_setup	 = vpe_queue_setup,
1887	.buf_prepare	 = vpe_buf_prepare,
1888	.buf_queue	 = vpe_buf_queue,
1889	.wait_prepare	 = vb2_ops_wait_prepare,
1890	.wait_finish	 = vb2_ops_wait_finish,
1891	.start_streaming = vpe_start_streaming,
1892	.stop_streaming  = vpe_stop_streaming,
1893};
1894
1895static int queue_init(void *priv, struct vb2_queue *src_vq,
1896		      struct vb2_queue *dst_vq)
1897{
1898	struct vpe_ctx *ctx = priv;
1899	struct vpe_dev *dev = ctx->dev;
1900	int ret;
1901
1902	memset(src_vq, 0, sizeof(*src_vq));
1903	src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
1904	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
1905	src_vq->drv_priv = ctx;
1906	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
1907	src_vq->ops = &vpe_qops;
1908	src_vq->mem_ops = &vb2_dma_contig_memops;
1909	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
1910	src_vq->lock = &dev->dev_mutex;
1911
1912	ret = vb2_queue_init(src_vq);
1913	if (ret)
1914		return ret;
1915
1916	memset(dst_vq, 0, sizeof(*dst_vq));
1917	dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
1918	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
1919	dst_vq->drv_priv = ctx;
1920	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
1921	dst_vq->ops = &vpe_qops;
1922	dst_vq->mem_ops = &vb2_dma_contig_memops;
1923	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
1924	dst_vq->lock = &dev->dev_mutex;
1925
1926	return vb2_queue_init(dst_vq);
1927}
1928
1929static const struct v4l2_ctrl_config vpe_bufs_per_job = {
1930	.ops = &vpe_ctrl_ops,
1931	.id = V4L2_CID_VPE_BUFS_PER_JOB,
1932	.name = "Buffers Per Transaction",
1933	.type = V4L2_CTRL_TYPE_INTEGER,
1934	.def = VPE_DEF_BUFS_PER_JOB,
1935	.min = 1,
1936	.max = VIDEO_MAX_FRAME,
1937	.step = 1,
1938};
1939
1940/*
1941 * File operations
1942 */
1943static int vpe_open(struct file *file)
1944{
1945	struct vpe_dev *dev = video_drvdata(file);
1946	struct vpe_q_data *s_q_data;
1947	struct v4l2_ctrl_handler *hdl;
1948	struct vpe_ctx *ctx;
1949	int ret;
1950
1951	vpe_dbg(dev, "vpe_open\n");
1952
1953	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1954	if (!ctx)
1955		return -ENOMEM;
1956
1957	ctx->dev = dev;
1958
1959	if (mutex_lock_interruptible(&dev->dev_mutex)) {
1960		ret = -ERESTARTSYS;
1961		goto free_ctx;
1962	}
1963
1964	ret = vpdma_create_desc_list(&ctx->desc_list, VPE_DESC_LIST_SIZE,
1965			VPDMA_LIST_TYPE_NORMAL);
1966	if (ret != 0)
1967		goto unlock;
1968
1969	ret = vpdma_alloc_desc_buf(&ctx->mmr_adb, sizeof(struct vpe_mmr_adb));
1970	if (ret != 0)
1971		goto free_desc_list;
1972
1973	ret = vpdma_alloc_desc_buf(&ctx->sc_coeff_h, SC_COEF_SRAM_SIZE);
1974	if (ret != 0)
1975		goto free_mmr_adb;
1976
1977	ret = vpdma_alloc_desc_buf(&ctx->sc_coeff_v, SC_COEF_SRAM_SIZE);
1978	if (ret != 0)
1979		goto free_sc_h;
1980
1981	init_adb_hdrs(ctx);
1982
1983	v4l2_fh_init(&ctx->fh, video_devdata(file));
1984	file->private_data = &ctx->fh;
1985
1986	hdl = &ctx->hdl;
1987	v4l2_ctrl_handler_init(hdl, 1);
1988	v4l2_ctrl_new_custom(hdl, &vpe_bufs_per_job, NULL);
1989	if (hdl->error) {
1990		ret = hdl->error;
1991		goto exit_fh;
1992	}
1993	ctx->fh.ctrl_handler = hdl;
1994	v4l2_ctrl_handler_setup(hdl);
1995
1996	s_q_data = &ctx->q_data[Q_DATA_SRC];
1997	s_q_data->fmt = &vpe_formats[2];
1998	s_q_data->width = 1920;
1999	s_q_data->height = 1080;
2000	s_q_data->bytesperline[VPE_LUMA] = (s_q_data->width *
2001			s_q_data->fmt->vpdma_fmt[VPE_LUMA]->depth) >> 3;
2002	s_q_data->sizeimage[VPE_LUMA] = (s_q_data->bytesperline[VPE_LUMA] *
2003			s_q_data->height);
2004	s_q_data->colorspace = V4L2_COLORSPACE_REC709;
2005	s_q_data->field = V4L2_FIELD_NONE;
2006	s_q_data->c_rect.left = 0;
2007	s_q_data->c_rect.top = 0;
2008	s_q_data->c_rect.width = s_q_data->width;
2009	s_q_data->c_rect.height = s_q_data->height;
2010	s_q_data->flags = 0;
2011
2012	ctx->q_data[Q_DATA_DST] = *s_q_data;
2013
2014	set_dei_shadow_registers(ctx);
2015	set_src_registers(ctx);
2016	set_dst_registers(ctx);
2017	ret = set_srcdst_params(ctx);
2018	if (ret)
2019		goto exit_fh;
2020
2021	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx, &queue_init);
2022
2023	if (IS_ERR(ctx->fh.m2m_ctx)) {
2024		ret = PTR_ERR(ctx->fh.m2m_ctx);
2025		goto exit_fh;
2026	}
2027
2028	v4l2_fh_add(&ctx->fh);
2029
2030	/*
2031	 * for now, just report the creation of the first instance, we can later
2032	 * optimize the driver to enable or disable clocks when the first
2033	 * instance is created or the last instance released
2034	 */
2035	if (atomic_inc_return(&dev->num_instances) == 1)
2036		vpe_dbg(dev, "first instance created\n");
2037
2038	ctx->bufs_per_job = VPE_DEF_BUFS_PER_JOB;
2039
2040	ctx->load_mmrs = true;
2041
2042	vpe_dbg(dev, "created instance %p, m2m_ctx: %p\n",
2043		ctx, ctx->fh.m2m_ctx);
2044
2045	mutex_unlock(&dev->dev_mutex);
2046
2047	return 0;
2048exit_fh:
2049	v4l2_ctrl_handler_free(hdl);
2050	v4l2_fh_exit(&ctx->fh);
2051	vpdma_free_desc_buf(&ctx->sc_coeff_v);
2052free_sc_h:
2053	vpdma_free_desc_buf(&ctx->sc_coeff_h);
2054free_mmr_adb:
2055	vpdma_free_desc_buf(&ctx->mmr_adb);
2056free_desc_list:
2057	vpdma_free_desc_list(&ctx->desc_list);
2058unlock:
2059	mutex_unlock(&dev->dev_mutex);
2060free_ctx:
2061	kfree(ctx);
2062	return ret;
2063}
2064
2065static int vpe_release(struct file *file)
2066{
2067	struct vpe_dev *dev = video_drvdata(file);
2068	struct vpe_ctx *ctx = file2ctx(file);
2069
2070	vpe_dbg(dev, "releasing instance %p\n", ctx);
2071
2072	mutex_lock(&dev->dev_mutex);
2073	free_vbs(ctx);
2074	free_mv_buffers(ctx);
2075	vpdma_free_desc_list(&ctx->desc_list);
2076	vpdma_free_desc_buf(&ctx->mmr_adb);
2077
2078	v4l2_fh_del(&ctx->fh);
2079	v4l2_fh_exit(&ctx->fh);
2080	v4l2_ctrl_handler_free(&ctx->hdl);
2081	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
2082
2083	kfree(ctx);
2084
2085	/*
2086	 * for now, just report the release of the last instance, we can later
2087	 * optimize the driver to enable or disable clocks when the first
2088	 * instance is created or the last instance released
2089	 */
2090	if (atomic_dec_return(&dev->num_instances) == 0)
2091		vpe_dbg(dev, "last instance released\n");
2092
2093	mutex_unlock(&dev->dev_mutex);
2094
2095	return 0;
2096}
2097
2098static const struct v4l2_file_operations vpe_fops = {
2099	.owner		= THIS_MODULE,
2100	.open		= vpe_open,
2101	.release	= vpe_release,
2102	.poll		= v4l2_m2m_fop_poll,
2103	.unlocked_ioctl	= video_ioctl2,
2104	.mmap		= v4l2_m2m_fop_mmap,
2105};
2106
2107static struct video_device vpe_videodev = {
2108	.name		= VPE_MODULE_NAME,
2109	.fops		= &vpe_fops,
2110	.ioctl_ops	= &vpe_ioctl_ops,
2111	.minor		= -1,
2112	.release	= video_device_release_empty,
2113	.vfl_dir	= VFL_DIR_M2M,
2114};
2115
2116static struct v4l2_m2m_ops m2m_ops = {
2117	.device_run	= device_run,
2118	.job_ready	= job_ready,
2119	.job_abort	= job_abort,
2120	.lock		= vpe_lock,
2121	.unlock		= vpe_unlock,
2122};
2123
2124static int vpe_runtime_get(struct platform_device *pdev)
2125{
2126	int r;
2127
2128	dev_dbg(&pdev->dev, "vpe_runtime_get\n");
2129
2130	r = pm_runtime_get_sync(&pdev->dev);
2131	WARN_ON(r < 0);
2132	return r < 0 ? r : 0;
2133}
2134
2135static void vpe_runtime_put(struct platform_device *pdev)
2136{
2137
2138	int r;
2139
2140	dev_dbg(&pdev->dev, "vpe_runtime_put\n");
2141
2142	r = pm_runtime_put_sync(&pdev->dev);
2143	WARN_ON(r < 0 && r != -ENOSYS);
2144}
2145
2146static void vpe_fw_cb(struct platform_device *pdev)
2147{
2148	struct vpe_dev *dev = platform_get_drvdata(pdev);
2149	struct video_device *vfd;
2150	int ret;
2151
2152	vfd = &dev->vfd;
2153	*vfd = vpe_videodev;
2154	vfd->lock = &dev->dev_mutex;
2155	vfd->v4l2_dev = &dev->v4l2_dev;
2156
2157	ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
2158	if (ret) {
2159		vpe_err(dev, "Failed to register video device\n");
2160
2161		vpe_set_clock_enable(dev, 0);
2162		vpe_runtime_put(pdev);
2163		pm_runtime_disable(&pdev->dev);
2164		v4l2_m2m_release(dev->m2m_dev);
2165		vb2_dma_contig_cleanup_ctx(dev->alloc_ctx);
2166		v4l2_device_unregister(&dev->v4l2_dev);
2167
2168		return;
2169	}
2170
2171	video_set_drvdata(vfd, dev);
2172	snprintf(vfd->name, sizeof(vfd->name), "%s", vpe_videodev.name);
2173	dev_info(dev->v4l2_dev.dev, "Device registered as /dev/video%d\n",
2174		vfd->num);
2175}
2176
2177static int vpe_probe(struct platform_device *pdev)
2178{
2179	struct vpe_dev *dev;
2180	int ret, irq, func;
2181
2182	dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
2183	if (!dev)
2184		return -ENOMEM;
2185
2186	spin_lock_init(&dev->lock);
2187
2188	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
2189	if (ret)
2190		return ret;
2191
2192	atomic_set(&dev->num_instances, 0);
2193	mutex_init(&dev->dev_mutex);
2194
2195	dev->res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
2196			"vpe_top");
2197	/*
2198	 * HACK: we get resource info from device tree in the form of a list of
2199	 * VPE sub blocks, the driver currently uses only the base of vpe_top
2200	 * for register access, the driver should be changed later to access
2201	 * registers based on the sub block base addresses
2202	 */
2203	dev->base = devm_ioremap(&pdev->dev, dev->res->start, SZ_32K);
2204	if (!dev->base) {
2205		ret = -ENOMEM;
2206		goto v4l2_dev_unreg;
2207	}
2208
2209	irq = platform_get_irq(pdev, 0);
2210	ret = devm_request_irq(&pdev->dev, irq, vpe_irq, 0, VPE_MODULE_NAME,
2211			dev);
2212	if (ret)
2213		goto v4l2_dev_unreg;
2214
2215	platform_set_drvdata(pdev, dev);
2216
2217	dev->alloc_ctx = vb2_dma_contig_init_ctx(&pdev->dev);
2218	if (IS_ERR(dev->alloc_ctx)) {
2219		vpe_err(dev, "Failed to alloc vb2 context\n");
2220		ret = PTR_ERR(dev->alloc_ctx);
2221		goto v4l2_dev_unreg;
2222	}
2223
2224	dev->m2m_dev = v4l2_m2m_init(&m2m_ops);
2225	if (IS_ERR(dev->m2m_dev)) {
2226		vpe_err(dev, "Failed to init mem2mem device\n");
2227		ret = PTR_ERR(dev->m2m_dev);
2228		goto rel_ctx;
2229	}
2230
2231	pm_runtime_enable(&pdev->dev);
2232
2233	ret = vpe_runtime_get(pdev);
2234	if (ret)
2235		goto rel_m2m;
2236
2237	/* Perform clk enable followed by reset */
2238	vpe_set_clock_enable(dev, 1);
2239
2240	vpe_top_reset(dev);
2241
2242	func = read_field_reg(dev, VPE_PID, VPE_PID_FUNC_MASK,
2243		VPE_PID_FUNC_SHIFT);
2244	vpe_dbg(dev, "VPE PID function %x\n", func);
2245
2246	vpe_top_vpdma_reset(dev);
2247
2248	dev->sc = sc_create(pdev);
2249	if (IS_ERR(dev->sc)) {
2250		ret = PTR_ERR(dev->sc);
2251		goto runtime_put;
2252	}
2253
2254	dev->csc = csc_create(pdev);
2255	if (IS_ERR(dev->csc)) {
2256		ret = PTR_ERR(dev->csc);
2257		goto runtime_put;
2258	}
2259
2260	dev->vpdma = vpdma_create(pdev, vpe_fw_cb);
2261	if (IS_ERR(dev->vpdma)) {
2262		ret = PTR_ERR(dev->vpdma);
2263		goto runtime_put;
2264	}
2265
2266	return 0;
2267
2268runtime_put:
2269	vpe_runtime_put(pdev);
2270rel_m2m:
2271	pm_runtime_disable(&pdev->dev);
2272	v4l2_m2m_release(dev->m2m_dev);
2273rel_ctx:
2274	vb2_dma_contig_cleanup_ctx(dev->alloc_ctx);
2275v4l2_dev_unreg:
2276	v4l2_device_unregister(&dev->v4l2_dev);
2277
2278	return ret;
2279}
2280
2281static int vpe_remove(struct platform_device *pdev)
2282{
2283	struct vpe_dev *dev = platform_get_drvdata(pdev);
2284
2285	v4l2_info(&dev->v4l2_dev, "Removing " VPE_MODULE_NAME);
2286
2287	v4l2_m2m_release(dev->m2m_dev);
2288	video_unregister_device(&dev->vfd);
2289	v4l2_device_unregister(&dev->v4l2_dev);
2290	vb2_dma_contig_cleanup_ctx(dev->alloc_ctx);
2291
2292	vpe_set_clock_enable(dev, 0);
2293	vpe_runtime_put(pdev);
2294	pm_runtime_disable(&pdev->dev);
2295
2296	return 0;
2297}
2298
2299#if defined(CONFIG_OF)
2300static const struct of_device_id vpe_of_match[] = {
2301	{
2302		.compatible = "ti,vpe",
2303	},
2304	{},
2305};
2306#endif
2307
2308static struct platform_driver vpe_pdrv = {
2309	.probe		= vpe_probe,
2310	.remove		= vpe_remove,
2311	.driver		= {
2312		.name	= VPE_MODULE_NAME,
2313		.of_match_table = of_match_ptr(vpe_of_match),
2314	},
2315};
2316
2317module_platform_driver(vpe_pdrv);
2318
2319MODULE_DESCRIPTION("TI VPE driver");
2320MODULE_AUTHOR("Dale Farnsworth, <dale@farnsworth.org>");
2321MODULE_LICENSE("GPL");
2322