]> err.no Git - linux-2.6/blob - drivers/char/drm/r300_cmdbuf.c
329733a48b64d1daee78e8f2c43495e0a0ccfb7e
[linux-2.6] / drivers / char / drm / r300_cmdbuf.c
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33
34 #include "drmP.h"
35 #include "drm.h"
36 #include "radeon_drm.h"
37 #include "radeon_drv.h"
38 #include "r300_reg.h"
39
#define R300_SIMULTANEOUS_CLIPRECTS		4

/*
 * R300_RE_CLIPRECT_CNTL values.  The table is indexed by the number of
 * cliprects being programmed minus one.
 */
static const int r300_cliprect_cntl[4] = {
	[0] = 0xAAAA,
	[1] = 0xEEEE,
	[2] = 0xFEFE,
	[3] = 0xFFFE,
};
50
51 /**
52  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
53  * buffer, starting with index n.
54  */
55 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
56                                drm_radeon_kcmd_buffer_t *cmdbuf, int n)
57 {
58         struct drm_clip_rect box;
59         int nr;
60         int i;
61         RING_LOCALS;
62
63         nr = cmdbuf->nbox - n;
64         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
65                 nr = R300_SIMULTANEOUS_CLIPRECTS;
66
67         DRM_DEBUG("%i cliprects\n", nr);
68
69         if (nr) {
70                 BEGIN_RING(6 + nr * 2);
71                 OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
72
73                 for (i = 0; i < nr; ++i) {
74                         if (DRM_COPY_FROM_USER_UNCHECKED
75                             (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
76                                 DRM_ERROR("copy cliprect faulted\n");
77                                 return -EFAULT;
78                         }
79
80                         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
81                                 box.x1 = (box.x1) &
82                                         R300_CLIPRECT_MASK;
83                                 box.y1 = (box.y1) &
84                                         R300_CLIPRECT_MASK;
85                                 box.x2 = (box.x2) &
86                                         R300_CLIPRECT_MASK;
87                                 box.y2 = (box.y2) &
88                                         R300_CLIPRECT_MASK;
89                         } else {
90                                 box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
91                                         R300_CLIPRECT_MASK;
92                                 box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
93                                         R300_CLIPRECT_MASK;
94                                 box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
95                                         R300_CLIPRECT_MASK;
96                                 box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
97                                         R300_CLIPRECT_MASK;
98
99                         }
100                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
101                                  (box.y1 << R300_CLIPRECT_Y_SHIFT));
102                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
103                                  (box.y2 << R300_CLIPRECT_Y_SHIFT));
104
105                 }
106
107                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
108
109                 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
110                  * client might be able to trample over memory.
111                  * The impact should be very limited, but I'd rather be safe than
112                  * sorry.
113                  */
114                 OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
115                 OUT_RING(0);
116                 OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
117                 ADVANCE_RING();
118         } else {
119                 /* Why we allow zero cliprect rendering:
120                  * There are some commands in a command buffer that must be submitted
121                  * even when there are no cliprects, e.g. DMA buffer discard
122                  * or state setting (though state setting could be avoided by
123                  * simulating a loss of context).
124                  *
125                  * Now since the cmdbuf interface is so chaotic right now (and is
126                  * bound to remain that way for a bit until things settle down),
127                  * it is basically impossible to filter out the commands that are
128                  * necessary and those that aren't.
129                  *
130                  * So I choose the safe way and don't do any filtering at all;
131                  * instead, I simply set up the engine so that all rendering
132                  * can't produce any fragments.
133                  */
134                 BEGIN_RING(2);
135                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
136                 ADVANCE_RING();
137         }
138
139         return 0;
140 }
141
142 static u8 r300_reg_flags[0x10000 >> 2];
143
144 void r300_init_reg_flags(struct drm_device *dev)
145 {
146         int i;
147         drm_radeon_private_t *dev_priv = dev->dev_private;
148
149         memset(r300_reg_flags, 0, 0x10000 >> 2);
150 #define ADD_RANGE_MARK(reg, count,mark) \
151                 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
152                         r300_reg_flags[i]|=(mark);
153
154 #define MARK_SAFE               1
155 #define MARK_CHECK_OFFSET       2
156
157 #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
158
159         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
160         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
161         ADD_RANGE(R300_VAP_CNTL, 1);
162         ADD_RANGE(R300_SE_VTE_CNTL, 2);
163         ADD_RANGE(0x2134, 2);
164         ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
165         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
166         ADD_RANGE(0x21DC, 1);
167         ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
168         ADD_RANGE(R300_VAP_CLIP_X_0, 4);
169         ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1);
170         ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
171         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
172         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
173         ADD_RANGE(R300_GB_ENABLE, 1);
174         ADD_RANGE(R300_GB_MSPOS0, 5);
175         ADD_RANGE(R300_TX_CNTL, 1);
176         ADD_RANGE(R300_TX_ENABLE, 1);
177         ADD_RANGE(0x4200, 4);
178         ADD_RANGE(0x4214, 1);
179         ADD_RANGE(R300_RE_POINTSIZE, 1);
180         ADD_RANGE(0x4230, 3);
181         ADD_RANGE(R300_RE_LINE_CNT, 1);
182         ADD_RANGE(R300_RE_UNK4238, 1);
183         ADD_RANGE(0x4260, 3);
184         ADD_RANGE(R300_RE_SHADE, 4);
185         ADD_RANGE(R300_RE_POLYGON_MODE, 5);
186         ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
187         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
188         ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
189         ADD_RANGE(R300_RE_CULL_CNTL, 1);
190         ADD_RANGE(0x42C0, 2);
191         ADD_RANGE(R300_RS_CNTL_0, 2);
192
193         ADD_RANGE(0x43A4, 2);
194         ADD_RANGE(0x43E8, 1);
195
196         ADD_RANGE(0x46A4, 5);
197
198         ADD_RANGE(R300_RE_FOG_STATE, 1);
199         ADD_RANGE(R300_FOG_COLOR_R, 3);
200         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
201         ADD_RANGE(0x4BD8, 1);
202         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
203         ADD_RANGE(0x4E00, 1);
204         ADD_RANGE(R300_RB3D_CBLEND, 2);
205         ADD_RANGE(R300_RB3D_COLORMASK, 1);
206         ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
207         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);   /* check offset */
208         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
209         ADD_RANGE(0x4E50, 9);
210         ADD_RANGE(0x4E88, 1);
211         ADD_RANGE(0x4EA0, 2);
212         ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
213         ADD_RANGE(R300_RB3D_ZSTENCIL_FORMAT, 4);
214         ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);    /* check offset */
215         ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
216         ADD_RANGE(0x4F28, 1);
217         ADD_RANGE(0x4F30, 2);
218         ADD_RANGE(0x4F44, 1);
219         ADD_RANGE(0x4F54, 1);
220
221         ADD_RANGE(R300_TX_FILTER_0, 16);
222         ADD_RANGE(R300_TX_FILTER1_0, 16);
223         ADD_RANGE(R300_TX_SIZE_0, 16);
224         ADD_RANGE(R300_TX_FORMAT_0, 16);
225         ADD_RANGE(R300_TX_PITCH_0, 16);
226         /* Texture offset is dangerous and needs more checking */
227         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
228         ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
229         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
230
231         /* Sporadic registers used as primitives are emitted */
232         ADD_RANGE(R300_RB3D_ZCACHE_CTLSTAT, 1);
233         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
234         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
235         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
236
237         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
238                 ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
239                 ADD_RANGE(R500_US_CONFIG, 2);
240                 ADD_RANGE(R500_US_CODE_ADDR, 3);
241                 ADD_RANGE(R500_US_FC_CTRL, 1);
242                 ADD_RANGE(R500_RS_IP_0, 16);
243                 ADD_RANGE(R500_RS_INST_0, 16);
244                 ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
245                 ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
246         } else {
247                 ADD_RANGE(R300_PFS_CNTL_0, 3);
248                 ADD_RANGE(R300_PFS_NODE_0, 4);
249                 ADD_RANGE(R300_PFS_TEXI_0, 64);
250                 ADD_RANGE(R300_PFS_INSTR0_0, 64);
251                 ADD_RANGE(R300_PFS_INSTR1_0, 64);
252                 ADD_RANGE(R300_PFS_INSTR2_0, 64);
253                 ADD_RANGE(R300_PFS_INSTR3_0, 64);
254                 ADD_RANGE(R300_RS_INTERP_0, 8);
255                 ADD_RANGE(R300_RS_ROUTE_0, 8);
256
257         }
258 }
259
260 static __inline__ int r300_check_range(unsigned reg, int count)
261 {
262         int i;
263         if (reg & ~0xffff)
264                 return -1;
265         for (i = (reg >> 2); i < (reg >> 2) + count; i++)
266                 if (r300_reg_flags[i] != MARK_SAFE)
267                         return 1;
268         return 0;
269 }
270
271 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
272                                                           dev_priv,
273                                                           drm_radeon_kcmd_buffer_t
274                                                           * cmdbuf,
275                                                           drm_r300_cmd_header_t
276                                                           header)
277 {
278         int reg;
279         int sz;
280         int i;
281         int values[64];
282         RING_LOCALS;
283
284         sz = header.packet0.count;
285         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
286
287         if ((sz > 64) || (sz < 0)) {
288                 DRM_ERROR
289                     ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
290                      reg, sz);
291                 return -EINVAL;
292         }
293         for (i = 0; i < sz; i++) {
294                 values[i] = ((int *)cmdbuf->buf)[i];
295                 switch (r300_reg_flags[(reg >> 2) + i]) {
296                 case MARK_SAFE:
297                         break;
298                 case MARK_CHECK_OFFSET:
299                         if (!radeon_check_offset(dev_priv, (u32) values[i])) {
300                                 DRM_ERROR
301                                     ("Offset failed range check (reg=%04x sz=%d)\n",
302                                      reg, sz);
303                                 return -EINVAL;
304                         }
305                         break;
306                 default:
307                         DRM_ERROR("Register %04x failed check as flag=%02x\n",
308                                   reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
309                         return -EINVAL;
310                 }
311         }
312
313         BEGIN_RING(1 + sz);
314         OUT_RING(CP_PACKET0(reg, sz - 1));
315         OUT_RING_TABLE(values, sz);
316         ADVANCE_RING();
317
318         cmdbuf->buf += sz * 4;
319         cmdbuf->bufsz -= sz * 4;
320
321         return 0;
322 }
323
324 /**
325  * Emits a packet0 setting arbitrary registers.
326  * Called by r300_do_cp_cmdbuf.
327  *
328  * Note that checks are performed on contents and addresses of the registers
329  */
330 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
331                                         drm_radeon_kcmd_buffer_t *cmdbuf,
332                                         drm_r300_cmd_header_t header)
333 {
334         int reg;
335         int sz;
336         RING_LOCALS;
337
338         sz = header.packet0.count;
339         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
340
341         if (!sz)
342                 return 0;
343
344         if (sz * 4 > cmdbuf->bufsz)
345                 return -EINVAL;
346
347         if (reg + sz * 4 >= 0x10000) {
348                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
349                           sz);
350                 return -EINVAL;
351         }
352
353         if (r300_check_range(reg, sz)) {
354                 /* go and check everything */
355                 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
356                                                            header);
357         }
358         /* the rest of the data is safe to emit, whatever the values the user passed */
359
360         BEGIN_RING(1 + sz);
361         OUT_RING(CP_PACKET0(reg, sz - 1));
362         OUT_RING_TABLE((int *)cmdbuf->buf, sz);
363         ADVANCE_RING();
364
365         cmdbuf->buf += sz * 4;
366         cmdbuf->bufsz -= sz * 4;
367
368         return 0;
369 }
370
371 /**
372  * Uploads user-supplied vertex program instructions or parameters onto
373  * the graphics card.
374  * Called by r300_do_cp_cmdbuf.
375  */
376 static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
377                                     drm_radeon_kcmd_buffer_t *cmdbuf,
378                                     drm_r300_cmd_header_t header)
379 {
380         int sz;
381         int addr;
382         RING_LOCALS;
383
384         sz = header.vpu.count;
385         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
386
387         if (!sz)
388                 return 0;
389         if (sz * 16 > cmdbuf->bufsz)
390                 return -EINVAL;
391
392         BEGIN_RING(5 + sz * 4);
393         /* Wait for VAP to come to senses.. */
394         /* there is no need to emit it multiple times, (only once before VAP is programmed,
395            but this optimization is for later */
396         OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
397         OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
398         OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
399         OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
400
401         ADVANCE_RING();
402
403         cmdbuf->buf += sz * 16;
404         cmdbuf->bufsz -= sz * 16;
405
406         return 0;
407 }
408
409 /**
410  * Emit a clear packet from userspace.
411  * Called by r300_emit_packet3.
412  */
413 static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
414                                       drm_radeon_kcmd_buffer_t *cmdbuf)
415 {
416         RING_LOCALS;
417
418         if (8 * 4 > cmdbuf->bufsz)
419                 return -EINVAL;
420
421         BEGIN_RING(10);
422         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
423         OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
424                  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
425         OUT_RING_TABLE((int *)cmdbuf->buf, 8);
426         ADVANCE_RING();
427
428         cmdbuf->buf += 8 * 4;
429         cmdbuf->bufsz -= 8 * 4;
430
431         return 0;
432 }
433
434 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
435                                                drm_radeon_kcmd_buffer_t *cmdbuf,
436                                                u32 header)
437 {
438         int count, i, k;
439 #define MAX_ARRAY_PACKET  64
440         u32 payload[MAX_ARRAY_PACKET];
441         u32 narrays;
442         RING_LOCALS;
443
444         count = (header >> 16) & 0x3fff;
445
446         if ((count + 1) > MAX_ARRAY_PACKET) {
447                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
448                           count);
449                 return -EINVAL;
450         }
451         memset(payload, 0, MAX_ARRAY_PACKET * 4);
452         memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
453
454         /* carefully check packet contents */
455
456         narrays = payload[0];
457         k = 0;
458         i = 1;
459         while ((k < narrays) && (i < (count + 1))) {
460                 i++;            /* skip attribute field */
461                 if (!radeon_check_offset(dev_priv, payload[i])) {
462                         DRM_ERROR
463                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
464                              k, i);
465                         return -EINVAL;
466                 }
467                 k++;
468                 i++;
469                 if (k == narrays)
470                         break;
471                 /* have one more to process, they come in pairs */
472                 if (!radeon_check_offset(dev_priv, payload[i])) {
473                         DRM_ERROR
474                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
475                              k, i);
476                         return -EINVAL;
477                 }
478                 k++;
479                 i++;
480         }
481         /* do the counts match what we expect ? */
482         if ((k != narrays) || (i != (count + 1))) {
483                 DRM_ERROR
484                     ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
485                      k, i, narrays, count + 1);
486                 return -EINVAL;
487         }
488
489         /* all clear, output packet */
490
491         BEGIN_RING(count + 2);
492         OUT_RING(header);
493         OUT_RING_TABLE(payload, count + 1);
494         ADVANCE_RING();
495
496         cmdbuf->buf += (count + 2) * 4;
497         cmdbuf->bufsz -= (count + 2) * 4;
498
499         return 0;
500 }
501
502 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
503                                              drm_radeon_kcmd_buffer_t *cmdbuf)
504 {
505         u32 *cmd = (u32 *) cmdbuf->buf;
506         int count, ret;
507         RING_LOCALS;
508
509         count=(cmd[0]>>16) & 0x3fff;
510
511         if (cmd[0] & 0x8000) {
512                 u32 offset;
513
514                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
515                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
516                         offset = cmd[2] << 10;
517                         ret = !radeon_check_offset(dev_priv, offset);
518                         if (ret) {
519                                 DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
520                                 return -EINVAL;
521                         }
522                 }
523
524                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
525                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
526                         offset = cmd[3] << 10;
527                         ret = !radeon_check_offset(dev_priv, offset);
528                         if (ret) {
529                                 DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
530                                 return -EINVAL;
531                         }
532
533                 }
534         }
535
536         BEGIN_RING(count+2);
537         OUT_RING(cmd[0]);
538         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
539         ADVANCE_RING();
540
541         cmdbuf->buf += (count+2)*4;
542         cmdbuf->bufsz -= (count+2)*4;
543
544         return 0;
545 }
546
547 static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
548                                              drm_radeon_kcmd_buffer_t *cmdbuf)
549 {
550         u32 *cmd = (u32 *) cmdbuf->buf;
551         int count, ret;
552         RING_LOCALS;
553
554         count=(cmd[0]>>16) & 0x3fff;
555
556         if ((cmd[1] & 0x8000ffff) != 0x80000810) {
557                 DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
558                 return -EINVAL;
559         }
560         ret = !radeon_check_offset(dev_priv, cmd[2]);
561         if (ret) {
562                 DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
563                 return -EINVAL;
564         }
565
566         BEGIN_RING(count+2);
567         OUT_RING(cmd[0]);
568         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
569         ADVANCE_RING();
570
571         cmdbuf->buf += (count+2)*4;
572         cmdbuf->bufsz -= (count+2)*4;
573
574         return 0;
575 }
576
577 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
578                                             drm_radeon_kcmd_buffer_t *cmdbuf)
579 {
580         u32 header;
581         int count;
582         RING_LOCALS;
583
584         if (4 > cmdbuf->bufsz)
585                 return -EINVAL;
586
587         /* Fixme !! This simply emits a packet without much checking.
588            We need to be smarter. */
589
590         /* obtain first word - actual packet3 header */
591         header = *(u32 *) cmdbuf->buf;
592
593         /* Is it packet 3 ? */
594         if ((header >> 30) != 0x3) {
595                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
596                 return -EINVAL;
597         }
598
599         count = (header >> 16) & 0x3fff;
600
601         /* Check again now that we know how much data to expect */
602         if ((count + 2) * 4 > cmdbuf->bufsz) {
603                 DRM_ERROR
604                     ("Expected packet3 of length %d but have only %d bytes left\n",
605                      (count + 2) * 4, cmdbuf->bufsz);
606                 return -EINVAL;
607         }
608
609         /* Is it a packet type we know about ? */
610         switch (header & 0xff00) {
611         case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
612                 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
613
614         case RADEON_CNTL_BITBLT_MULTI:
615                 return r300_emit_bitblt_multi(dev_priv, cmdbuf);
616
617         case RADEON_CP_INDX_BUFFER:     /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
618                 return r300_emit_indx_buffer(dev_priv, cmdbuf);
619         case RADEON_CP_3D_DRAW_IMMD_2:  /* triggers drawing using in-packet vertex data */
620         case RADEON_CP_3D_DRAW_VBUF_2:  /* triggers drawing of vertex buffers setup elsewhere */
621         case RADEON_CP_3D_DRAW_INDX_2:  /* triggers drawing using indices to vertex buffer */
622         case RADEON_WAIT_FOR_IDLE:
623         case RADEON_CP_NOP:
624                 /* these packets are safe */
625                 break;
626         default:
627                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
628                 return -EINVAL;
629         }
630
631         BEGIN_RING(count + 2);
632         OUT_RING(header);
633         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
634         ADVANCE_RING();
635
636         cmdbuf->buf += (count + 2) * 4;
637         cmdbuf->bufsz -= (count + 2) * 4;
638
639         return 0;
640 }
641
642 /**
643  * Emit a rendering packet3 from userspace.
644  * Called by r300_do_cp_cmdbuf.
645  */
646 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
647                                         drm_radeon_kcmd_buffer_t *cmdbuf,
648                                         drm_r300_cmd_header_t header)
649 {
650         int n;
651         int ret;
652         char *orig_buf = cmdbuf->buf;
653         int orig_bufsz = cmdbuf->bufsz;
654
655         /* This is a do-while-loop so that we run the interior at least once,
656          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
657          */
658         n = 0;
659         do {
660                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
661                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
662                         if (ret)
663                                 return ret;
664
665                         cmdbuf->buf = orig_buf;
666                         cmdbuf->bufsz = orig_bufsz;
667                 }
668
669                 switch (header.packet3.packet) {
670                 case R300_CMD_PACKET3_CLEAR:
671                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
672                         ret = r300_emit_clear(dev_priv, cmdbuf);
673                         if (ret) {
674                                 DRM_ERROR("r300_emit_clear failed\n");
675                                 return ret;
676                         }
677                         break;
678
679                 case R300_CMD_PACKET3_RAW:
680                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
681                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
682                         if (ret) {
683                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
684                                 return ret;
685                         }
686                         break;
687
688                 default:
689                         DRM_ERROR("bad packet3 type %i at %p\n",
690                                   header.packet3.packet,
691                                   cmdbuf->buf - sizeof(header));
692                         return -EINVAL;
693                 }
694
695                 n += R300_SIMULTANEOUS_CLIPRECTS;
696         } while (n < cmdbuf->nbox);
697
698         return 0;
699 }
700
701 /* Some of the R300 chips seem to be extremely touchy about the two registers
702  * that are configured in r300_pacify.
703  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
704  * sends a command buffer that contains only state setting commands and a
705  * vertex program/parameter upload sequence, this will eventually lead to a
706  * lockup, unless the sequence is bracketed by calls to r300_pacify.
707  * So we should take great care to *always* call r300_pacify before
708  * *anything* 3D related, and again afterwards. This is what the
709  * call bracket in r300_do_cp_cmdbuf is for.
710  */
711
712 /**
713  * Emit the sequence to pacify R300.
714  */
715 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
716 {
717         RING_LOCALS;
718
719         BEGIN_RING(6);
720         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
721         OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A);
722         OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
723         OUT_RING(R300_RB3D_ZCACHE_UNKNOWN_03);
724         OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
725         OUT_RING(0x0);
726         ADVANCE_RING();
727 }
728
729 /**
730  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
731  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
732  * be careful about how this function is called.
733  */
734 static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
735 {
736         drm_radeon_private_t *dev_priv = dev->dev_private;
737         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
738
739         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
740         buf->pending = 1;
741         buf->used = 0;
742 }
743
744 static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
745                           drm_r300_cmd_header_t header)
746 {
747         u32 wait_until;
748         RING_LOCALS;
749
750         if (!header.wait.flags)
751                 return;
752
753         wait_until = 0;
754
755         switch(header.wait.flags) {
756         case R300_WAIT_2D:
757                 wait_until = RADEON_WAIT_2D_IDLE;
758                 break;
759         case R300_WAIT_3D:
760                 wait_until = RADEON_WAIT_3D_IDLE;
761                 break;
762         case R300_NEW_WAIT_2D_3D:
763                 wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_3D_IDLE;
764                 break;
765         case R300_NEW_WAIT_2D_2D_CLEAN:
766                 wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
767                 break;
768         case R300_NEW_WAIT_3D_3D_CLEAN:
769                 wait_until = RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
770                 break;
771         case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
772                 wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
773                 wait_until |= RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
774                 break;
775         default:
776                 return;
777         }
778
779         BEGIN_RING(2);
780         OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
781         OUT_RING(wait_until);
782         ADVANCE_RING();
783 }
784
785 static int r300_scratch(drm_radeon_private_t *dev_priv,
786                         drm_radeon_kcmd_buffer_t *cmdbuf,
787                         drm_r300_cmd_header_t header)
788 {
789         u32 *ref_age_base;
790         u32 i, buf_idx, h_pending;
791         RING_LOCALS;
792
793         if (cmdbuf->bufsz <
794             (sizeof(u64) + header.scratch.n_bufs * sizeof(buf_idx))) {
795                 return -EINVAL;
796         }
797
798         if (header.scratch.reg >= 5) {
799                 return -EINVAL;
800         }
801
802         dev_priv->scratch_ages[header.scratch.reg]++;
803
804         ref_age_base =  (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
805
806         cmdbuf->buf += sizeof(u64);
807         cmdbuf->bufsz -= sizeof(u64);
808
809         for (i=0; i < header.scratch.n_bufs; i++) {
810                 buf_idx = *(u32 *)cmdbuf->buf;
811                 buf_idx *= 2; /* 8 bytes per buf */
812
813                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
814                         return -EINVAL;
815                 }
816
817                 if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
818                         return -EINVAL;
819                 }
820
821                 if (h_pending == 0) {
822                         return -EINVAL;
823                 }
824
825                 h_pending--;
826
827                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
828                         return -EINVAL;
829                 }
830
831                 cmdbuf->buf += sizeof(buf_idx);
832                 cmdbuf->bufsz -= sizeof(buf_idx);
833         }
834
835         BEGIN_RING(2);
836         OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
837         OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
838         ADVANCE_RING();
839
840         return 0;
841 }
842
843 /**
844  * Uploads user-supplied vertex program instructions or parameters onto
845  * the graphics card.
846  * Called by r300_do_cp_cmdbuf.
847  */
848 static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
849                                        drm_radeon_kcmd_buffer_t *cmdbuf,
850                                        drm_r300_cmd_header_t header)
851 {
852         int sz;
853         int addr;
854         int type;
855         int clamp;
856         int stride;
857         RING_LOCALS;
858
859         sz = header.r500fp.count;
860         /* address is 9 bits 0 - 8, bit 1 of flags is part of address */
861         addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
862
863         type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
864         clamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
865
866         addr |= (type << 16);
867         addr |= (clamp << 17);
868
869         stride = type ? 4 : 6;
870
871         DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
872         if (!sz)
873                 return 0;
874         if (sz * stride * 4 > cmdbuf->bufsz)
875                 return -EINVAL;
876
877         BEGIN_RING(3 + sz * stride);
878         OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
879         OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
880         OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride);
881
882         ADVANCE_RING();
883
884         cmdbuf->buf += sz * stride * 4;
885         cmdbuf->bufsz -= sz * stride * 4;
886
887         return 0;
888 }
889
890
891 /**
892  * Parses and validates a user-supplied command buffer and emits appropriate
893  * commands on the DMA ring buffer.
894  * Called by the ioctl handler function radeon_cp_cmdbuf.
895  */
896 int r300_do_cp_cmdbuf(struct drm_device *dev,
897                       struct drm_file *file_priv,
898                       drm_radeon_kcmd_buffer_t *cmdbuf)
899 {
900         drm_radeon_private_t *dev_priv = dev->dev_private;
901         struct drm_device_dma *dma = dev->dma;
902         struct drm_buf *buf = NULL;
903         int emit_dispatch_age = 0;
904         int ret = 0;
905
906         DRM_DEBUG("\n");
907
908         /* See the comment above r300_emit_begin3d for why this call must be here,
909          * and what the cleanup gotos are for. */
910         r300_pacify(dev_priv);
911
912         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
913                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
914                 if (ret)
915                         goto cleanup;
916         }
917
918         while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
919                 int idx;
920                 drm_r300_cmd_header_t header;
921
922                 header.u = *(unsigned int *)cmdbuf->buf;
923
924                 cmdbuf->buf += sizeof(header);
925                 cmdbuf->bufsz -= sizeof(header);
926
927                 switch (header.header.cmd_type) {
928                 case R300_CMD_PACKET0:
929                         DRM_DEBUG("R300_CMD_PACKET0\n");
930                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
931                         if (ret) {
932                                 DRM_ERROR("r300_emit_packet0 failed\n");
933                                 goto cleanup;
934                         }
935                         break;
936
937                 case R300_CMD_VPU:
938                         DRM_DEBUG("R300_CMD_VPU\n");
939                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
940                         if (ret) {
941                                 DRM_ERROR("r300_emit_vpu failed\n");
942                                 goto cleanup;
943                         }
944                         break;
945
946                 case R300_CMD_PACKET3:
947                         DRM_DEBUG("R300_CMD_PACKET3\n");
948                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
949                         if (ret) {
950                                 DRM_ERROR("r300_emit_packet3 failed\n");
951                                 goto cleanup;
952                         }
953                         break;
954
955                 case R300_CMD_END3D:
956                         DRM_DEBUG("R300_CMD_END3D\n");
957                         /* TODO:
958                            Ideally userspace driver should not need to issue this call,
959                            i.e. the drm driver should issue it automatically and prevent
960                            lockups.
961
962                            In practice, we do not understand why this call is needed and what
963                            it does (except for some vague guesses that it has to do with cache
964                            coherence) and so the user space driver does it.
965
966                            Once we are sure which uses prevent lockups the code could be moved
967                            into the kernel and the userspace driver will not
968                            need to use this command.
969
970                            Note that issuing this command does not hurt anything
971                            except, possibly, performance */
972                         r300_pacify(dev_priv);
973                         break;
974
975                 case R300_CMD_CP_DELAY:
976                         /* simple enough, we can do it here */
977                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
978                         {
979                                 int i;
980                                 RING_LOCALS;
981
982                                 BEGIN_RING(header.delay.count);
983                                 for (i = 0; i < header.delay.count; i++)
984                                         OUT_RING(RADEON_CP_PACKET2);
985                                 ADVANCE_RING();
986                         }
987                         break;
988
989                 case R300_CMD_DMA_DISCARD:
990                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
991                         idx = header.dma.buf_idx;
992                         if (idx < 0 || idx >= dma->buf_count) {
993                                 DRM_ERROR("buffer index %d (of %d max)\n",
994                                           idx, dma->buf_count - 1);
995                                 ret = -EINVAL;
996                                 goto cleanup;
997                         }
998
999                         buf = dma->buflist[idx];
1000                         if (buf->file_priv != file_priv || buf->pending) {
1001                                 DRM_ERROR("bad buffer %p %p %d\n",
1002                                           buf->file_priv, file_priv,
1003                                           buf->pending);
1004                                 ret = -EINVAL;
1005                                 goto cleanup;
1006                         }
1007
1008                         emit_dispatch_age = 1;
1009                         r300_discard_buffer(dev, buf);
1010                         break;
1011
1012                 case R300_CMD_WAIT:
1013                         DRM_DEBUG("R300_CMD_WAIT\n");
1014                         r300_cmd_wait(dev_priv, header);
1015                         break;
1016
1017                 case R300_CMD_SCRATCH:
1018                         DRM_DEBUG("R300_CMD_SCRATCH\n");
1019                         ret = r300_scratch(dev_priv, cmdbuf, header);
1020                         if (ret) {
1021                                 DRM_ERROR("r300_scratch failed\n");
1022                                 goto cleanup;
1023                         }
1024                         break;
1025
1026                 case R300_CMD_R500FP:
1027                         if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
1028                                 DRM_ERROR("Calling r500 command on r300 card\n");
1029                                 ret = -EINVAL;
1030                                 goto cleanup;
1031                         }
1032                         DRM_DEBUG("R300_CMD_R500FP\n");
1033                         ret = r300_emit_r500fp(dev_priv, cmdbuf, header);
1034                         if (ret) {
1035                                 DRM_ERROR("r300_emit_r500fp failed\n");
1036                                 goto cleanup;
1037                         }
1038                         break;
1039                 default:
1040                         DRM_ERROR("bad cmd_type %i at %p\n",
1041                                   header.header.cmd_type,
1042                                   cmdbuf->buf - sizeof(header));
1043                         ret = -EINVAL;
1044                         goto cleanup;
1045                 }
1046         }
1047
1048         DRM_DEBUG("END\n");
1049
1050       cleanup:
1051         r300_pacify(dev_priv);
1052
1053         /* We emit the vertex buffer age here, outside the pacifier "brackets"
1054          * for two reasons:
1055          *  (1) This may coalesce multiple age emissions into a single one and
1056          *  (2) more importantly, some chips lock up hard when scratch registers
1057          *      are written inside the pacifier bracket.
1058          */
1059         if (emit_dispatch_age) {
1060                 RING_LOCALS;
1061
1062                 /* Emit the vertex buffer age */
1063                 BEGIN_RING(2);
1064                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
1065                 ADVANCE_RING();
1066         }
1067
1068         COMMIT_RING();
1069
1070         return ret;
1071 }