drm: fd.o Bug #7595: Avoid u32 overflows in radeon_check_and_fixup_offset().
[linux-2.6] drivers/char/drm/radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41                                                     dev_priv,
42                                                     drm_file_t * filp_priv,
43                                                     u32 *offset)
44 {
45         u64 off = *offset;
46         u32 fb_start = dev_priv->fb_location;
47         u32 fb_end = fb_start + dev_priv->fb_size - 1;
48         u32 gart_start = dev_priv->gart_vm_start;
49         u32 gart_end = gart_start + dev_priv->gart_size - 1;
50         struct drm_radeon_driver_file_fields *radeon_priv;
51
52         /* Hrm ... the story of the offset ... So this function converts
53          * the various ideas of what userland clients might have for an
54          * offset in the card address space into an offset into the card
55          * address space :) So with a sane client, it should just keep
56          * the value intact and just do some boundary checking. However,
57          * not all clients are sane. Some older clients pass us 0 based
58          * offsets relative to the start of the framebuffer and some may
59          * assume the AGP aperture it appended to the framebuffer, so we
60          * try to detect those cases and fix them up.
61          *
62          * Note: It might be a good idea here to make sure the offset lands
63          * in some "allowed" area to protect things like the PCIE GART...
64          */
65
66         /* First, the best case, the offset already lands in either the
67          * framebuffer or the GART mapped space
68          */
69         if ((off >= fb_start && off <= fb_end) ||
70             (off >= gart_start && off <= gart_end))
71                 return 0;
72
73         /* Ok, that didn't happen... now check if we have a zero based
74          * offset that fits in the framebuffer + gart space, apply the
75          * magic offset we get from SETPARAM or calculated from fb_location
76          */
77         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
78                 radeon_priv = filp_priv->driver_priv;
79                 off += radeon_priv->radeon_fb_delta;
80         }
81
82         /* Finally, assume we aimed at a GART offset if beyond the fb */
83         if (off > fb_end)
84                 off = off - fb_end - 1 + gart_start;
85
86         /* Now recheck and fail if out of bounds */
87         if ((off >= fb_start && off <= fb_end) ||
88             (off >= gart_start && off <= gart_end)) {
89                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
90                 *offset = off;
91                 return 0;
92         }
93         return DRM_ERR(EINVAL);
94 }
95
96 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
97                                                      dev_priv,
98                                                      drm_file_t * filp_priv,
99                                                      int id, u32 *data)
100 {
101         switch (id) {
102
103         case RADEON_EMIT_PP_MISC:
104                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
105                     &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
106                         DRM_ERROR("Invalid depth buffer offset\n");
107                         return DRM_ERR(EINVAL);
108                 }
109                 break;
110
111         case RADEON_EMIT_PP_CNTL:
112                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
113                     &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
114                         DRM_ERROR("Invalid colour buffer offset\n");
115                         return DRM_ERR(EINVAL);
116                 }
117                 break;
118
119         case R200_EMIT_PP_TXOFFSET_0:
120         case R200_EMIT_PP_TXOFFSET_1:
121         case R200_EMIT_PP_TXOFFSET_2:
122         case R200_EMIT_PP_TXOFFSET_3:
123         case R200_EMIT_PP_TXOFFSET_4:
124         case R200_EMIT_PP_TXOFFSET_5:
125                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
126                                                   &data[0])) {
127                         DRM_ERROR("Invalid R200 texture offset\n");
128                         return DRM_ERR(EINVAL);
129                 }
130                 break;
131
132         case RADEON_EMIT_PP_TXFILTER_0:
133         case RADEON_EMIT_PP_TXFILTER_1:
134         case RADEON_EMIT_PP_TXFILTER_2:
135                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
136                     &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
137                         DRM_ERROR("Invalid R100 texture offset\n");
138                         return DRM_ERR(EINVAL);
139                 }
140                 break;
141
142         case R200_EMIT_PP_CUBIC_OFFSETS_0:
143         case R200_EMIT_PP_CUBIC_OFFSETS_1:
144         case R200_EMIT_PP_CUBIC_OFFSETS_2:
145         case R200_EMIT_PP_CUBIC_OFFSETS_3:
146         case R200_EMIT_PP_CUBIC_OFFSETS_4:
147         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
148                         int i;
149                         for (i = 0; i < 5; i++) {
150                                 if (radeon_check_and_fixup_offset(dev_priv,
151                                                                   filp_priv,
152                                                                   &data[i])) {
153                                         DRM_ERROR
154                                             ("Invalid R200 cubic texture offset\n");
155                                         return DRM_ERR(EINVAL);
156                                 }
157                         }
158                         break;
159                 }
160
161         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
162         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
163         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
164                         int i;
165                         for (i = 0; i < 5; i++) {
166                                 if (radeon_check_and_fixup_offset(dev_priv,
167                                                                   filp_priv,
168                                                                   &data[i])) {
169                                         DRM_ERROR
170                                             ("Invalid R100 cubic texture offset\n");
171                                         return DRM_ERR(EINVAL);
172                                 }
173                         }
174                 }
175                 break;
176
177         case R200_EMIT_VAP_CTL:{
178                         RING_LOCALS;
179                         BEGIN_RING(2);
180                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
181                         ADVANCE_RING();
182                 }
183                 break;
184
185         case RADEON_EMIT_RB3D_COLORPITCH:
186         case RADEON_EMIT_RE_LINE_PATTERN:
187         case RADEON_EMIT_SE_LINE_WIDTH:
188         case RADEON_EMIT_PP_LUM_MATRIX:
189         case RADEON_EMIT_PP_ROT_MATRIX_0:
190         case RADEON_EMIT_RB3D_STENCILREFMASK:
191         case RADEON_EMIT_SE_VPORT_XSCALE:
192         case RADEON_EMIT_SE_CNTL:
193         case RADEON_EMIT_SE_CNTL_STATUS:
194         case RADEON_EMIT_RE_MISC:
195         case RADEON_EMIT_PP_BORDER_COLOR_0:
196         case RADEON_EMIT_PP_BORDER_COLOR_1:
197         case RADEON_EMIT_PP_BORDER_COLOR_2:
198         case RADEON_EMIT_SE_ZBIAS_FACTOR:
199         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
200         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
201         case R200_EMIT_PP_TXCBLEND_0:
202         case R200_EMIT_PP_TXCBLEND_1:
203         case R200_EMIT_PP_TXCBLEND_2:
204         case R200_EMIT_PP_TXCBLEND_3:
205         case R200_EMIT_PP_TXCBLEND_4:
206         case R200_EMIT_PP_TXCBLEND_5:
207         case R200_EMIT_PP_TXCBLEND_6:
208         case R200_EMIT_PP_TXCBLEND_7:
209         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
210         case R200_EMIT_TFACTOR_0:
211         case R200_EMIT_VTX_FMT_0:
212         case R200_EMIT_MATRIX_SELECT_0:
213         case R200_EMIT_TEX_PROC_CTL_2:
214         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
215         case R200_EMIT_PP_TXFILTER_0:
216         case R200_EMIT_PP_TXFILTER_1:
217         case R200_EMIT_PP_TXFILTER_2:
218         case R200_EMIT_PP_TXFILTER_3:
219         case R200_EMIT_PP_TXFILTER_4:
220         case R200_EMIT_PP_TXFILTER_5:
221         case R200_EMIT_VTE_CNTL:
222         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
223         case R200_EMIT_PP_TAM_DEBUG3:
224         case R200_EMIT_PP_CNTL_X:
225         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
226         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
227         case R200_EMIT_RE_SCISSOR_TL_0:
228         case R200_EMIT_RE_SCISSOR_TL_1:
229         case R200_EMIT_RE_SCISSOR_TL_2:
230         case R200_EMIT_SE_VAP_CNTL_STATUS:
231         case R200_EMIT_SE_VTX_STATE_CNTL:
232         case R200_EMIT_RE_POINTSIZE:
233         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
234         case R200_EMIT_PP_CUBIC_FACES_0:
235         case R200_EMIT_PP_CUBIC_FACES_1:
236         case R200_EMIT_PP_CUBIC_FACES_2:
237         case R200_EMIT_PP_CUBIC_FACES_3:
238         case R200_EMIT_PP_CUBIC_FACES_4:
239         case R200_EMIT_PP_CUBIC_FACES_5:
240         case RADEON_EMIT_PP_TEX_SIZE_0:
241         case RADEON_EMIT_PP_TEX_SIZE_1:
242         case RADEON_EMIT_PP_TEX_SIZE_2:
243         case R200_EMIT_RB3D_BLENDCOLOR:
244         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
245         case RADEON_EMIT_PP_CUBIC_FACES_0:
246         case RADEON_EMIT_PP_CUBIC_FACES_1:
247         case RADEON_EMIT_PP_CUBIC_FACES_2:
248         case R200_EMIT_PP_TRI_PERF_CNTL:
249         case R200_EMIT_PP_AFS_0:
250         case R200_EMIT_PP_AFS_1:
251         case R200_EMIT_ATF_TFACTOR:
252         case R200_EMIT_PP_TXCTLALL_0:
253         case R200_EMIT_PP_TXCTLALL_1:
254         case R200_EMIT_PP_TXCTLALL_2:
255         case R200_EMIT_PP_TXCTLALL_3:
256         case R200_EMIT_PP_TXCTLALL_4:
257         case R200_EMIT_PP_TXCTLALL_5:
258         case R200_EMIT_VAP_PVS_CNTL:
259                 /* These packets don't contain memory offsets */
260                 break;
261
262         default:
263                 DRM_ERROR("Unknown state packet ID %d\n", id);
264                 return DRM_ERR(EINVAL);
265         }
266
267         return 0;
268 }
269
270 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
271                                                      dev_priv,
272                                                      drm_file_t *filp_priv,
273                                                      drm_radeon_kcmd_buffer_t *
274                                                      cmdbuf,
275                                                      unsigned int *cmdsz)
276 {
277         u32 *cmd = (u32 *) cmdbuf->buf;
278
279         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
280
281         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
282                 DRM_ERROR("Not a type 3 packet\n");
283                 return DRM_ERR(EINVAL);
284         }
285
286         if (4 * *cmdsz > cmdbuf->bufsz) {
287                 DRM_ERROR("Packet size larger than size of data provided\n");
288                 return DRM_ERR(EINVAL);
289         }
290
291         /* Check client state and fix it up if necessary */
292         if (cmd[0] & 0x8000) {  /* MSB of opcode: next DWORD GUI_CNTL */
293                 u32 offset;
294
295                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
296                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
297                         offset = cmd[2] << 10;
298                         if (radeon_check_and_fixup_offset
299                             (dev_priv, filp_priv, &offset)) {
300                                 DRM_ERROR("Invalid first packet offset\n");
301                                 return DRM_ERR(EINVAL);
302                         }
303                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
304                 }
305
306                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
307                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
308                         offset = cmd[3] << 10;
309                         if (radeon_check_and_fixup_offset
310                             (dev_priv, filp_priv, &offset)) {
311                                 DRM_ERROR("Invalid second packet offset\n");
312                                 return DRM_ERR(EINVAL);
313                         }
314                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
315                 }
316         }
317
318         return 0;
319 }
320
321 /* ================================================================
322  * CP hardware state programming functions
323  */
324
325 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
326                                              drm_clip_rect_t * box)
327 {
328         RING_LOCALS;
329
330         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
331                   box->x1, box->y1, box->x2, box->y2);
332
333         BEGIN_RING(4);
334         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
335         OUT_RING((box->y1 << 16) | box->x1);
336         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
337         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
338         ADVANCE_RING();
339 }
340
341 /* Emit 1.1 state
342  */
343 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
344                              drm_file_t * filp_priv,
345                              drm_radeon_context_regs_t * ctx,
346                              drm_radeon_texture_regs_t * tex,
347                              unsigned int dirty)
348 {
349         RING_LOCALS;
350         DRM_DEBUG("dirty=0x%08x\n", dirty);
351
352         if (dirty & RADEON_UPLOAD_CONTEXT) {
353                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
354                                                   &ctx->rb3d_depthoffset)) {
355                         DRM_ERROR("Invalid depth buffer offset\n");
356                         return DRM_ERR(EINVAL);
357                 }
358
359                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
360                                                   &ctx->rb3d_coloroffset)) {
361                         DRM_ERROR("Invalid depth buffer offset\n");
362                         return DRM_ERR(EINVAL);
363                 }
364
365                 BEGIN_RING(14);
366                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
367                 OUT_RING(ctx->pp_misc);
368                 OUT_RING(ctx->pp_fog_color);
369                 OUT_RING(ctx->re_solid_color);
370                 OUT_RING(ctx->rb3d_blendcntl);
371                 OUT_RING(ctx->rb3d_depthoffset);
372                 OUT_RING(ctx->rb3d_depthpitch);
373                 OUT_RING(ctx->rb3d_zstencilcntl);
374                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
375                 OUT_RING(ctx->pp_cntl);
376                 OUT_RING(ctx->rb3d_cntl);
377                 OUT_RING(ctx->rb3d_coloroffset);
378                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
379                 OUT_RING(ctx->rb3d_colorpitch);
380                 ADVANCE_RING();
381         }
382
383         if (dirty & RADEON_UPLOAD_VERTFMT) {
384                 BEGIN_RING(2);
385                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
386                 OUT_RING(ctx->se_coord_fmt);
387                 ADVANCE_RING();
388         }
389
390         if (dirty & RADEON_UPLOAD_LINE) {
391                 BEGIN_RING(5);
392                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
393                 OUT_RING(ctx->re_line_pattern);
394                 OUT_RING(ctx->re_line_state);
395                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
396                 OUT_RING(ctx->se_line_width);
397                 ADVANCE_RING();
398         }
399
400         if (dirty & RADEON_UPLOAD_BUMPMAP) {
401                 BEGIN_RING(5);
402                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
403                 OUT_RING(ctx->pp_lum_matrix);
404                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
405                 OUT_RING(ctx->pp_rot_matrix_0);
406                 OUT_RING(ctx->pp_rot_matrix_1);
407                 ADVANCE_RING();
408         }
409
410         if (dirty & RADEON_UPLOAD_MASKS) {
411                 BEGIN_RING(4);
412                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
413                 OUT_RING(ctx->rb3d_stencilrefmask);
414                 OUT_RING(ctx->rb3d_ropcntl);
415                 OUT_RING(ctx->rb3d_planemask);
416                 ADVANCE_RING();
417         }
418
419         if (dirty & RADEON_UPLOAD_VIEWPORT) {
420                 BEGIN_RING(7);
421                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
422                 OUT_RING(ctx->se_vport_xscale);
423                 OUT_RING(ctx->se_vport_xoffset);
424                 OUT_RING(ctx->se_vport_yscale);
425                 OUT_RING(ctx->se_vport_yoffset);
426                 OUT_RING(ctx->se_vport_zscale);
427                 OUT_RING(ctx->se_vport_zoffset);
428                 ADVANCE_RING();
429         }
430
431         if (dirty & RADEON_UPLOAD_SETUP) {
432                 BEGIN_RING(4);
433                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
434                 OUT_RING(ctx->se_cntl);
435                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
436                 OUT_RING(ctx->se_cntl_status);
437                 ADVANCE_RING();
438         }
439
440         if (dirty & RADEON_UPLOAD_MISC) {
441                 BEGIN_RING(2);
442                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
443                 OUT_RING(ctx->re_misc);
444                 ADVANCE_RING();
445         }
446
447         if (dirty & RADEON_UPLOAD_TEX0) {
448                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
449                                                   &tex[0].pp_txoffset)) {
450                         DRM_ERROR("Invalid texture offset for unit 0\n");
451                         return DRM_ERR(EINVAL);
452                 }
453
454                 BEGIN_RING(9);
455                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
456                 OUT_RING(tex[0].pp_txfilter);
457                 OUT_RING(tex[0].pp_txformat);
458                 OUT_RING(tex[0].pp_txoffset);
459                 OUT_RING(tex[0].pp_txcblend);
460                 OUT_RING(tex[0].pp_txablend);
461                 OUT_RING(tex[0].pp_tfactor);
462                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
463                 OUT_RING(tex[0].pp_border_color);
464                 ADVANCE_RING();
465         }
466
467         if (dirty & RADEON_UPLOAD_TEX1) {
468                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
469                                                   &tex[1].pp_txoffset)) {
470                         DRM_ERROR("Invalid texture offset for unit 1\n");
471                         return DRM_ERR(EINVAL);
472                 }
473
474                 BEGIN_RING(9);
475                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
476                 OUT_RING(tex[1].pp_txfilter);
477                 OUT_RING(tex[1].pp_txformat);
478                 OUT_RING(tex[1].pp_txoffset);
479                 OUT_RING(tex[1].pp_txcblend);
480                 OUT_RING(tex[1].pp_txablend);
481                 OUT_RING(tex[1].pp_tfactor);
482                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
483                 OUT_RING(tex[1].pp_border_color);
484                 ADVANCE_RING();
485         }
486
487         if (dirty & RADEON_UPLOAD_TEX2) {
488                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
489                                                   &tex[2].pp_txoffset)) {
490                         DRM_ERROR("Invalid texture offset for unit 2\n");
491                         return DRM_ERR(EINVAL);
492                 }
493
494                 BEGIN_RING(9);
495                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
496                 OUT_RING(tex[2].pp_txfilter);
497                 OUT_RING(tex[2].pp_txformat);
498                 OUT_RING(tex[2].pp_txoffset);
499                 OUT_RING(tex[2].pp_txcblend);
500                 OUT_RING(tex[2].pp_txablend);
501                 OUT_RING(tex[2].pp_tfactor);
502                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
503                 OUT_RING(tex[2].pp_border_color);
504                 ADVANCE_RING();
505         }
506
507         return 0;
508 }
509
510 /* Emit 1.2 state
511  */
512 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
513                               drm_file_t * filp_priv,
514                               drm_radeon_state_t * state)
515 {
516         RING_LOCALS;
517
518         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
519                 BEGIN_RING(3);
520                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
521                 OUT_RING(state->context2.se_zbias_factor);
522                 OUT_RING(state->context2.se_zbias_constant);
523                 ADVANCE_RING();
524         }
525
526         return radeon_emit_state(dev_priv, filp_priv, &state->context,
527                                  state->tex, state->dirty);
528 }
529
530 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
531  * 1.3 cmdbuffers allow all previous state to be updated as well as
532  * the tcl scalar and vector areas.
533  */
534 static struct {
535         int start;
536         int len;
537         const char *name;
538 } packet[RADEON_MAX_STATE_PACKETS] = {
539         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
540         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
541         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
542         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
543         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
544         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
545         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
546         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
547         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
548         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
549         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
550         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
551         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
552         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
553         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
554         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
555         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
556         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
557         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
558         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
559         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
560                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
561         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
562         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
563         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
564         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
565         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
566         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
567         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
568         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
569         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
570         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
571         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
572         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
573         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
574         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
575         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
576         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
577         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
578         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
579         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
580         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
581         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
582         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
583         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
584         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
585         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
586         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
587         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
588         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
589         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
590          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
591         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
592         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
593         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
594         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
595         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
596         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
597         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
598         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
599         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
600         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
601         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
602                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
603         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
604         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
605         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
606         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
607         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
608         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
609         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
610         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
611         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
612         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
613         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
614         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
615         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
616         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
617         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
618         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
619         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
620         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
621         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
622         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
623         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
624         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
625         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
626         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
627         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
628         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
629         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
630         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
631         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
632         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
633         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
634         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
635         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
636         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
637 };
638
639 /* ================================================================
640  * Performance monitoring functions
641  */
642
643 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
644                              int x, int y, int w, int h, int r, int g, int b)
645 {
646         u32 color;
647         RING_LOCALS;
648
649         x += dev_priv->sarea_priv->boxes[0].x1;
650         y += dev_priv->sarea_priv->boxes[0].y1;
651
652         switch (dev_priv->color_fmt) {
653         case RADEON_COLOR_FORMAT_RGB565:
654                 color = (((r & 0xf8) << 8) |
655                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
656                 break;
657         case RADEON_COLOR_FORMAT_ARGB8888:
658         default:
659                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
660                 break;
661         }
662
663         BEGIN_RING(4);
664         RADEON_WAIT_UNTIL_3D_IDLE();
665         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
666         OUT_RING(0xffffffff);
667         ADVANCE_RING();
668
669         BEGIN_RING(6);
670
671         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
672         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
673                  RADEON_GMC_BRUSH_SOLID_COLOR |
674                  (dev_priv->color_fmt << 8) |
675                  RADEON_GMC_SRC_DATATYPE_COLOR |
676                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
677
678         if (dev_priv->page_flipping && dev_priv->current_page == 1) {
679                 OUT_RING(dev_priv->front_pitch_offset);
680         } else {
681                 OUT_RING(dev_priv->back_pitch_offset);
682         }
683
684         OUT_RING(color);
685
686         OUT_RING((x << 16) | y);
687         OUT_RING((w << 16) | h);
688
689         ADVANCE_RING();
690 }
691
692 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
693 {
694         /* Collapse various things into a wait flag -- trying to
695          * guess if userspase slept -- better just to have them tell us.
696          */
697         if (dev_priv->stats.last_frame_reads > 1 ||
698             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
699                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
700         }
701
702         if (dev_priv->stats.freelist_loops) {
703                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
704         }
705
706         /* Purple box for page flipping
707          */
708         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
709                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
710
711         /* Red box if we have to wait for idle at any point
712          */
713         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
714                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
715
716         /* Blue box: lost context?
717          */
718
719         /* Yellow box for texture swaps
720          */
721         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
722                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
723
724         /* Green box if hardware never idles (as far as we can tell)
725          */
726         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
727                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
728
729         /* Draw bars indicating number of buffers allocated
730          * (not a great measure, easily confused)
731          */
732         if (dev_priv->stats.requested_bufs) {
733                 if (dev_priv->stats.requested_bufs > 100)
734                         dev_priv->stats.requested_bufs = 100;
735
736                 radeon_clear_box(dev_priv, 4, 16,
737                                  dev_priv->stats.requested_bufs, 4,
738                                  196, 128, 128);
739         }
740
741         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
742
743 }
744
745 /* ================================================================
746  * CP command dispatch functions
747  */
748
/* Dispatch a clear of the color, depth and/or stencil buffers to the
 * command processor, honouring the client's cliprects.
 *
 * "clear" carries the planes to clear and the clear values/masks;
 * "depth_boxes" carries per-cliprect vertex data (as floats stored in
 * the ui[] union) used when the depth/stencil clear is done by
 * rendering quads.  Four paths exist: a 2D fill for front/back color,
 * a hyper-z fast clear, an R200 quad-render clear, and a legacy
 * (pre-R200) quad-render clear.
 */
static void radeon_cp_dispatch_clear(drm_device_t * dev,
                                     drm_radeon_clear_t * clear,
                                     drm_radeon_clear_rect_t * depth_boxes)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
        int nbox = sarea_priv->nbox;
        drm_clip_rect_t *pbox = sarea_priv->boxes;
        unsigned int flags = clear->flags;
        u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
        int i;
        RING_LOCALS;
        DRM_DEBUG("flags = 0x%x\n", flags);

        dev_priv->stats.clears++;

        /* While page flipped, "front" and "back" swap meaning. */
        if (dev_priv->page_flipping && dev_priv->current_page == 1) {
                unsigned int tmp = flags;

                flags &= ~(RADEON_FRONT | RADEON_BACK);
                if (tmp & RADEON_FRONT)
                        flags |= RADEON_BACK;
                if (tmp & RADEON_BACK)
                        flags |= RADEON_FRONT;
        }

        if (flags & (RADEON_FRONT | RADEON_BACK)) {

                BEGIN_RING(4);

                /* Ensure the 3D stream is idle before doing a
                 * 2D fill to clear the front or back buffer.
                 */
                RADEON_WAIT_UNTIL_3D_IDLE();

                OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
                OUT_RING(clear->color_mask);

                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                /* Solid-color fill of every cliprect, per buffer. */
                for (i = 0; i < nbox; i++) {
                        int x = pbox[i].x1;
                        int y = pbox[i].y1;
                        int w = pbox[i].x2 - x;
                        int h = pbox[i].y2 - y;

                        DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
                                  x, y, w, h, flags);

                        if (flags & RADEON_FRONT) {
                                BEGIN_RING(6);

                                OUT_RING(CP_PACKET3
                                         (RADEON_CNTL_PAINT_MULTI, 4));
                                OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                         RADEON_GMC_BRUSH_SOLID_COLOR |
                                         (dev_priv->
                                          color_fmt << 8) |
                                         RADEON_GMC_SRC_DATATYPE_COLOR |
                                         RADEON_ROP3_P |
                                         RADEON_GMC_CLR_CMP_CNTL_DIS);

                                OUT_RING(dev_priv->front_pitch_offset);
                                OUT_RING(clear->clear_color);

                                OUT_RING((x << 16) | y);
                                OUT_RING((w << 16) | h);

                                ADVANCE_RING();
                        }

                        if (flags & RADEON_BACK) {
                                BEGIN_RING(6);

                                OUT_RING(CP_PACKET3
                                         (RADEON_CNTL_PAINT_MULTI, 4));
                                OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                         RADEON_GMC_BRUSH_SOLID_COLOR |
                                         (dev_priv->
                                          color_fmt << 8) |
                                         RADEON_GMC_SRC_DATATYPE_COLOR |
                                         RADEON_ROP3_P |
                                         RADEON_GMC_CLR_CMP_CNTL_DIS);

                                OUT_RING(dev_priv->back_pitch_offset);
                                OUT_RING(clear->clear_color);

                                OUT_RING((x << 16) | y);
                                OUT_RING((w << 16) | h);

                                ADVANCE_RING();
                        }
                }
        }

        /* hyper z clear */
        /* no docs available, based on reverse engineering by Stephane Marchesin */
        if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
            && (flags & RADEON_CLEAR_FASTZ)) {

                int i;
                /* Depth pixels per scanline: 2 bytes each for 16-bit z,
                 * otherwise 4 bytes each. */
                int depthpixperline =
                    dev_priv->depth_fmt ==
                    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
                                                       2) : (dev_priv->
                                                             depth_pitch / 4);

                u32 clearmask;

                /* Clear value with the low byte of the depth mask in
                 * the top byte. */
                u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
                    ((clear->depth_mask & 0xff) << 24);

                /* Make sure we restore the 3D state next time.
                 * we haven't touched any "normal" state - still need this?
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                if ((dev_priv->flags & CHIP_HAS_HIERZ)
                    && (flags & RADEON_USE_HIERZ)) {
                        /* FIXME : reverse engineer that for Rx00 cards */
                        /* FIXME : the mask supposedly contains low-res z values. So can't set
                           just to the max (0xff? or actually 0x3fff?), need to take z clear
                           value into account? */
                        /* pattern seems to work for r100, though get slight
                           rendering errors with glxgears. If hierz is not enabled for r100,
                           only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
                           other ones are ignored, and the same clear mask can be used. That's
                           very different behaviour than R200 which needs different clear mask
                           and different number of tiles to clear if hierz is enabled or not !?!
                         */
                        clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
                } else {
                        /* clear mask : chooses the clearing pattern.
                           rv250: could be used to clear only parts of macrotiles
                           (but that would get really complicated...)?
                           bit 0 and 1 (either or both of them ?!?!) are used to
                           not clear tile (or maybe one of the bits indicates if the tile is
                           compressed or not), bit 2 and 3 to not clear tile 1,...,.
                           Pattern is as follows:
                           | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
                           bits -------------------------------------------------
                           | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
                           rv100: clearmask covers 2x8 4x1 tiles, but one clear still
                           covers 256 pixels ?!?
                         */
                        clearmask = 0x0;
                }

                BEGIN_RING(8);
                RADEON_WAIT_UNTIL_2D_IDLE();
                OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
                             tempRB3D_DEPTHCLEARVALUE);
                /* what offset is this exactly ? */
                OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
                /* need ctlstat, otherwise get some strange black flickering */
                OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
                             RADEON_RB3D_ZC_FLUSH_ALL);
                ADVANCE_RING();

                /* Emit a row of ZMASK clears per tile row of each
                 * cliprect; tile geometry differs per chip family. */
                for (i = 0; i < nbox; i++) {
                        int tileoffset, nrtilesx, nrtilesy, j;
                        /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
                        if ((dev_priv->flags & CHIP_HAS_HIERZ)
                            && !(dev_priv->microcode_version == UCODE_R200)) {
                                /* FIXME : figure this out for r200 (when hierz is enabled). Or
                                   maybe r200 actually doesn't need to put the low-res z value into
                                   the tile cache like r100, but just needs to clear the hi-level z-buffer?
                                   Works for R100, both with hierz and without.
                                   R100 seems to operate on 2x1 8x8 tiles, but...
                                   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
                                   problematic with resolutions which are not 64 pix aligned? */
                                tileoffset =
                                    ((pbox[i].y1 >> 3) * depthpixperline +
                                     pbox[i].x1) >> 6;
                                nrtilesx =
                                    ((pbox[i].x2 & ~63) -
                                     (pbox[i].x1 & ~63)) >> 4;
                                nrtilesy =
                                    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING(4);
                                        OUT_RING(CP_PACKET3
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        /* first tile */
                                        OUT_RING(tileoffset * 8);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 4);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 6;
                                }
                        } else if (dev_priv->microcode_version == UCODE_R200) {
                                /* works for rv250. */
                                /* find first macro tile (8x2 4x4 z-pixels on rv250) */
                                tileoffset =
                                    ((pbox[i].y1 >> 3) * depthpixperline +
                                     pbox[i].x1) >> 5;
                                nrtilesx =
                                    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
                                nrtilesy =
                                    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING(4);
                                        OUT_RING(CP_PACKET3
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        /* first tile */
                                        /* judging by the first tile offset needed, could possibly
                                           directly address/clear 4x4 tiles instead of 8x2 * 4x4
                                           macro tiles, though would still need clear mask for
                                           right/bottom if truly 4x4 granularity is desired ? */
                                        OUT_RING(tileoffset * 16);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 1);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 5;
                                }
                        } else {        /* rv 100 */
                                /* rv100 might not need 64 pix alignment, who knows */
                                /* offsets are, hmm, weird */
                                tileoffset =
                                    ((pbox[i].y1 >> 4) * depthpixperline +
                                     pbox[i].x1) >> 6;
                                nrtilesx =
                                    ((pbox[i].x2 & ~63) -
                                     (pbox[i].x1 & ~63)) >> 4;
                                nrtilesy =
                                    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING(4);
                                        OUT_RING(CP_PACKET3
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        OUT_RING(tileoffset * 128);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 4);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 6;
                                }
                        }
                }

                /* TODO don't always clear all hi-level z tiles */
                if ((dev_priv->flags & CHIP_HAS_HIERZ)
                    && (dev_priv->microcode_version == UCODE_R200)
                    && (flags & RADEON_USE_HIERZ))
                        /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
                        /* FIXME : the mask supposedly contains low-res z values. So can't set
                           just to the max (0xff? or actually 0x3fff?), need to take z clear
                           value into account? */
                {
                        BEGIN_RING(4);
                        OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
                        OUT_RING(0x0);  /* First tile */
                        OUT_RING(0x3cc0);
                        OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
                        ADVANCE_RING();
                }
        }

        /* We have to clear the depth and/or stencil buffers by
         * rendering a quad into just those buffers.  Thus, we have to
         * make sure the 3D engine is configured correctly.
         */
        else if ((dev_priv->microcode_version == UCODE_R200) &&
                (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

                int tempPP_CNTL;
                int tempRE_CNTL;
                int tempRB3D_CNTL;
                int tempRB3D_ZSTENCILCNTL;
                int tempRB3D_STENCILREFMASK;
                int tempRB3D_PLANEMASK;
                int tempSE_CNTL;
                int tempSE_VTE_CNTL;
                int tempSE_VTX_FMT_0;
                int tempSE_VTX_FMT_1;
                int tempSE_VAP_CNTL;
                int tempRE_AUX_SCISSOR_CNTL;

                tempPP_CNTL = 0;
                tempRE_CNTL = 0;

                tempRB3D_CNTL = depth_clear->rb3d_cntl;

                tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
                tempRB3D_STENCILREFMASK = 0x0;

                tempSE_CNTL = depth_clear->se_cntl;

                /* Disable TCL */

                tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
                                          (0x9 <<
                                           SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

                /* Zero plane mask: no color buffer writes from the quads. */
                tempRB3D_PLANEMASK = 0x0;

                tempRE_AUX_SCISSOR_CNTL = 0x0;

                tempSE_VTE_CNTL =
                    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

                /* Vertex format (X, Y, Z, W) */
                tempSE_VTX_FMT_0 =
                    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
                    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
                tempSE_VTX_FMT_1 = 0x0;

                /*
                 * Depth buffer specific enables
                 */
                if (flags & RADEON_DEPTH) {
                        /* Enable depth buffer */
                        tempRB3D_CNTL |= RADEON_Z_ENABLE;
                } else {
                        /* Disable depth buffer */
                        tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
                }

                /*
                 * Stencil buffer specific enables
                 */
                if (flags & RADEON_STENCIL) {
                        tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = clear->depth_mask;
                } else {
                        tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = 0x00000000;
                }

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                            RADEON_Z_DECOMPRESSION_ENABLE;
                }
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
                }

                BEGIN_RING(26);
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
                OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
                OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
                OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
                OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
                             tempRB3D_STENCILREFMASK);
                OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
                OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
                OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
                OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
                OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
                OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
                OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                /* One triangle (rect list) per cliprect, vertices taken
                 * from the client-supplied depth_boxes. */
                for (i = 0; i < nbox; i++) {

                        /* Funny that this should be required --
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                        BEGIN_RING(14);
                        OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
                        OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
                                  RADEON_PRIM_WALK_RING |
                                  (3 << RADEON_NUM_VERTICES_SHIFT)));
                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);   /* w = 1.0f */
                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        ADVANCE_RING();
                }
        } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
                /* Legacy (pre-R200) quad-render depth/stencil clear. */

                int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

                rb3d_cntl = depth_clear->rb3d_cntl;

                if (flags & RADEON_DEPTH) {
                        rb3d_cntl |= RADEON_Z_ENABLE;
                } else {
                        rb3d_cntl &= ~RADEON_Z_ENABLE;
                }

                if (flags & RADEON_STENCIL) {
                        rb3d_cntl |= RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
                } else {
                        rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = 0x00000000;
                }

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                            RADEON_Z_DECOMPRESSION_ENABLE;
                }
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
                }

                BEGIN_RING(13);
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
                OUT_RING(0x00000000);
                OUT_RING(rb3d_cntl);

                OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
                OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
                OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
                OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                for (i = 0; i < nbox; i++) {

                        /* Funny that this should be required --
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                        BEGIN_RING(15);

                        OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
                        OUT_RING(RADEON_VTX_Z_PRESENT |
                                 RADEON_VTX_PKCOLOR_PRESENT);
                        OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
                                  RADEON_PRIM_WALK_RING |
                                  RADEON_MAOS_ENABLE |
                                  RADEON_VTX_FMT_RADEON_MODE |
                                  (3 << RADEON_NUM_VERTICES_SHIFT)));

                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);  /* packed color */

                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        ADVANCE_RING();
                }
        }

        /* Increment the clear counter.  The client-side 3D driver must
         * wait on this value before performing the clear ioctl.  We
         * need this because the card's so damned fast...
         */
        dev_priv->sarea_priv->last_clear++;

        BEGIN_RING(4);

        RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
        RADEON_WAIT_UNTIL_IDLE();

        ADVANCE_RING();
}
1239
1240 static void radeon_cp_dispatch_swap(drm_device_t * dev)
1241 {
1242         drm_radeon_private_t *dev_priv = dev->dev_private;
1243         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1244         int nbox = sarea_priv->nbox;
1245         drm_clip_rect_t *pbox = sarea_priv->boxes;
1246         int i;
1247         RING_LOCALS;
1248         DRM_DEBUG("\n");
1249
1250         /* Do some trivial performance monitoring...
1251          */
1252         if (dev_priv->do_boxes)
1253                 radeon_cp_performance_boxes(dev_priv);
1254
1255         /* Wait for the 3D stream to idle before dispatching the bitblt.
1256          * This will prevent data corruption between the two streams.
1257          */
1258         BEGIN_RING(2);
1259
1260         RADEON_WAIT_UNTIL_3D_IDLE();
1261
1262         ADVANCE_RING();
1263
1264         for (i = 0; i < nbox; i++) {
1265                 int x = pbox[i].x1;
1266                 int y = pbox[i].y1;
1267                 int w = pbox[i].x2 - x;
1268                 int h = pbox[i].y2 - y;
1269
1270                 DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);
1271
1272                 BEGIN_RING(7);
1273
1274                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1275                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1276                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1277                          RADEON_GMC_BRUSH_NONE |
1278                          (dev_priv->color_fmt << 8) |
1279                          RADEON_GMC_SRC_DATATYPE_COLOR |
1280                          RADEON_ROP3_S |
1281                          RADEON_DP_SRC_SOURCE_MEMORY |
1282                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1283
1284                 /* Make this work even if front & back are flipped:
1285                  */
1286                 if (dev_priv->current_page == 0) {
1287                         OUT_RING(dev_priv->back_pitch_offset);
1288                         OUT_RING(dev_priv->front_pitch_offset);
1289                 } else {
1290                         OUT_RING(dev_priv->front_pitch_offset);
1291                         OUT_RING(dev_priv->back_pitch_offset);
1292                 }
1293
1294                 OUT_RING((x << 16) | y);
1295                 OUT_RING((x << 16) | y);
1296                 OUT_RING((w << 16) | h);
1297
1298                 ADVANCE_RING();
1299         }
1300
1301         /* Increment the frame counter.  The client-side 3D driver must
1302          * throttle the framerate by waiting for this value before
1303          * performing the swapbuffer ioctl.
1304          */
1305         dev_priv->sarea_priv->last_frame++;
1306
1307         BEGIN_RING(4);
1308
1309         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1310         RADEON_WAIT_UNTIL_2D_IDLE();
1311
1312         ADVANCE_RING();
1313 }
1314
/* Perform a page flip: repoint both CRTCs at the buffer selected by
 * the current page, toggle the page bookkeeping (mirrored into the
 * SAREA for clients), and bump the frame age for throttling.
 */
static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
        /* Scan out from front or back depending on the current page. */
        int offset = (dev_priv->current_page == 1)
            ? dev_priv->front_offset : dev_priv->back_offset;
        RING_LOCALS;
        DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
                  __FUNCTION__,
                  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

        /* Do some trivial performance monitoring...
         */
        if (dev_priv->do_boxes) {
                dev_priv->stats.boxes |= RADEON_BOX_FLIP;
                radeon_cp_performance_boxes(dev_priv);
        }

        /* Update the frame offsets for both CRTCs
         */
        BEGIN_RING(6);

        RADEON_WAIT_UNTIL_3D_IDLE();
        /* Base address = frame origin (y * pitch + x * cpp, rounded
         * down to 8-byte alignment) plus the buffer offset.
         * NOTE(review): (color_fmt - 2) is presumably the bytes per
         * pixel for the formats in use -- confirm against the
         * RADEON_COLOR_FORMAT_* values.
         */
        OUT_RING_REG(RADEON_CRTC_OFFSET,
                     ((sarea->frame.y * dev_priv->front_pitch +
                       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
                     + offset);
        OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
                     + offset);

        ADVANCE_RING();

        /* Increment the frame counter.  The client-side 3D driver must
         * throttle the framerate by waiting for this value before
         * performing the swapbuffer ioctl.
         */
        dev_priv->sarea_priv->last_frame++;
        /* Toggle 0 <-> 1 and publish the new page to the SAREA. */
        dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
            1 - dev_priv->current_page;

        BEGIN_RING(2);

        RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

        ADVANCE_RING();
}
1361
1362 static int bad_prim_vertex_nr(int primitive, int nr)
1363 {
1364         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1365         case RADEON_PRIM_TYPE_NONE:
1366         case RADEON_PRIM_TYPE_POINT:
1367                 return nr < 1;
1368         case RADEON_PRIM_TYPE_LINE:
1369                 return (nr & 1) || nr == 0;
1370         case RADEON_PRIM_TYPE_LINE_STRIP:
1371                 return nr < 2;
1372         case RADEON_PRIM_TYPE_TRI_LIST:
1373         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1374         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1375         case RADEON_PRIM_TYPE_RECT_LIST:
1376                 return nr % 3 || nr == 0;
1377         case RADEON_PRIM_TYPE_TRI_FAN:
1378         case RADEON_PRIM_TYPE_TRI_STRIP:
1379                 return nr < 3;
1380         default:
1381                 return 1;
1382         }
1383 }
1384
/* Description of one hardware (TCL) primitive, as dispatched by
 * radeon_cp_dispatch_vertex() / radeon_cp_dispatch_indices().
 */
typedef struct {
	unsigned int start;	/* byte offset of the prim within the buffer */
	unsigned int finish;	/* byte offset just past the end of the prim */
	unsigned int prim;	/* hardware primitive type plus flag bits */
	unsigned int numverts;	/* number of vertices in the primitive */
	unsigned int offset;	/* vertex array offset (indexed prims only) */
	unsigned int vc_format;	/* vertex format word emitted to the CP */
} drm_radeon_tcl_prim_t;
1393
/* Emit the rendering commands for a vertex-buffer primitive, once per
 * cliprect (and at least once when there are no cliprects).
 */
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GART-relative address of the first vertex of this primitive */
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	/* Reject vertex counts that are illegal for this primitive type. */
	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}
1439
/* Mark a DMA buffer as pending and emit its dispatch age to the ring,
 * so the buffer can be reclaimed once the CP has passed this point.
 */
static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	/* Tag the buffer with the next dispatch age. */
	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
1456
/* Fire off the byte range [start, end) of a buffer as an indirect
 * command buffer through the CP.  A zero-length range is a no-op.
 */
static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		/* GART-relative address of the first byte to execute */
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		/* byte count rounded up to whole dwords */
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
1490
/* Build an indexed-primitive CP packet in place inside the element
 * buffer and dispatch it once per cliprect via the indirect buffer
 * mechanism (at least once when there are no cliprects).
 */
static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GART-relative address of the vertex array the indices refer to */
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	/* the u16 index data starts after the packet header we write below */
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	/* Reject empty ranges and starts that are not 8-byte aligned. */
	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	/* Write the packet header directly into the element buffer. */
	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);

}
1546
1547 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1548
/* Upload a texture image to the card by copying it from user space
 * into DMA buffers and blitting it to its destination offset.  Large
 * images are uploaded in multiple passes; on buffer exhaustion the
 * (partially updated) image descriptor is copied back to user space
 * and EAGAIN is returned so the client can retry the remainder.
 *
 * Returns 0 on success or a DRM_ERR() code (EINVAL/EFAULT/EAGAIN).
 */
static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	/* Validate/relocate the client-supplied destination offset. */
	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	/* Derive the blit color format and row widths in bytes from the
	 * texture format's bytes-per-texel (4, 2 or 1).
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
	/* Source pitch in 64-byte units; must be non-zero for any
	 * multi-row blit.
	 */
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return DRM_ERR(EINVAL);

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			/* Too big for one pass: upload as many whole rows
			 * as fit in one buffer; the loop handles the rest.
			 */
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
		/* NOTE(review): this retry path is deliberately disabled
		 * by the constant 0 — on freelist exhaustion we fall
		 * through to the EAGAIN path below instead of idling the
		 * CP and retrying in the kernel.
		 */
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			/* Hand the updated image state back so user space
			 * can resume where we stopped.
			 */
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

/* Copy _width bytes from user space into the DMA buffer, bailing out
 * of the enclosing function with EFAULT on failure.
 */
#define RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
			return DRM_ERR(EFAULT); \
		} \
	} while(0)

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						(int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				/* One 16-byte tile row per scanline. */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				/* Interleave two scanlines into the tiled
				 * 16-byte sub-tile layout.
				 */
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
		buf->filp = filp;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		/* Blit the staged data from the DMA buffer to the texture
		 * destination.
		 */
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	return 0;
}
1783
/* Load a 32x32-bit polygon stipple pattern into the RE stipple RAM. */
static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(35);

	/* Reset the stipple write address to row 0 (presumably the
	 * address auto-increments on each DATA write — hardware detail).
	 */
	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	/* Stream all 32 rows in one register-table write. */
	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (i = 0; i < 32; i++) {
		OUT_RING(stipple[i]);
	}

	ADVANCE_RING();
}
1803
/* Write one surface's flags and address bounds to its hardware
 * register triple.  No-op until the MMIO map is available; the CP is
 * idled before touching the registers.
 */
static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	/* Each surface occupies a 16-byte register stride. */
	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].upper);
}
1819
1820 /* Allocates a virtual surface
1821  * doesn't always allocate a real surface, will stretch an existing
1822  * surface when possible.
1823  *
1824  * Note that refcount can be at most 2, since during a free refcount=3
1825  * might mean we have to allocate a new surface which might not always
1826  * be available.
1827  * For example : we allocate three contigous surfaces ABC. If B is
1828  * freed, we suddenly need two surfaces to store A and C, which might
1829  * not always be available.
1830  */
1831 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1832                          drm_radeon_private_t *dev_priv, DRMFILE filp)
1833 {
1834         struct radeon_virt_surface *s;
1835         int i;
1836         int virt_surface_index;
1837         uint32_t new_upper, new_lower;
1838
1839         new_lower = new->address;
1840         new_upper = new_lower + new->size - 1;
1841
1842         /* sanity check */
1843         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1844             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1845              RADEON_SURF_ADDRESS_FIXED_MASK)
1846             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1847                 return -1;
1848
1849         /* make sure there is no overlap with existing surfaces */
1850         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1851                 if ((dev_priv->surfaces[i].refcount != 0) &&
1852                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1853                       (new_lower < dev_priv->surfaces[i].upper)) ||
1854                      ((new_lower < dev_priv->surfaces[i].lower) &&
1855                       (new_upper > dev_priv->surfaces[i].lower)))) {
1856                         return -1;
1857                 }
1858         }
1859
1860         /* find a virtual surface */
1861         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1862                 if (dev_priv->virt_surfaces[i].filp == 0)
1863                         break;
1864         if (i == 2 * RADEON_MAX_SURFACES) {
1865                 return -1;
1866         }
1867         virt_surface_index = i;
1868
1869         /* try to reuse an existing surface */
1870         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1871                 /* extend before */
1872                 if ((dev_priv->surfaces[i].refcount == 1) &&
1873                     (new->flags == dev_priv->surfaces[i].flags) &&
1874                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1875                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1876                         s->surface_index = i;
1877                         s->lower = new_lower;
1878                         s->upper = new_upper;
1879                         s->flags = new->flags;
1880                         s->filp = filp;
1881                         dev_priv->surfaces[i].refcount++;
1882                         dev_priv->surfaces[i].lower = s->lower;
1883                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1884                         return virt_surface_index;
1885                 }
1886
1887                 /* extend after */
1888                 if ((dev_priv->surfaces[i].refcount == 1) &&
1889                     (new->flags == dev_priv->surfaces[i].flags) &&
1890                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
1891                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1892                         s->surface_index = i;
1893                         s->lower = new_lower;
1894                         s->upper = new_upper;
1895                         s->flags = new->flags;
1896                         s->filp = filp;
1897                         dev_priv->surfaces[i].refcount++;
1898                         dev_priv->surfaces[i].upper = s->upper;
1899                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1900                         return virt_surface_index;
1901                 }
1902         }
1903
1904         /* okay, we need a new one */
1905         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1906                 if (dev_priv->surfaces[i].refcount == 0) {
1907                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1908                         s->surface_index = i;
1909                         s->lower = new_lower;
1910                         s->upper = new_upper;
1911                         s->flags = new->flags;
1912                         s->filp = filp;
1913                         dev_priv->surfaces[i].refcount = 1;
1914                         dev_priv->surfaces[i].lower = s->lower;
1915                         dev_priv->surfaces[i].upper = s->upper;
1916                         dev_priv->surfaces[i].flags = s->flags;
1917                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1918                         return virt_surface_index;
1919                 }
1920         }
1921
1922         /* we didn't find anything */
1923         return -1;
1924 }
1925
/* Release the virtual surface owned by <filp> that starts at <lower>,
 * shrinking the backing hardware surface and clearing its flags when
 * the last reference goes away.  Returns 0 on success, 1 when no
 * matching virtual surface exists.
 */
static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int i;
	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->filp) {
			if ((lower == s->lower) && (filp == s->filp)) {
				/* pull the hardware surface's bounds in
				 * off the range this virtual surface
				 * contributed
				 */
				if (dev_priv->surfaces[s->surface_index].
				    lower == s->lower)
					dev_priv->surfaces[s->surface_index].
					    lower = s->upper;

				if (dev_priv->surfaces[s->surface_index].
				    upper == s->upper)
					dev_priv->surfaces[s->surface_index].
					    upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				/* last reference: disable the hw surface */
				if (dev_priv->surfaces[s->surface_index].
				    refcount == 0)
					dev_priv->surfaces[s->surface_index].
					    flags = 0;
				s->filp = NULL;
				radeon_apply_surface_regs(s->surface_index,
							  dev_priv);
				return 0;
			}
		}
	}
	return 1;
}
1960
1961 static void radeon_surfaces_release(DRMFILE filp,
1962                                     drm_radeon_private_t * dev_priv)
1963 {
1964         int i;
1965         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1966                 if (dev_priv->virt_surfaces[i].filp == filp)
1967                         free_surface(filp, dev_priv,
1968                                      dev_priv->virt_surfaces[i].lower);
1969         }
1970 }
1971
1972 /* ================================================================
1973  * IOCTL functions
1974  */
1975 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1976 {
1977         DRM_DEVICE;
1978         drm_radeon_private_t *dev_priv = dev->dev_private;
1979         drm_radeon_surface_alloc_t alloc;
1980
1981         DRM_COPY_FROM_USER_IOCTL(alloc,
1982                                  (drm_radeon_surface_alloc_t __user *) data,
1983                                  sizeof(alloc));
1984
1985         if (alloc_surface(&alloc, dev_priv, filp) == -1)
1986                 return DRM_ERR(EINVAL);
1987         else
1988                 return 0;
1989 }
1990
1991 static int radeon_surface_free(DRM_IOCTL_ARGS)
1992 {
1993         DRM_DEVICE;
1994         drm_radeon_private_t *dev_priv = dev->dev_private;
1995         drm_radeon_surface_free_t memfree;
1996
1997         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
1998                                  sizeof(memfree));
1999
2000         if (free_surface(filp, dev_priv, memfree.address))
2001                 return DRM_ERR(EINVAL);
2002         else
2003                 return 0;
2004 }
2005
/* DRM ioctl: clear color/depth buffers within the client's current
 * cliprects.
 */
static int radeon_cp_clear(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
				 sizeof(clear));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Clamp the client-supplied cliprect count before using it to
	 * size the copy below.
	 */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear(dev, &clear, depth_boxes);

	COMMIT_RING();
	return 0;
}
2034
2035 /* Not sure why this isn't set all the time:
2036  */
/* Enable page flipping: turn on the flip-pending mechanism for both
 * CRTCs and start out displaying page 0.
 */
static int radeon_do_init_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("\n");

	/* Read-modify-write each CRTC offset control register to set
	 * the FLIP_CNTL bit.
	 */
	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
2060
2061 /* Called whenever a client dies, from drm_release.
2062  * NOTE:  Lock isn't necessarily held when this is called!
2063  */
2064 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2065 {
2066         drm_radeon_private_t *dev_priv = dev->dev_private;
2067         DRM_DEBUG("\n");
2068
2069         if (dev_priv->current_page != 0)
2070                 radeon_cp_dispatch_flip(dev);
2071
2072         dev_priv->page_flipping = 0;
2073         return 0;
2074 }
2075
2076 /* Swapping and flipping are different operations, need different ioctls.
2077  * They can & should be intermixed to support multiple 3d windows.
2078  */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Lazily enable page flipping the first time it is requested. */
	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev);

	radeon_cp_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}
2097
/* DRM ioctl: blit the back buffer to the front buffer within the
 * client's current cliprects.
 */
static int radeon_cp_swap(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Clamp the client-supplied cliprect count. */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap(dev);
	/* Drop context ownership — NOTE(review): presumably forces the
	 * next client to re-emit its 3D state; confirm against the
	 * ctx_owner users elsewhere in the driver.
	 */
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}
2118
/* DRM_IOCTL_RADEON_VERTEX handler: emit a vertex-array primitive from a
 * client DMA buffer.  Validates the buffer index, primitive type, buffer
 * ownership and pending status before dispatching; optionally discards
 * the buffer afterwards.  Returns 0 on success or a DRM error code.
 */
static int radeon_cp_vertex(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_file_t *filp_priv;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        drm_device_dma_t *dma = dev->dma;
        drm_buf_t *buf;
        drm_radeon_vertex_t vertex;
        drm_radeon_tcl_prim_t prim;

        LOCK_TEST_WITH_RETURN(dev, filp);

        DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

        DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
                                 sizeof(vertex));

        DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
                  DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);

        /* All of the following fields come straight from userspace and
         * must be validated before use.
         */
        if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
                DRM_ERROR("buffer index %d (of %d max)\n",
                          vertex.idx, dma->buf_count - 1);
                return DRM_ERR(EINVAL);
        }
        if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
                DRM_ERROR("buffer prim %d\n", vertex.prim);
                return DRM_ERR(EINVAL);
        }

        RING_SPACE_TEST_WITH_RETURN(dev_priv);
        VB_AGE_TEST_WITH_RETURN(dev_priv);

        buf = dma->buflist[vertex.idx];

        /* Reject buffers owned by another client or already queued. */
        if (buf->filp != filp) {
                DRM_ERROR("process %d using buffer owned by %p\n",
                          DRM_CURRENTPID, buf->filp);
                return DRM_ERR(EINVAL);
        }
        if (buf->pending) {
                DRM_ERROR("sending pending buffer %d\n", vertex.idx);
                return DRM_ERR(EINVAL);
        }

        /* Build up a prim_t record:
         */
        if (vertex.count) {
                buf->used = vertex.count;       /* not used? */

                /* Flush any dirty client state (other than cliprects,
                 * which are handled at dispatch time) before drawing.
                 */
                if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
                        if (radeon_emit_state(dev_priv, filp_priv,
                                              &sarea_priv->context_state,
                                              sarea_priv->tex_state,
                                              sarea_priv->dirty)) {
                                DRM_ERROR("radeon_emit_state failed\n");
                                return DRM_ERR(EINVAL);
                        }

                        sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
                                               RADEON_UPLOAD_TEX1IMAGES |
                                               RADEON_UPLOAD_TEX2IMAGES |
                                               RADEON_REQUIRE_QUIESCENCE);
                }

                prim.start = 0;
                prim.finish = vertex.count;     /* unused */
                prim.prim = vertex.prim;
                prim.numverts = vertex.count;
                prim.vc_format = dev_priv->sarea_priv->vc_format;

                radeon_cp_dispatch_vertex(dev, buf, &prim);
        }

        if (vertex.discard) {
                radeon_cp_discard_buffer(dev, buf);
        }

        COMMIT_RING();
        return 0;
}
2201
2202 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2203 {
2204         DRM_DEVICE;
2205         drm_radeon_private_t *dev_priv = dev->dev_private;
2206         drm_file_t *filp_priv;
2207         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2208         drm_device_dma_t *dma = dev->dma;
2209         drm_buf_t *buf;
2210         drm_radeon_indices_t elts;
2211         drm_radeon_tcl_prim_t prim;
2212         int count;
2213
2214         LOCK_TEST_WITH_RETURN(dev, filp);
2215
2216         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2217
2218         DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2219                                  sizeof(elts));
2220
2221         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2222                   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2223
2224         if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2225                 DRM_ERROR("buffer index %d (of %d max)\n",
2226                           elts.idx, dma->buf_count - 1);
2227                 return DRM_ERR(EINVAL);
2228         }
2229         if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2230                 DRM_ERROR("buffer prim %d\n", elts.prim);
2231                 return DRM_ERR(EINVAL);
2232         }
2233
2234         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2235         VB_AGE_TEST_WITH_RETURN(dev_priv);
2236
2237         buf = dma->buflist[elts.idx];
2238
2239         if (buf->filp != filp) {
2240                 DRM_ERROR("process %d using buffer owned by %p\n",
2241                           DRM_CURRENTPID, buf->filp);
2242                 return DRM_ERR(EINVAL);
2243         }
2244         if (buf->pending) {
2245                 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2246                 return DRM_ERR(EINVAL);
2247         }
2248
2249         count = (elts.end - elts.start) / sizeof(u16);
2250         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2251
2252         if (elts.start & 0x7) {
2253                 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2254                 return DRM_ERR(EINVAL);
2255         }
2256         if (elts.start < buf->used) {
2257                 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2258                 return DRM_ERR(EINVAL);
2259         }
2260
2261         buf->used = elts.end;
2262
2263         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2264                 if (radeon_emit_state(dev_priv, filp_priv,
2265                                       &sarea_priv->context_state,
2266                                       sarea_priv->tex_state,
2267                                       sarea_priv->dirty)) {
2268                         DRM_ERROR("radeon_emit_state failed\n");
2269                         return DRM_ERR(EINVAL);
2270                 }
2271
2272                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2273                                        RADEON_UPLOAD_TEX1IMAGES |
2274                                        RADEON_UPLOAD_TEX2IMAGES |
2275                                        RADEON_REQUIRE_QUIESCENCE);
2276         }
2277
2278         /* Build up a prim_t record:
2279          */
2280         prim.start = elts.start;
2281         prim.finish = elts.end;
2282         prim.prim = elts.prim;
2283         prim.offset = 0;        /* offset from start of dma buffers */
2284         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2285         prim.vc_format = dev_priv->sarea_priv->vc_format;
2286
2287         radeon_cp_dispatch_indices(dev, buf, &prim);
2288         if (elts.discard) {
2289                 radeon_cp_discard_buffer(dev, buf);
2290         }
2291
2292         COMMIT_RING();
2293         return 0;
2294 }
2295
/* DRM_IOCTL_RADEON_TEXTURE handler: upload a texture image via the CP.
 * Copies both the texture descriptor and the nested image descriptor in
 * from userspace before dispatching.  Returns the dispatch result.
 */
static int radeon_cp_texture(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_texture_t tex;
        drm_radeon_tex_image_t image;
        int ret;

        LOCK_TEST_WITH_RETURN(dev, filp);

        DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
                                 sizeof(tex));

        /* tex.image is a user pointer to a second struct; reject NULL
         * here so the nested copy below has a meaningful failure mode.
         */
        if (tex.image == NULL) {
                DRM_ERROR("null texture image!\n");
                return DRM_ERR(EINVAL);
        }

        if (DRM_COPY_FROM_USER(&image,
                               (drm_radeon_tex_image_t __user *) tex.image,
                               sizeof(image)))
                return DRM_ERR(EFAULT);

        RING_SPACE_TEST_WITH_RETURN(dev_priv);
        VB_AGE_TEST_WITH_RETURN(dev_priv);

        ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);

        COMMIT_RING();
        return ret;
}
2327
/* DRM_IOCTL_RADEON_STIPPLE handler: upload a polygon stipple pattern.
 * Copies 32 dwords (a 32x32 bit pattern) from userspace into a kernel
 * buffer and dispatches it to the hardware.
 */
static int radeon_cp_stipple(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_stipple_t stipple;
        u32 mask[32];

        LOCK_TEST_WITH_RETURN(dev, filp);

        DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
                                 sizeof(stipple));

        /* Copy the whole pattern into kernel space before emitting, so
         * userspace cannot change it under us.
         */
        if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
                return DRM_ERR(EFAULT);

        RING_SPACE_TEST_WITH_RETURN(dev_priv);

        radeon_cp_dispatch_stipple(dev, mask);

        COMMIT_RING();
        return 0;
}
2350
/* DRM_IOCTL_RADEON_INDIRECT handler: submit a raw command buffer.
 * The buffer contents are NOT verified (see the comment below), so this
 * ioctl is restricted to privileged clients by the ioctl flags set up
 * elsewhere in the driver.
 */
static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_device_dma_t *dma = dev->dma;
        drm_buf_t *buf;
        drm_radeon_indirect_t indirect;
        RING_LOCALS;

        LOCK_TEST_WITH_RETURN(dev, filp);

        DRM_COPY_FROM_USER_IOCTL(indirect,
                                 (drm_radeon_indirect_t __user *) data,
                                 sizeof(indirect));

        DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
                  indirect.idx, indirect.start, indirect.end, indirect.discard);

        /* Validate the user-supplied buffer index and ownership. */
        if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
                DRM_ERROR("buffer index %d (of %d max)\n",
                          indirect.idx, dma->buf_count - 1);
                return DRM_ERR(EINVAL);
        }

        buf = dma->buflist[indirect.idx];

        if (buf->filp != filp) {
                DRM_ERROR("process %d using buffer owned by %p\n",
                          DRM_CURRENTPID, buf->filp);
                return DRM_ERR(EINVAL);
        }
        if (buf->pending) {
                DRM_ERROR("sending pending buffer %d\n", indirect.idx);
                return DRM_ERR(EINVAL);
        }

        /* The region [start, end) must lie beyond what has already been
         * consumed from this buffer.
         */
        if (indirect.start < buf->used) {
                DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
                          indirect.start, buf->used);
                return DRM_ERR(EINVAL);
        }

        RING_SPACE_TEST_WITH_RETURN(dev_priv);
        VB_AGE_TEST_WITH_RETURN(dev_priv);

        buf->used = indirect.end;

        /* Wait for the 3D stream to idle before the indirect buffer
         * containing 2D acceleration commands is processed.
         */
        BEGIN_RING(2);

        RADEON_WAIT_UNTIL_3D_IDLE();

        ADVANCE_RING();

        /* Dispatch the indirect buffer full of commands from the
         * X server.  This is insecure and is thus only available to
         * privileged clients.
         */
        radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
        if (indirect.discard) {
                radeon_cp_discard_buffer(dev, buf);
        }

        COMMIT_RING();
        return 0;
}
2419
/* DRM_IOCTL_RADEON_VERTEX2 handler: emit multiple primitives from one
 * DMA buffer, each optionally preceded by a state change.  The prim and
 * state arrays live in userspace and are copied in per-iteration.
 */
static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_file_t *filp_priv;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        drm_device_dma_t *dma = dev->dma;
        drm_buf_t *buf;
        drm_radeon_vertex2_t vertex;
        int i;
        unsigned char laststate;

        LOCK_TEST_WITH_RETURN(dev, filp);

        DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

        DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
                                 sizeof(vertex));

        DRM_DEBUG("pid=%d index=%d discard=%d\n",
                  DRM_CURRENTPID, vertex.idx, vertex.discard);

        if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
                DRM_ERROR("buffer index %d (of %d max)\n",
                          vertex.idx, dma->buf_count - 1);
                return DRM_ERR(EINVAL);
        }

        RING_SPACE_TEST_WITH_RETURN(dev_priv);
        VB_AGE_TEST_WITH_RETURN(dev_priv);

        buf = dma->buflist[vertex.idx];

        /* Reject buffers owned by another client or already queued. */
        if (buf->filp != filp) {
                DRM_ERROR("process %d using buffer owned by %p\n",
                          DRM_CURRENTPID, buf->filp);
                return DRM_ERR(EINVAL);
        }

        if (buf->pending) {
                DRM_ERROR("sending pending buffer %d\n", vertex.idx);
                return DRM_ERR(EINVAL);
        }

        if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
                return DRM_ERR(EINVAL);

        /* laststate starts at 0xff, a sentinel meaning "no state emitted
         * yet", so the first prim always triggers a state upload.
         * NOTE(review): prim.stateidx is only validated implicitly by
         * the DRM_COPY_FROM_USER of &vertex.state[prim.stateidx], which
         * fails with EFAULT on an unmapped address — confirm that the
         * state array bound is enforced by userspace.
         */
        for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
                drm_radeon_prim_t prim;
                drm_radeon_tcl_prim_t tclprim;

                if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
                        return DRM_ERR(EFAULT);

                if (prim.stateidx != laststate) {
                        drm_radeon_state_t state;

                        if (DRM_COPY_FROM_USER(&state,
                                               &vertex.state[prim.stateidx],
                                               sizeof(state)))
                                return DRM_ERR(EFAULT);

                        if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
                                DRM_ERROR("radeon_emit_state2 failed\n");
                                return DRM_ERR(EINVAL);
                        }

                        laststate = prim.stateidx;
                }

                tclprim.start = prim.start;
                tclprim.finish = prim.finish;
                tclprim.prim = prim.prim;
                tclprim.vc_format = prim.vc_format;

                /* Indexed prims dispatch through the index path with the
                 * element data at a fixed 64-byte-per-vertex offset;
                 * array prims go through the vertex path directly.
                 */
                if (prim.prim & RADEON_PRIM_WALK_IND) {
                        tclprim.offset = prim.numverts * 64;
                        tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */

                        radeon_cp_dispatch_indices(dev, buf, &tclprim);
                } else {
                        tclprim.numverts = prim.numverts;
                        tclprim.offset = 0;     /* not used */

                        radeon_cp_dispatch_vertex(dev, buf, &tclprim);
                }

                if (sarea_priv->nbox == 1)
                        sarea_priv->nbox = 0;
        }

        if (vertex.discard) {
                radeon_cp_discard_buffer(dev, buf);
        }

        COMMIT_RING();
        return 0;
}
2518
/* Emit one register-write packet from the client command stream.
 * The packet id selects a (start register, length) pair from the static
 * `packet` table; the payload is verified/fixed up before being copied
 * to the ring.  Advances cmdbuf past the consumed payload on success.
 */
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
                               drm_file_t * filp_priv,
                               drm_radeon_cmd_header_t header,
                               drm_radeon_kcmd_buffer_t *cmdbuf)
{
        int id = (int)header.packet.packet_id;
        int sz, reg;
        int *data = (int *)cmdbuf->buf;
        RING_LOCALS;

        /* id indexes the kernel-side packet table; bound it first. */
        if (id >= RADEON_MAX_STATE_PACKETS)
                return DRM_ERR(EINVAL);

        sz = packet[id].len;
        reg = packet[id].start;

        /* Never read past the end of the kernel copy of the buffer. */
        if (sz * sizeof(int) > cmdbuf->bufsz) {
                DRM_ERROR("Packet size provided larger than data provided\n");
                return DRM_ERR(EINVAL);
        }

        if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
                DRM_ERROR("Packet verification failed\n");
                return DRM_ERR(EINVAL);
        }

        BEGIN_RING(sz + 1);
        OUT_RING(CP_PACKET0(reg, (sz - 1)));
        OUT_RING_TABLE(data, sz);
        ADVANCE_RING();

        cmdbuf->buf += sz * sizeof(int);
        cmdbuf->bufsz -= sz * sizeof(int);
        return 0;
}
2554
2555 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2556                                           drm_radeon_cmd_header_t header,
2557                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2558 {
2559         int sz = header.scalars.count;
2560         int start = header.scalars.offset;
2561         int stride = header.scalars.stride;
2562         RING_LOCALS;
2563
2564         BEGIN_RING(3 + sz);
2565         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2566         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2567         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2568         OUT_RING_TABLE(cmdbuf->buf, sz);
2569         ADVANCE_RING();
2570         cmdbuf->buf += sz * sizeof(int);
2571         cmdbuf->bufsz -= sz * sizeof(int);
2572         return 0;
2573 }
2574
2575 /* God this is ugly
2576  */
2577 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2578                                            drm_radeon_cmd_header_t header,
2579                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2580 {
2581         int sz = header.scalars.count;
2582         int start = ((unsigned int)header.scalars.offset) + 0x100;
2583         int stride = header.scalars.stride;
2584         RING_LOCALS;
2585
2586         BEGIN_RING(3 + sz);
2587         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2588         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2589         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2590         OUT_RING_TABLE(cmdbuf->buf, sz);
2591         ADVANCE_RING();
2592         cmdbuf->buf += sz * sizeof(int);
2593         cmdbuf->bufsz -= sz * sizeof(int);
2594         return 0;
2595 }
2596
2597 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2598                                           drm_radeon_cmd_header_t header,
2599                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2600 {
2601         int sz = header.vectors.count;
2602         int start = header.vectors.offset;
2603         int stride = header.vectors.stride;
2604         RING_LOCALS;
2605
2606         BEGIN_RING(5 + sz);
2607         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2608         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2609         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2610         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2611         OUT_RING_TABLE(cmdbuf->buf, sz);
2612         ADVANCE_RING();
2613
2614         cmdbuf->buf += sz * sizeof(int);
2615         cmdbuf->bufsz -= sz * sizeof(int);
2616         return 0;
2617 }
2618
/* Emit a linear run of TCL vector state (stride fixed at 1).  The dword
 * count is count*4 and the start address is assembled from the split
 * addr_lo/addr_hi header fields.  Advances cmdbuf on success.
 */
static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
                                          drm_radeon_cmd_header_t header,
                                          drm_radeon_kcmd_buffer_t *cmdbuf)
{
        int sz = header.veclinear.count * 4;
        int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
        RING_LOCALS;

        if (!sz)
                return 0;
        /* sz dwords == sz * 4 bytes of payload; never read past the end
         * of the kernel copy of the command buffer.
         */
        if (sz * 4 > cmdbuf->bufsz)
                return DRM_ERR(EINVAL);

        BEGIN_RING(5 + sz);
        OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
        OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
        OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
        OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
        OUT_RING_TABLE(cmdbuf->buf, sz);
        ADVANCE_RING();

        cmdbuf->buf += sz * sizeof(int);
        cmdbuf->bufsz -= sz * sizeof(int);
        return 0;
}
2644
/* Emit one type-3 packet from the client command stream.  The packet is
 * verified (and its length computed into cmdsz) by
 * radeon_check_and_fixup_packet3() before being copied to the ring.
 * Advances cmdbuf past the consumed packet on success.
 */
static int radeon_emit_packet3(drm_device_t * dev,
                               drm_file_t * filp_priv,
                               drm_radeon_kcmd_buffer_t *cmdbuf)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        unsigned int cmdsz;
        int ret;
        RING_LOCALS;

        DRM_DEBUG("\n");

        if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
                                                  cmdbuf, &cmdsz))) {
                DRM_ERROR("Packet verification failed\n");
                return ret;
        }

        BEGIN_RING(cmdsz);
        OUT_RING_TABLE(cmdbuf->buf, cmdsz);
        ADVANCE_RING();

        cmdbuf->buf += cmdsz * 4;
        cmdbuf->bufsz -= cmdsz * 4;
        return 0;
}
2670
/* Emit one type-3 packet once per cliprect: each iteration programs the
 * hardware scissor from the next user cliprect and then replays the
 * same verified packet.  With orig_nbox == 0 the packet is skipped but
 * cmdbuf is still advanced so stream parsing stays in sync.
 */
static int radeon_emit_packet3_cliprect(drm_device_t *dev,
                                        drm_file_t *filp_priv,
                                        drm_radeon_kcmd_buffer_t *cmdbuf,
                                        int orig_nbox)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_clip_rect_t box;
        unsigned int cmdsz;
        int ret;
        drm_clip_rect_t __user *boxes = cmdbuf->boxes;
        int i = 0;
        RING_LOCALS;

        DRM_DEBUG("\n");

        if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
                                                  cmdbuf, &cmdsz))) {
                DRM_ERROR("Packet verification failed\n");
                return ret;
        }

        if (!orig_nbox)
                goto out;

        do {
                if (i < cmdbuf->nbox) {
                        if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
                                return DRM_ERR(EFAULT);
                        /* FIXME The second and subsequent times round
                         * this loop, send a WAIT_UNTIL_3D_IDLE before
                         * calling emit_clip_rect(). This fixes a
                         * lockup on fast machines when sending
                         * several cliprects with a cmdbuf, as when
                         * waving a 2D window over a 3D
                         * window. Something in the commands from user
                         * space seems to hang the card when they're
                         * sent several times in a row. That would be
                         * the correct place to fix it but this works
                         * around it until I can figure that out - Tim
                         * Smith */
                        if (i) {
                                BEGIN_RING(2);
                                RADEON_WAIT_UNTIL_3D_IDLE();
                                ADVANCE_RING();
                        }
                        radeon_emit_clip_rect(dev_priv, &box);
                }

                /* Replay the same packet for this cliprect. */
                BEGIN_RING(cmdsz);
                OUT_RING_TABLE(cmdbuf->buf, cmdsz);
                ADVANCE_RING();

        } while (++i < cmdbuf->nbox);
        /* A single cliprect is consumed by the first packet only. */
        if (cmdbuf->nbox == 1)
                cmdbuf->nbox = 0;

      out:
        cmdbuf->buf += cmdsz * 4;
        cmdbuf->bufsz -= cmdsz * 4;
        return 0;
}
2732
2733 static int radeon_emit_wait(drm_device_t * dev, int flags)
2734 {
2735         drm_radeon_private_t *dev_priv = dev->dev_private;
2736         RING_LOCALS;
2737
2738         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2739         switch (flags) {
2740         case RADEON_WAIT_2D:
2741                 BEGIN_RING(2);
2742                 RADEON_WAIT_UNTIL_2D_IDLE();
2743                 ADVANCE_RING();
2744                 break;
2745         case RADEON_WAIT_3D:
2746                 BEGIN_RING(2);
2747                 RADEON_WAIT_UNTIL_3D_IDLE();
2748                 ADVANCE_RING();
2749                 break;
2750         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2751                 BEGIN_RING(2);
2752                 RADEON_WAIT_UNTIL_IDLE();
2753                 ADVANCE_RING();
2754                 break;
2755         default:
2756                 return DRM_ERR(EINVAL);
2757         }
2758
2759         return 0;
2760 }
2761
/* DRM_IOCTL_RADEON_CMDBUF handler: parse and emit a client command
 * buffer.  The whole buffer is first copied into a kernel allocation
 * (to prevent check/use races), then consumed header-by-header; each
 * header selects one of the radeon_emit_* helpers, which advance
 * cmdbuf.buf/bufsz past their payload.  R300-class chips are handed off
 * wholesale to r300_do_cp_cmdbuf().
 */
static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_file_t *filp_priv;
        drm_device_dma_t *dma = dev->dma;
        drm_buf_t *buf = NULL;
        int idx;
        drm_radeon_kcmd_buffer_t cmdbuf;
        drm_radeon_cmd_header_t header;
        int orig_nbox, orig_bufsz;
        char *kbuf = NULL;

        LOCK_TEST_WITH_RETURN(dev, filp);

        DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

        /* NOTE(review): the user-facing struct is cast here as
         * drm_radeon_cmd_buffer_t while the local is the kernel-side
         * kcmd variant — presumably layout-compatible; confirm against
         * radeon_drm.h / radeon_drv.h.
         */
        DRM_COPY_FROM_USER_IOCTL(cmdbuf,
                                 (drm_radeon_cmd_buffer_t __user *) data,
                                 sizeof(cmdbuf));

        RING_SPACE_TEST_WITH_RETURN(dev_priv);
        VB_AGE_TEST_WITH_RETURN(dev_priv);

        /* Bound the user-supplied size before allocating/copying. */
        if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
                return DRM_ERR(EINVAL);
        }

        /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
         * races between checking values and using those values in other code,
         * and simply to avoid a lot of function calls to copy in data.
         */
        orig_bufsz = cmdbuf.bufsz;
        if (orig_bufsz != 0) {
                kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
                if (kbuf == NULL)
                        return DRM_ERR(ENOMEM);
                if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
                                       cmdbuf.bufsz)) {
                        drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
                        return DRM_ERR(EFAULT);
                }
                cmdbuf.buf = kbuf;
        }

        orig_nbox = cmdbuf.nbox;

        /* R300 chips use an entirely different command stream format. */
        if (dev_priv->microcode_version == UCODE_R300) {
                int temp;
                temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);

                if (orig_bufsz != 0)
                        drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

                return temp;
        }

        /* microcode_version != r300 */
        while (cmdbuf.bufsz >= sizeof(header)) {

                /* Consume one command header; the helpers below consume
                 * the per-command payload that follows it.
                 */
                header.i = *(int *)cmdbuf.buf;
                cmdbuf.buf += sizeof(header);
                cmdbuf.bufsz -= sizeof(header);

                switch (header.header.cmd_type) {
                case RADEON_CMD_PACKET:
                        DRM_DEBUG("RADEON_CMD_PACKET\n");
                        if (radeon_emit_packets
                            (dev_priv, filp_priv, header, &cmdbuf)) {
                                DRM_ERROR("radeon_emit_packets failed\n");
                                goto err;
                        }
                        break;

                case RADEON_CMD_SCALARS:
                        DRM_DEBUG("RADEON_CMD_SCALARS\n");
                        if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
                                DRM_ERROR("radeon_emit_scalars failed\n");
                                goto err;
                        }
                        break;

                case RADEON_CMD_VECTORS:
                        DRM_DEBUG("RADEON_CMD_VECTORS\n");
                        if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
                                DRM_ERROR("radeon_emit_vectors failed\n");
                                goto err;
                        }
                        break;

                case RADEON_CMD_DMA_DISCARD:
                        DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
                        /* Validate the buffer index and ownership before
                         * discarding, as the other ioctl handlers do.
                         */
                        idx = header.dma.buf_idx;
                        if (idx < 0 || idx >= dma->buf_count) {
                                DRM_ERROR("buffer index %d (of %d max)\n",
                                          idx, dma->buf_count - 1);
                                goto err;
                        }

                        buf = dma->buflist[idx];
                        if (buf->filp != filp || buf->pending) {
                                DRM_ERROR("bad buffer %p %p %d\n",
                                          buf->filp, filp, buf->pending);
                                goto err;
                        }

                        radeon_cp_discard_buffer(dev, buf);
                        break;

                case RADEON_CMD_PACKET3:
                        DRM_DEBUG("RADEON_CMD_PACKET3\n");
                        if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
                                DRM_ERROR("radeon_emit_packet3 failed\n");
                                goto err;
                        }
                        break;

                case RADEON_CMD_PACKET3_CLIP:
                        DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
                        if (radeon_emit_packet3_cliprect
                            (dev, filp_priv, &cmdbuf, orig_nbox)) {
                                DRM_ERROR("radeon_emit_packet3_clip failed\n");
                                goto err;
                        }
                        break;

                case RADEON_CMD_SCALARS2:
                        DRM_DEBUG("RADEON_CMD_SCALARS2\n");
                        if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
                                DRM_ERROR("radeon_emit_scalars2 failed\n");
                                goto err;
                        }
                        break;

                case RADEON_CMD_WAIT:
                        DRM_DEBUG("RADEON_CMD_WAIT\n");
                        if (radeon_emit_wait(dev, header.wait.flags)) {
                                DRM_ERROR("radeon_emit_wait failed\n");
                                goto err;
                        }
                        break;
                case RADEON_CMD_VECLINEAR:
                        DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
                        if (radeon_emit_veclinear(dev_priv, header, &cmdbuf)) {
                                DRM_ERROR("radeon_emit_veclinear failed\n");
                                goto err;
                        }
                        break;

                default:
                        DRM_ERROR("bad cmd_type %d at %p\n",
                                  header.header.cmd_type,
                                  cmdbuf.buf - sizeof(header));
                        goto err;
                }
        }

        if (orig_bufsz != 0)
                drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

        DRM_DEBUG("DONE\n");
        COMMIT_RING();
        return 0;

      err:
        if (orig_bufsz != 0)
                drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
        return DRM_ERR(EINVAL);
}
2931
2932 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
2933 {
2934         DRM_DEVICE;
2935         drm_radeon_private_t *dev_priv = dev->dev_private;
2936         drm_radeon_getparam_t param;
2937         int value;
2938
2939         DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
2940                                  sizeof(param));
2941
2942         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
2943
2944         switch (param.param) {
2945         case RADEON_PARAM_GART_BUFFER_OFFSET:
2946                 value = dev_priv->gart_buffers_offset;
2947                 break;
2948         case RADEON_PARAM_LAST_FRAME:
2949                 dev_priv->stats.last_frame_reads++;
2950                 value = GET_SCRATCH(0);
2951                 break;
2952         case RADEON_PARAM_LAST_DISPATCH:
2953                 value = GET_SCRATCH(1);
2954                 break;
2955         case RADEON_PARAM_LAST_CLEAR:
2956                 dev_priv->stats.last_clear_reads++;
2957                 value = GET_SCRATCH(2);
2958                 break;
2959         case RADEON_PARAM_IRQ_NR:
2960                 value = dev->irq;
2961                 break;
2962         case RADEON_PARAM_GART_BASE:
2963                 value = dev_priv->gart_vm_start;
2964                 break;
2965         case RADEON_PARAM_REGISTER_HANDLE:
2966                 value = dev_priv->mmio->offset;
2967                 break;
2968         case RADEON_PARAM_STATUS_HANDLE:
2969                 value = dev_priv->ring_rptr_offset;
2970                 break;
2971 #if BITS_PER_LONG == 32
2972                 /*
2973                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2974                  * pointer which can't fit into an int-sized variable.  According to
2975                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2976                  * not supporting it shouldn't be a problem.  If the same functionality
2977                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
2978                  * so backwards-compatibility for the embedded platforms can be
2979                  * maintained.  --davidm 4-Feb-2004.
2980                  */
2981         case RADEON_PARAM_SAREA_HANDLE:
2982                 /* The lock is the first dword in the sarea. */
2983                 value = (long)dev->lock.hw_lock;
2984                 break;
2985 #endif
2986         case RADEON_PARAM_GART_TEX_HANDLE:
2987                 value = dev_priv->gart_textures_offset;
2988                 break;
2989         case RADEON_PARAM_SCRATCH_OFFSET:
2990                 if (!dev_priv->writeback_works)
2991                         return DRM_ERR(EINVAL);
2992                 value = RADEON_SCRATCH_REG_OFFSET;
2993                 break;
2994         case RADEON_PARAM_CARD_TYPE:
2995                 if (dev_priv->flags & CHIP_IS_PCIE)
2996                         value = RADEON_CARD_PCIE;
2997                 else if (dev_priv->flags & CHIP_IS_AGP)
2998                         value = RADEON_CARD_AGP;
2999                 else
3000                         value = RADEON_CARD_PCI;
3001                 break;
3002         default:
3003                 DRM_DEBUG("Invalid parameter %d\n", param.param);
3004                 return DRM_ERR(EINVAL);
3005         }
3006
3007         if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
3008                 DRM_ERROR("copy_to_user\n");
3009                 return DRM_ERR(EFAULT);
3010         }
3011
3012         return 0;
3013 }
3014
3015 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
3016 {
3017         DRM_DEVICE;
3018         drm_radeon_private_t *dev_priv = dev->dev_private;
3019         drm_file_t *filp_priv;
3020         drm_radeon_setparam_t sp;
3021         struct drm_radeon_driver_file_fields *radeon_priv;
3022
3023         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
3024
3025         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
3026                                  sizeof(sp));
3027
3028         switch (sp.param) {
3029         case RADEON_SETPARAM_FB_LOCATION:
3030                 radeon_priv = filp_priv->driver_priv;
3031                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3032                 break;
3033         case RADEON_SETPARAM_SWITCH_TILING:
3034                 if (sp.value == 0) {
3035                         DRM_DEBUG("color tiling disabled\n");
3036                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3037                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3038                         dev_priv->sarea_priv->tiling_enabled = 0;
3039                 } else if (sp.value == 1) {
3040                         DRM_DEBUG("color tiling enabled\n");
3041                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3042                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3043                         dev_priv->sarea_priv->tiling_enabled = 1;
3044                 }
3045                 break;
3046         case RADEON_SETPARAM_PCIGART_LOCATION:
3047                 dev_priv->pcigart_offset = sp.value;
3048                 break;
3049         case RADEON_SETPARAM_NEW_MEMMAP:
3050                 dev_priv->new_memmap = sp.value;
3051                 break;
3052         default:
3053                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3054                 return DRM_ERR(EINVAL);
3055         }
3056
3057         return 0;
3058 }
3059
3060 /* When a client dies:
3061  *    - Check for and clean up flipped page state
3062  *    - Free any alloced GART memory.
3063  *    - Free any alloced radeon surfaces.
3064  *
3065  * DRM infrastructure takes care of reclaiming dma buffers.
3066  */
3067 void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
3068 {
3069         if (dev->dev_private) {
3070                 drm_radeon_private_t *dev_priv = dev->dev_private;
3071                 if (dev_priv->page_flipping) {
3072                         radeon_do_cleanup_pageflip(dev);
3073                 }
3074                 radeon_mem_release(filp, dev_priv->gart_heap);
3075                 radeon_mem_release(filp, dev_priv->fb_heap);
3076                 radeon_surfaces_release(filp, dev_priv);
3077         }
3078 }
3079
/* Called by the DRM core when the last file handle on the device is
 * closed; delegates full engine/state teardown to radeon_do_release().
 */
void radeon_driver_lastclose(drm_device_t * dev)
{
	radeon_do_release(dev);
}
3084
3085 int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
3086 {
3087         drm_radeon_private_t *dev_priv = dev->dev_private;
3088         struct drm_radeon_driver_file_fields *radeon_priv;
3089
3090         DRM_DEBUG("\n");
3091         radeon_priv =
3092             (struct drm_radeon_driver_file_fields *)
3093             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3094
3095         if (!radeon_priv)
3096                 return -ENOMEM;
3097
3098         filp_priv->driver_priv = radeon_priv;
3099
3100         if (dev_priv)
3101                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3102         else
3103                 radeon_priv->radeon_fb_delta = 0;
3104         return 0;
3105 }
3106
3107 void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
3108 {
3109         struct drm_radeon_driver_file_fields *radeon_priv =
3110             filp_priv->driver_priv;
3111
3112         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3113 }
3114
/* Radeon ioctl dispatch table, indexed by DRM_IOCTL_NR() of each command.
 * The second field is the permission mask checked by the DRM core before
 * the handler runs; presumably DRM_AUTH requires an authenticated client,
 * DRM_MASTER the DRM master, and DRM_ROOT_ONLY root privileges — confirm
 * against the DRM core's ioctl dispatch (drmP.h).
 */
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
};
3144
/* Number of entries in radeon_ioctls[], exported to the DRM core. */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);