/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
/*
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
36 /* ================================================================
37 * Helper functions for client state checking and fixup
static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    drm_file_t * filp_priv,
						    u32 * offset)
{
	u64 off = *offset;
	u32 fb_start = dev_priv->fb_location;
	u32 fb_end = fb_start + dev_priv->fb_size - 1;
	u32 gart_start = dev_priv->gart_vm_start;
	u32 gart_end = gart_start + dev_priv->gart_size - 1;
	struct drm_radeon_driver_file_fields *radeon_priv;

	/* Hrm ... the story of the offset ... So this function converts
	 * the various ideas of what userland clients might have for an
	 * offset in the card address space into an offset into the card
	 * address space :) So with a sane client, it should just keep
	 * the value intact and just do some boundary checking. However,
	 * not all clients are sane. Some older clients pass us 0 based
	 * offsets relative to the start of the framebuffer and some may
	 * assume the AGP aperture is appended to the framebuffer, so we
	 * try to detect those cases and fix them up.
	 *
	 * Note: It might be a good idea here to make sure the offset lands
	 * in some "allowed" area to protect things like the PCIE GART...
	 */
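	/* Worked example (illustrative values only, not from real hardware):
	 * with fb_location = 0xe0000000, fb_size = 0x08000000 and
	 * gart_vm_start = 0xe8000000, a legacy client passing the
	 * framebuffer-relative offset 0x00100000 misses both apertures,
	 * fits below fb_size + gart_size, and gets rebased by
	 * radeon_fb_delta (the gap between fb_location and the client's
	 * idea of the fb base) to land at 0xe0100000 inside the
	 * framebuffer window.
	 */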
	/* First, the best case, the offset already lands in either the
	 * framebuffer or the GART mapped space
	 */
	if ((off >= fb_start && off <= fb_end) ||
	    (off >= gart_start && off <= gart_end))
		return 0;

	/* Ok, that didn't happen... now check if we have a zero based
	 * offset that fits in the framebuffer + gart space, apply the
	 * magic offset we get from SETPARAM or calculated from fb_location
	 */
	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
		radeon_priv = filp_priv->driver_priv;
		off += radeon_priv->radeon_fb_delta;
	}

	/* Finally, assume we aimed at a GART offset if beyond the fb */
	if (off > fb_end)
		off = off - fb_end - 1 + gart_start;

	/* Now recheck and fail if out of bounds */
	if ((off >= fb_start && off <= fb_end) ||
	    (off >= gart_start && off <= gart_end)) {
		DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
		*offset = off;
		return 0;
	} else
		return DRM_ERR(EINVAL);
}
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	case R200_EMIT_VAP_CTL:{
			RING_LOCALS;
			BEGIN_RING(2);
			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
			ADVANCE_RING();
		}
		break;
	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return DRM_ERR(EINVAL);
	}

	return 0;
}
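/* Note: the RADEON_EMIT_/R200_EMIT_ ids handled above double as indices
 * into the packet[] table further down, which supplies the first register
 * and the dword count used when each state packet is actually emitted.
 */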
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t *filp_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;
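	/* A CP packet header carries its type in bits [31:30] and, for
	 * type-0/type-3 packets, a count field in bits [29:16] holding
	 * (payload dwords - 1); header plus payload is therefore
	 * count + 2 dwords, which is what we compute below.
	 */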
	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return DRM_ERR(EINVAL);
	}

	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return DRM_ERR(EINVAL);
	}

	/* Check client state and fix it up if necessary */
	if (cmd[0] & 0x8000) {	/* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;
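		/* The GUI_CNTL dword (cmd[1]) flags which pitch/offset
		 * dwords follow.  Each one packs the pitch in bits [31:22]
		 * (64-byte units, preserved via the 0xffc00000 mask) and
		 * the offset in bits [21:0] (1KB units), so offsets are
		 * widened with << 10 before checking and packed back with
		 * >> 10 afterwards.
		 */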
		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
	}

	return 0;
}
/* ================================================================
 * CP hardware state programming functions
 */
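/* All of the emit functions below batch register writes through the CP
 * ring using the driver's ring macros: BEGIN_RING(n) reserves n dwords,
 * OUT_RING() queues one dword, and ADVANCE_RING() commits the batch.
 * CP_PACKET0(reg, n) is a type-0 header that writes n + 1 consecutive
 * registers starting at reg, which is why each BEGIN_RING count equals
 * the sum of headers plus register values emitted in that block.
 */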
static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     drm_clip_rect_t * box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	/* the hardware takes an inclusive bottom-right corner, hence -1 */
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}
static int radeon_emit_state(drm_radeon_private_t * dev_priv,
			     drm_file_t * filp_priv,
			     drm_radeon_context_regs_t * ctx,
			     drm_radeon_texture_regs_t * tex,
			     unsigned int dirty)
{
	RING_LOCALS;
	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & RADEON_UPLOAD_CONTEXT) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_depthoffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_coloroffset)) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(14);
		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
		OUT_RING(ctx->pp_misc);
		OUT_RING(ctx->pp_fog_color);
		OUT_RING(ctx->re_solid_color);
		OUT_RING(ctx->rb3d_blendcntl);
		OUT_RING(ctx->rb3d_depthoffset);
		OUT_RING(ctx->rb3d_depthpitch);
		OUT_RING(ctx->rb3d_zstencilcntl);
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
		OUT_RING(ctx->pp_cntl);
		OUT_RING(ctx->rb3d_cntl);
		OUT_RING(ctx->rb3d_coloroffset);
		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
		OUT_RING(ctx->rb3d_colorpitch);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VERTFMT) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
		OUT_RING(ctx->se_coord_fmt);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_LINE) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
		OUT_RING(ctx->re_line_pattern);
		OUT_RING(ctx->re_line_state);
		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
		OUT_RING(ctx->se_line_width);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_BUMPMAP) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
		OUT_RING(ctx->pp_lum_matrix);
		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
		OUT_RING(ctx->pp_rot_matrix_0);
		OUT_RING(ctx->pp_rot_matrix_1);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MASKS) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
		OUT_RING(ctx->rb3d_stencilrefmask);
		OUT_RING(ctx->rb3d_ropcntl);
		OUT_RING(ctx->rb3d_planemask);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VIEWPORT) {
		BEGIN_RING(7);
		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
		OUT_RING(ctx->se_vport_xscale);
		OUT_RING(ctx->se_vport_xoffset);
		OUT_RING(ctx->se_vport_yscale);
		OUT_RING(ctx->se_vport_yoffset);
		OUT_RING(ctx->se_vport_zscale);
		OUT_RING(ctx->se_vport_zoffset);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_SETUP) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
		OUT_RING(ctx->se_cntl);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
		OUT_RING(ctx->se_cntl_status);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MISC) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
		OUT_RING(ctx->re_misc);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX0) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[0].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 0\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
		OUT_RING(tex[0].pp_txfilter);
		OUT_RING(tex[0].pp_txformat);
		OUT_RING(tex[0].pp_txoffset);
		OUT_RING(tex[0].pp_txcblend);
		OUT_RING(tex[0].pp_txablend);
		OUT_RING(tex[0].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
		OUT_RING(tex[0].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX1) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[1].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 1\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
		OUT_RING(tex[1].pp_txfilter);
		OUT_RING(tex[1].pp_txformat);
		OUT_RING(tex[1].pp_txoffset);
		OUT_RING(tex[1].pp_txcblend);
		OUT_RING(tex[1].pp_txablend);
		OUT_RING(tex[1].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
		OUT_RING(tex[1].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX2) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[2].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 2\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
		OUT_RING(tex[2].pp_txfilter);
		OUT_RING(tex[2].pp_txformat);
		OUT_RING(tex[2].pp_txoffset);
		OUT_RING(tex[2].pp_txcblend);
		OUT_RING(tex[2].pp_txablend);
		OUT_RING(tex[2].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
		OUT_RING(tex[2].pp_border_color);
		ADVANCE_RING();
	}

	return 0;
}
/* Emit 1.1 state
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      drm_file_t * filp_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, filp_priv, &state->context,
				 state->tex, state->dirty);
}

/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
	 "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
	 "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	/* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
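/* The table is indexed by the RADEON_EMIT_/R200_EMIT_ packet ids checked
 * in radeon_check_and_fixup_packets() above.  For example, the entry for
 * RADEON_EMIT_PP_MISC, {RADEON_PP_MISC, 7, ...}, spans the seven registers
 * from PP_MISC through RB3D_ZSTENCILCNTL, which is why the depth offset
 * sits at (RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4 in its payload.
 */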
/* ================================================================
 * Performance monitoring functions
 */

static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}
static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red box if we have to wait for idle at any point
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
}
/* ================================================================
 * CP command dispatch functions
 */

static void radeon_cp_dispatch_clear(drm_device_t * dev,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}
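	/* While page flipped onto the second buffer (current_page == 1),
	 * the client's notion of front and back is inverted relative to
	 * the physical buffers, so the flags are exchanged above to keep
	 * the pitch/offset pairs used below pointing at the right memory.
	 */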
	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}
	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		int depthpixperline =
		    dev_priv->depth_fmt == RADEON_DEPTH_FORMAT_16BIT_INT_Z ?
		    (dev_priv->depth_pitch / 2) : (dev_priv->depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!? */
			clearmask = 0x0;
		}

		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();
		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & CHIP_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}
	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;

		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0x0000000a;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}
		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
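	/* Each R200 clear vertex above is (x, y, z, w) floats, matching the
	 * Z/W-present SE_VTX_FMT_0 programmed earlier; W is pinned to
	 * 0x3f800000 (1.0f) since only the depth/stencil planes are written.
	 */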
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
static void radeon_cp_dispatch_swap(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes(dev_priv);

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);

		BEGIN_RING(7);

		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING(dev_priv->back_pitch_offset);
			OUT_RING(dev_priv->front_pitch_offset);
		} else {
			OUT_RING(dev_priv->front_pitch_offset);
			OUT_RING(dev_priv->back_pitch_offset);
		}

		OUT_RING((x << 16) | y);	/* src x, y */
		OUT_RING((x << 16) | y);	/* dst x, y */
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING(4);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}
static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
	int offset = (dev_priv->current_page == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
		  __FUNCTION__,
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}
static int bad_prim_vertex_nr(int primitive, int nr)
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;
	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;
	default:
		return 1;
	}
}

typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;
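/* A tcl prim describes one hardware primitive launch: start/finish are
 * byte offsets into the client's DMA buffer and vc_format the vertex
 * layout.  radeon_cp_dispatch_vertex() below walks the vertices directly
 * (PRIM_WALK_LIST), while radeon_cp_dispatch_indices() treats the range
 * as 16-bit indices into a separate vertex buffer (PRIM_WALK_IND).
 */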
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}

static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}

static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
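/* Example of the padding rule above: a 37-byte range rounds up to
 * dwords = (37 + 3) / 4 = 10, which is already even, while a 36-byte
 * range gives 9 dwords and has one RADEON_CP_PACKET2 nop appended so
 * the indirect buffer length stays even as the CP requires.
 */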
static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	/* Build the indexed-primitive header in place, in front of the
	 * indices, then fire the whole range as an indirect buffer.
	 */
	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);
}
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return DRM_ERR(EINVAL);

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

#define RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
			return DRM_ERR(EFAULT); \
		} \
	} while(0)
		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						       (int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
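/* The manual tiling above interleaves 16-byte groups because, under
 * micro tiling, two consecutive scanlines of a narrow texture appear to
 * share one tile row -- hence the buffer + 8 / + 4 / + 12 dword offsets
 * in the 32-byte-wide case.  This is our reading of the layout, untested
 * beyond the widths handled here.
 */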
		buf->filp = filp;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	return 0;
}

static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(35);

	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (i = 0; i < 32; i++) {
		OUT_RING(stipple[i]);
	}

	ADVANCE_RING();
}

static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].upper);
}
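/* Each hardware surface register set (INFO/LOWER_BOUND/UPPER_BOUND, 16
 * bytes apart per index) applies its flags -- typically tiling and byte
 * swapping modes -- to every access falling inside [lower, upper].  The
 * "virtual" surfaces managed below let clients share one hardware slot
 * whenever their address ranges and flags line up.
 */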
/* Allocates a virtual surface
 * doesn't always allocate a real surface, will stretch an existing
 * surface when possible.
 *
 * Note that refcount can be at most 2, since during a free refcount=3
 * might mean we have to allocate a new surface which might not always
 * be available.
 * For example : we allocate three contiguous surfaces ABC. If B is
 * freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 */
static int alloc_surface(drm_radeon_surface_alloc_t *new,
			 drm_radeon_private_t *dev_priv, DRMFILE filp)
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	new_lower = new->address;
	new_upper = new_lower + new->size - 1;

	/* sanity check */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
	     RADEON_SURF_ADDRESS_FIXED_MASK)
	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		    (((new_lower >= dev_priv->surfaces[i].lower) &&
		      (new_lower < dev_priv->surfaces[i].upper)) ||
		     ((new_lower < dev_priv->surfaces[i].lower) &&
		      (new_upper > dev_priv->surfaces[i].lower)))) {
			return -1;
		}
	}

	/* find a virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].filp == 0)
			break;
	if (i == 2 * RADEON_MAX_SURFACES) {
		return -1;
	}
	virt_surface_index = i;

	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}

static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int i;

	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->filp) {
			if ((lower == s->lower) && (filp == s->filp)) {
				/* shrink the real surface away from the
				 * range being freed */
				if (dev_priv->surfaces[s->surface_index].lower
				    == s->lower)
					dev_priv->surfaces[s->surface_index].
					    lower = s->upper;

				if (dev_priv->surfaces[s->surface_index].upper
				    == s->upper)
					dev_priv->surfaces[s->surface_index].
					    upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].
				    refcount == 0)
					dev_priv->surfaces[s->surface_index].
					    flags = 0;
				s->filp = NULL;
				radeon_apply_surface_regs(s->surface_index,
							  dev_priv);
				return 0;
			}
		}
	}
	return 1;
}

static void radeon_surfaces_release(DRMFILE filp,
				    drm_radeon_private_t * dev_priv)
{
	int i;
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		if (dev_priv->virt_surfaces[i].filp == filp)
			free_surface(filp, dev_priv,
				     dev_priv->virt_surfaces[i].lower);
	}
}
1972 /* ================================================================
1975 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1978 drm_radeon_private_t *dev_priv = dev->dev_private;
1979 drm_radeon_surface_alloc_t alloc;
1981 DRM_COPY_FROM_USER_IOCTL(alloc,
1982 (drm_radeon_surface_alloc_t __user *) data,
1985 if (alloc_surface(&alloc, dev_priv, filp) == -1)
1986 return DRM_ERR(EINVAL);
1991 static int radeon_surface_free(DRM_IOCTL_ARGS)
1994 drm_radeon_private_t *dev_priv = dev->dev_private;
1995 drm_radeon_surface_free_t memfree;
1997 DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
2000 if (free_surface(filp, dev_priv, memfree.address))
2001 return DRM_ERR(EINVAL);
2006 static int radeon_cp_clear(DRM_IOCTL_ARGS)
2009 drm_radeon_private_t *dev_priv = dev->dev_private;
2010 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2011 drm_radeon_clear_t clear;
2012 drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2015 LOCK_TEST_WITH_RETURN(dev, filp);
2017 DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
2020 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2022 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2023 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2025 if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
2026 sarea_priv->nbox * sizeof(depth_boxes[0])))
2027 return DRM_ERR(EFAULT);
2029 radeon_cp_dispatch_clear(dev, &clear, depth_boxes);
2035 /* Not sure why this isn't set all the time:
2037 static int radeon_do_init_pageflip(drm_device_t * dev)
2039 drm_radeon_private_t *dev_priv = dev->dev_private;
2045 RADEON_WAIT_UNTIL_3D_IDLE();
2046 OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2047 OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2048 RADEON_CRTC_OFFSET_FLIP_CNTL);
2049 OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2050 OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2051 RADEON_CRTC_OFFSET_FLIP_CNTL);
2054 dev_priv->page_flipping = 1;
2055 dev_priv->current_page = 0;
2056 dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;
2061 /* Called whenever a client dies, from drm_release.
2062 * NOTE: Lock isn't necessarily held when this is called!
2064 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2066 drm_radeon_private_t *dev_priv = dev->dev_private;
2069 if (dev_priv->current_page != 0)
2070 radeon_cp_dispatch_flip(dev);
2072 dev_priv->page_flipping = 0;
/* Swapping and flipping are different operations and need different
 * ioctls. They can and should be intermixed to support multiple 3D windows.
 */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev);

	radeon_cp_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}

static int radeon_cp_swap(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap(dev);
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}

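/* Emit a single vertex buffer: validate the buffer index, primitive
 * type, and ownership, flush any dirty context/texture state to the
 * ring, then dispatch the primitive (and optionally discard the buffer
 * back to the free list).
 */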
static int radeon_cp_vertex(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count;	/* not used? */

		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, filp_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return DRM_ERR(EINVAL);
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count;	/* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, buf, &prim);
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_cp_indices(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indices_t elts;
	drm_radeon_tcl_prim_t prim;
	int count;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
				 sizeof(elts));

	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
		  DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);

	if (elts.idx < 0 || elts.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  elts.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", elts.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[elts.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", elts.idx);
		return DRM_ERR(EINVAL);
	}

	count = (elts.end - elts.start) / sizeof(u16);
	elts.start -= RADEON_INDEX_PRIM_OFFSET;

	if (elts.start & 0x7) {
		DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
		return DRM_ERR(EINVAL);
	}
	if (elts.start < buf->used) {
		DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	buf->used = elts.end;

	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
		if (radeon_emit_state(dev_priv, filp_priv,
				      &sarea_priv->context_state,
				      sarea_priv->tex_state,
				      sarea_priv->dirty)) {
			DRM_ERROR("radeon_emit_state failed\n");
			return DRM_ERR(EINVAL);
		}

		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}

	/* Build up a prim_t record:
	 */
	prim.start = elts.start;
	prim.finish = elts.end;
	prim.prim = elts.prim;
	prim.offset = 0;	/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
	prim.vc_format = dev_priv->sarea_priv->vc_format;

	radeon_cp_dispatch_indices(dev, buf, &prim);
	if (elts.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}

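/* Upload a texture image: the tex/image descriptors are copied in from
 * userspace and the blit itself is performed by
 * radeon_cp_dispatch_texture().
 */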
static int radeon_cp_texture(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t tex;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
				 sizeof(tex));

	if (tex.image == NULL) {
		DRM_ERROR("null texture image!\n");
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_FROM_USER(&image,
			       (drm_radeon_tex_image_t __user *) tex.image,
			       sizeof(image)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);

	COMMIT_RING();
	return ret;
}

static int radeon_cp_stipple(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN(dev, filp);
	DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
				 sizeof(stipple));
	if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	radeon_cp_dispatch_stipple(dev, mask);
	COMMIT_RING();
	return 0;
}

static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(indirect,
				 (drm_radeon_indirect_t __user *) data,
				 sizeof(indirect));

	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
		  indirect.idx, indirect.start, indirect.end, indirect.discard);

	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
		return DRM_ERR(EINVAL);
	}

	if (indirect.start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);
	RADEON_WAIT_UNTIL_3D_IDLE();
	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
	if (indirect.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}

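/* Newer vertex ioctl: one buffer may carry several primitives, each
 * referencing one of an array of state blocks, so state is re-emitted
 * only when the state index changes between primitives.
 */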
static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
			return DRM_ERR(EFAULT);

		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (DRM_COPY_FROM_USER(&state,
					       &vertex.state[prim.stateidx],
					       sizeof(state)))
				return DRM_ERR(EFAULT);

			if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return DRM_ERR(EINVAL);
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (prim.prim & RADEON_PRIM_WALK_IND) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}

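/* The helpers below each consume one command header (decoded by
 * radeon_cp_cmdbuf() further down) plus its payload from the in-kernel
 * copy of the command stream, emit the result to the ring, and advance
 * cmdbuf->buf / cmdbuf->bufsz past what they consumed.
 */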
static int radeon_emit_packets(drm_radeon_private_t *dev_priv,
			       drm_file_t *filp_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return DRM_ERR(EINVAL);
	}

	if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return DRM_ERR(EINVAL);
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
					   drm_radeon_cmd_header_t header,
					   drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = ((unsigned int)header.scalars.offset) + 0x100;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					    drm_radeon_cmd_header_t header,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.veclinear.count * 4;
	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
	RING_LOCALS;

	if (!sz)
		return 0;
	if (sz * 4 > cmdbuf->bufsz)
		return DRM_ERR(EINVAL);

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

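/* Packet3 commands come straight from the client, so they are run
 * through radeon_check_and_fixup_packet3() before being copied to the
 * ring; the cliprect variant replays the same packet once per clip
 * rectangle.
 */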
static int radeon_emit_packet3(drm_device_t *dev,
			       drm_file_t *filp_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_packet3_cliprect(drm_device_t *dev,
					drm_file_t *filp_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_wait(drm_device_t *dev, int flags)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
	switch (flags) {
	case RADEON_WAIT_2D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_2D | RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_IDLE();
		ADVANCE_RING();
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	return 0;
}

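/* Main command-buffer ioctl: copy the client's stream into a kernel
 * buffer (so validation and use see the same data), then decode it
 * header by header, dispatching to the emit helpers above.  R300-class
 * chips take a separate path via r300_do_cp_cmdbuf().
 */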
static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(cmdbuf,
				 (drm_radeon_cmd_buffer_t __user *) data,
				 sizeof(cmdbuf));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
				       cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;

	if (dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* microcode_version != r300 */
	while (cmdbuf.bufsz >= sizeof(header)) {

		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, filp_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			buf = dma->buflist[idx];
			if (buf->filp != filp || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, filp_priv, &cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECLINEAR:
			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
			if (radeon_emit_veclinear(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_veclinear failed\n");
				goto err;
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

      err:
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}

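/* For reference, a minimal client-side stream for the ioctl above is a
 * packed sequence of drm_radeon_cmd_header_t headers, each followed by
 * its payload.  An illustrative sketch only (not part of the driver):
 *
 *	drm_radeon_cmd_header_t h;
 *	h.i = 0;
 *	h.header.cmd_type = RADEON_CMD_WAIT;
 *	h.wait.flags = RADEON_WAIT_3D;
 *	memcpy(stream, &h, sizeof(h));	// a wait carries no payload
 *
 * with cmdbuf.buf pointing at 'stream' and cmdbuf.bufsz = sizeof(h).
 */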
static int radeon_cp_getparam(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_getparam_t param;
	int value;

	DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
				 sizeof(param));

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	switch (param.param) {
	case RADEON_PARAM_GART_BUFFER_OFFSET:
		value = dev_priv->gart_buffers_offset;
		break;
	case RADEON_PARAM_LAST_FRAME:
		dev_priv->stats.last_frame_reads++;
		value = GET_SCRATCH(0);
		break;
	case RADEON_PARAM_LAST_DISPATCH:
		value = GET_SCRATCH(1);
		break;
	case RADEON_PARAM_LAST_CLEAR:
		dev_priv->stats.last_clear_reads++;
		value = GET_SCRATCH(2);
		break;
	case RADEON_PARAM_IRQ_NR:
		value = dev->irq;
		break;
	case RADEON_PARAM_GART_BASE:
		value = dev_priv->gart_vm_start;
		break;
	case RADEON_PARAM_REGISTER_HANDLE:
		value = dev_priv->mmio->offset;
		break;
	case RADEON_PARAM_STATUS_HANDLE:
		value = dev_priv->ring_rptr_offset;
		break;
#if BITS_PER_LONG == 32
		/*
		 * This ioctl() doesn't work on 64-bit platforms because
		 * hw_lock is a pointer which can't fit into an int-sized
		 * variable.  According to Michel Dänzer, the ioctl() is
		 * only used on embedded platforms, so not supporting it
		 * shouldn't be a problem.  If the same functionality is
		 * needed on 64-bit platforms, a new ioctl() would have to
		 * be added, so backwards-compatibility for the embedded
		 * platforms can be maintained.  --davidm 4-Feb-2004.
		 */
	case RADEON_PARAM_SAREA_HANDLE:
		/* The lock is the first dword in the sarea. */
		value = (long)dev->lock.hw_lock;
		break;
#endif
	case RADEON_PARAM_GART_TEX_HANDLE:
		value = dev_priv->gart_textures_offset;
		break;
	case RADEON_PARAM_SCRATCH_OFFSET:
		if (!dev_priv->writeback_works)
			return DRM_ERR(EINVAL);
		value = RADEON_SCRATCH_REG_OFFSET;
		break;
	case RADEON_PARAM_CARD_TYPE:
		if (dev_priv->flags & CHIP_IS_PCIE)
			value = RADEON_CARD_PCIE;
		else if (dev_priv->flags & CHIP_IS_AGP)
			value = RADEON_CARD_AGP;
		else
			value = RADEON_CARD_PCI;
		break;
	default:
		DRM_DEBUG("Invalid parameter %d\n", param.param);
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
		DRM_ERROR("copy_to_user\n");
		return DRM_ERR(EFAULT);
	}

	return 0;
}

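/* A userland caller would typically reach this through libdrm, along
 * these lines (illustrative sketch only; assumes the libdrm
 * drmCommandWriteRead() wrapper):
 *
 *	drm_radeon_getparam_t gp;
 *	int value;
 *	gp.param = RADEON_PARAM_GART_BASE;
 *	gp.value = &value;
 *	drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
 */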
static int radeon_cp_setparam(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_setparam_t sp;
	struct drm_radeon_driver_file_fields *radeon_priv;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
				 sizeof(sp));

	switch (sp.param) {
	case RADEON_SETPARAM_FB_LOCATION:
		radeon_priv = filp_priv->driver_priv;
		radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
		break;
	case RADEON_SETPARAM_SWITCH_TILING:
		if (sp.value == 0) {
			DRM_DEBUG("color tiling disabled\n");
			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 0;
		} else if (sp.value == 1) {
			DRM_DEBUG("color tiling enabled\n");
			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 1;
		}
		break;
	case RADEON_SETPARAM_PCIGART_LOCATION:
		dev_priv->pcigart_offset = sp.value;
		break;
	case RADEON_SETPARAM_NEW_MEMMAP:
		dev_priv->new_memmap = sp.value;
		break;
	default:
		DRM_DEBUG("Invalid parameter %d\n", sp.param);
		return DRM_ERR(EINVAL);
	}

	return 0;
}

/* When a client dies:
 *    - Check for and clean up flipped page state.
 *    - Free any allocated GART memory.
 *    - Free any allocated radeon surfaces.
 *
 * DRM infrastructure takes care of reclaiming dma buffers.
 */
void radeon_driver_preclose(drm_device_t *dev, DRMFILE filp)
{
	if (dev->dev_private) {
		drm_radeon_private_t *dev_priv = dev->dev_private;
		if (dev_priv->page_flipping) {
			radeon_do_cleanup_pageflip(dev);
		}
		radeon_mem_release(filp, dev_priv->gart_heap);
		radeon_mem_release(filp, dev_priv->fb_heap);
		radeon_surfaces_release(filp, dev_priv);
	}
}

void radeon_driver_lastclose(drm_device_t *dev)
{
	radeon_do_release(dev);
}

int radeon_driver_open(drm_device_t *dev, drm_file_t *filp_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_driver_file_fields *radeon_priv;

	radeon_priv = (struct drm_radeon_driver_file_fields *)
	    drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
	if (!radeon_priv)
		return -ENOMEM;

	filp_priv->driver_priv = radeon_priv;
	if (dev_priv)
		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
	else
		radeon_priv->radeon_fb_delta = 0;
	return 0;
}

void radeon_driver_postclose(drm_device_t *dev, drm_file_t *filp_priv)
{
	struct drm_radeon_driver_file_fields *radeon_priv =
	    filp_priv->driver_priv;

	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
}

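/* Ioctl table: entries marked DRM_MASTER|DRM_ROOT_ONLY (CP setup,
 * indirect buffers, heap init) are restricted to the privileged master
 * client; everything else only requires an authenticated client.
 */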
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
};

int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);