diff -urN xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon.h xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon.h
--- xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon.h	2005-10-10 15:42:37.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon.h	2005-10-12 08:47:04.000000000 +1000
@@ -102,6 +102,8 @@
 /* ------------------------------------- */
 
 #define RADEON_DEBUG            0 /* Turn off debugging output               */
+#define RADEON_FB_PROTECT	0 /* Set to 1 to turn on FB protection: info->FB becomes a PROT_NONE mapping, the real map is kept in info->RealFB */
+
 #define RADEON_IDLE_RETRY      16 /* Fall out of idle loops after this count */
 #define RADEON_TIMEOUT    2000000 /* Fall out of wait loops after this count */
 #define RADEON_MMIOSIZE   0x80000
@@ -328,6 +330,9 @@
 
     unsigned char     *MMIO;            /* Map of MMIO region                */
     unsigned char     *FB;              /* Map of frame buffer               */
+#if RADEON_FB_PROTECT
+    unsigned char     *RealFB;
+#endif
     CARD8             *VBIOS;           /* Video BIOS pointer                */
 
     Bool              IsAtomBios;       /* New BIOS used in R420 etc.        */
@@ -337,7 +342,6 @@
     CARD32            MemCntl;
     CARD32            BusCntl;
     unsigned long     FbMapSize;        /* Size of frame buffer, in bytes    */
-    unsigned long     FbSecureSize;     /* Size of secured fb area at end of framebuffer */
     int               Flags;            /* Saved copy of mode flags          */
 
 				/* VE/M6 support */
@@ -585,6 +589,10 @@
     FBAreaPtr         depthTexArea;
 #endif
 
+#ifdef USE_EXA
+    Bool              sgDMA;
+    CARD32            sgDMADescOffset;
+#endif
 				/* Saved scissor values */
     CARD32            sc_left;
     CARD32            sc_right;
diff -urN xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_accel.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_accel.c
--- xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_accel.c	2005-09-15 08:06:41.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_accel.c	2005-09-28 10:32:51.000000000 +1000
@@ -77,6 +77,7 @@
 				/* Driver data structures */
 #include "radeon.h"
 #include "radeon_reg.h"
+#include "r300_reg.h"
 #include "radeon_macros.h"
 #include "radeon_probe.h"
 #include "radeon_version.h"
diff -urN xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_common.h xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_common.h
--- xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_common.h	2005-09-11 18:51:38.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_common.h	2005-10-09 09:54:15.000000000 +1000
@@ -76,6 +76,11 @@
 #define DRM_RADEON_SETPARAM               0x19
 #define DRM_RADEON_SURF_ALLOC             0x1a
 #define DRM_RADEON_SURF_FREE              0x1b
+#define DRM_RADEON_DMA_PREP               0x1c
+#define DRM_RADEON_DMA_KICK               0x1d
+#define DRM_RADEON_DMA_SYNC               0x1e
+#define DRM_RADEON_DMA_FREE               0x1f
+#define DRM_RADEON_DMA_INIT               0x20
 #define DRM_RADEON_MAX_DRM_COMMAND_INDEX  0x39
 
 
@@ -487,4 +492,54 @@
 	unsigned int address;
 } drmRadeonSurfaceFree;
 
+
+/* 1.20: DMA engine control
+ */
+
+typedef struct drm_radeon_dma_prepare {	/* argument of DRM_RADEON_DMA_PREP */
+	uint64_t	mem_origin;	/* address of the system-memory buffer; callers pass a CPU pointer converted to an integer */
+	unsigned int	mem_pitch;	/* pitch of the system-memory buffer (presumably bytes per row -- confirm against the DRM side) */
+	unsigned int	width;		/* width of the transfer; callers pass the pixel width */
+	unsigned int	height;		/* height of the transfer; callers pass the row count */
+	unsigned int	bpp;		/* bits per pixel of the pixmap being transferred */
+	unsigned int	swap;		/* one of the DRM_RADEON_DMA_SWAP_* values below */
+#define DRM_RADEON_DMA_SWAP_NONE        0
+#define DRM_RADEON_DMA_SWAP_16BITS      1
+#define DRM_RADEON_DMA_SWAP_32BITS      2
+#define DRM_RADEON_DMA_SWAP_HDW         3
+	unsigned int	direction;	/* one of the DRM_RADEON_DMA_TO_FB / _FROM_FB / _BOTH values below */
+#define DRM_RADEON_DMA_TO_FB            0
+#define DRM_RADEON_DMA_FROM_FB          1
+#define DRM_RADEON_DMA_BOTH             2
+	/* result */
+	int		dma_id;		/* read back by callers after the request and passed on to the KICK/SYNC/FREE requests */
+} drmRadeonDmaPrepare;
+
+typedef struct drm_radeon_dma_kick {	/* argument of DRM_RADEON_DMA_KICK */
+	unsigned int	dma_id;		/* id taken from drmRadeonDmaPrepare.dma_id */
+	unsigned int	direction;	/* DRM_RADEON_DMA_TO_FB or DRM_RADEON_DMA_FROM_FB */
+#define DRM_RADEON_DMA_TO_FB	0
+#define DRM_RADEON_DMA_FROM_FB	1
+	unsigned int	fb_origin;	/* card-side address of the first pixel; callers add info->fbLocation to the FB offset */
+	unsigned int	fb_pitch;	/* pitch of the framebuffer pixmap; callers pass exaGetPixmapPitch() */
+	unsigned int	pix_x,pix_y;	/* callers in this patch always pass 0,0; presumably a start offset -- confirm */
+	unsigned int	blit_w,blit_h;	/* size of the transfer; callers pass the pixel width and the row count */
+	unsigned int	sync;		/* callers pass 1; presumably "wait for completion" -- confirm against the DRM side */
+	unsigned int	autofree;	/* callers pass 1; presumably releases dma_id once done -- confirm */
+} drmRadeonDmaKick;
+
+typedef struct drm_radeon_dma_sync {	/* argument of DRM_RADEON_DMA_SYNC */
+	unsigned int	dma_id;		/* id of the prepared transfer; callers issue this while a KICK/SYNC returns -EINTR */
+} drmRadeonDmaSync;
+
+typedef struct drm_radeon_dma_free {	/* argument of DRM_RADEON_DMA_FREE */
+	unsigned int	dma_id;		/* id of the prepared transfer; callers issue this after a failed KICK */
+} drmRadeonDmaFree;
+
+typedef struct drm_radeon_dma_init {	/* argument of DRM_RADEON_DMA_INIT */
+	unsigned int	offset;		/* the caller passes info->sgDMADescOffset, the FB area reserved for DMA descriptors */
+	unsigned int	count;		/* the caller passes 0x10000/16; presumably the number of 16-byte descriptors -- confirm */
+} drmRadeonDmaInit;
+
+
 #endif
diff -urN xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_cursor.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_cursor.c
--- xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_cursor.c	2005-09-12 06:58:53.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_cursor.c	2005-10-10 15:13:39.000000000 +1000
@@ -116,6 +116,10 @@
        return;
 #endif
 
+#if RADEON_FB_PROTECT
+    pixels = (CARD32 *)(pointer)(info->RealFB + info->cursor_offset);
+#endif
+
     fg |= 0xff000000;
     bg |= 0xff000000;
 
@@ -211,6 +215,10 @@
 	OUTREG(RADEON_CRTC2_GEN_CNTL, save2 & (CARD32)~RADEON_CRTC2_CUR_EN);
     }
 
+#if RADEON_FB_PROTECT
+    d = (CARD32 *)(pointer)(info->RealFB + info->cursor_offset);
+#endif
+
 #ifdef ARGB_CURSOR
     info->cursor_argb = FALSE;
 #endif
@@ -320,6 +328,10 @@
 	OUTREG(RADEON_CRTC2_GEN_CNTL, save2 & (CARD32)~RADEON_CRTC2_CUR_EN);
     }
 
+#if RADEON_FB_PROTECT
+    d = (CARD32 *)(pointer)(info->RealFB + info->cursor_offset);
+#endif
+
 #ifdef ARGB_CURSOR
     info->cursor_argb = TRUE;
 #endif
diff -urN xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.c
--- xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.c	2005-10-10 15:42:37.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.c	2005-10-09 12:34:28.000000000 +1000
@@ -1283,7 +1283,7 @@
     pDRIInfo->ddxDriverMinorVersion      = RADEON_VERSION_MINOR;
     pDRIInfo->ddxDriverPatchVersion      = RADEON_VERSION_PATCH;
     pDRIInfo->frameBufferPhysicalAddress = (void *)info->LinearAddr;
-    pDRIInfo->frameBufferSize            = info->FbMapSize - info->FbSecureSize;
+    pDRIInfo->frameBufferSize            = info->FbMapSize;
     pDRIInfo->frameBufferStride          = (pScrn->displayWidth *
 					    info->CurrentLayout.pixel_bytes);
     pDRIInfo->ddxDrawableTableEntry      = RADEON_MAX_DRAWABLES;
@@ -1605,6 +1605,22 @@
     pRADEONDRI->perctx_sarea_size = info->perctx_sarea_size;
 #endif
 
+#ifdef USE_EXA
+    if (info->useEXA && info->sgDMADescOffset) {
+	    drmRadeonDmaInit dmaInit;
+	    int rc;
+
+	    dmaInit.offset = info->sgDMADescOffset;
+	    dmaInit.count = 0x10000/16;
+	    rc = drmCommandWrite(info->drmFD, DRM_RADEON_DMA_INIT,
+				 &dmaInit, sizeof(drmRadeonDmaInit));
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "SG-DMA init %s\n", rc == 0 ?
+		       "successful" : "failed");
+	    info->sgDMA = (rc == 0);
+    }
+#endif
+
     /* Have shadowfb run only while there is 3d active. */
     if (!info->useEXA && info->allowPageFlip /* && info->drmMinor >= 3 */) {
 	ShadowFBInit( pScreen, RADEONDRIRefreshArea );
@@ -1987,13 +2003,28 @@
 {
     ScrnInfoPtr        pScrn   = xf86Screens[pScreen->myNum];
     RADEONInfoPtr      info    = RADEONPTR(pScrn);
+    FBAreaPtr fbarea;
+    int width;
+    int height;
+    int width_bytes;
+    int size_bytes;
 
     if (info->cardType!=CARD_PCIE || info->drmMinor<19)
       return;
 
-    if (info->FbSecureSize==0)
-      return;
+    size_bytes = RADEON_PCIGART_TABLE_SIZE;
+    width = pScrn->displayWidth;
+    width_bytes = width * (pScrn->bitsPerPixel / 8);
+    height = (size_bytes + width_bytes - 1)/width_bytes;
+    
+    fbarea = xf86AllocateOffscreenArea(pScreen, width, height, 256, NULL, NULL, NULL);
 
-    info->pciGartSize = RADEON_PCIGART_TABLE_SIZE;
-    info->pciGartOffset = (info->FbMapSize - info->FbSecureSize);
+    if (!fbarea) {
+      xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "PCI GART Table allocation failed due to stupid memory manager\n");
+    } else {
+      info->pciGartSize = size_bytes;
+      info->pciGartOffset = RADEON_ALIGN((fbarea->box.x1 + fbarea->box.y1 * width) *
+					 info->CurrentLayout.pixel_bytes, 256);
+      
+    }
 }
diff -urN xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_driver.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_driver.c
--- xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_driver.c	2005-10-10 15:42:37.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_driver.c	2005-10-10 14:35:46.000000000 +1000
@@ -725,6 +725,26 @@
     }
 
     if (!info->FB) return FALSE;
+
+#if RADEON_FB_PROTECT
+    ErrorF("FbMapSize=%08lx\n", info->FbMapSize);
+    /* Linux HACK: fallback value, only used when the headers lack MAP_ANONYMOUS */
+# ifndef MAP_ANONYMOUS
+#  define MAP_ANONYMOUS 0x020
+# endif
+    /* PROT_NONE stand-in for info->FB; anonymous map, so no fd is opened (-1) */
+    info->RealFB = info->FB;
+    info->FB = mmap(0, info->FbMapSize, PROT_NONE,
+		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (info->FB == MAP_FAILED) {
+	    perror("Crap !");
+	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "Failed to create protected"
+		       " fb mapping !\n");
+	    info->FB = info->RealFB;	/* fall back to the unprotected mapping */
+    } else
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Protected fb mapping fb=%p"
+		       " prot=%p\n", info->RealFB, info->FB);
+#endif
     return TRUE;
 }
 
@@ -733,6 +753,11 @@
 {
     RADEONInfoPtr  info = RADEONPTR(pScrn);
 
+#if RADEON_FB_PROTECT
+    if (info->FB != info->RealFB)	/* a separate PROT_NONE map exists only if mmap() succeeded */
+	munmap(info->FB, info->FbMapSize);
+    info->FB = info->RealFB;		/* let the regular unmap below see the real mapping */
+#endif
     if (info->FBDev)
 	fbdevHWUnmapVidmem(pScrn);
     else
@@ -2761,8 +2786,6 @@
     pScrn->videoRam  &= ~1023;
     info->FbMapSize  = pScrn->videoRam * 1024;
 
-    info->FbSecureSize = 0;
-
 #ifdef XF86DRI
 				/* AGP/PCI */
     /* Proper autodetection of an AGP capable device requires examining
@@ -2846,10 +2869,6 @@
 		       "Invalid BusType option, using detected type\n");
 	}
     }
-
-    /* if the card is PCI Express reserve the last 32k for the gart table */
-    if (info->cardType == CARD_PCIE)
-        info->FbSecureSize = RADEON_PCIGART_TABLE_SIZE;
 #endif
     xf86GetOptValBool(info->Options, OPTION_SHOWCACHE, &info->showCache);
     if (info->showCache)
@@ -5164,7 +5183,7 @@
      * pixmap cache.  Should be enough for a fullscreen background
      * image plus some leftovers.
      */
-    info->textureSize = info->FbMapSize - info->FbSecureSize - 5 * bufferSize - depthSize;
+    info->textureSize = info->FbMapSize - 5 * bufferSize - depthSize;
 
     /* If that gives us less than half the available memory, let's
      * be greedy and grab some more.  Sorry, I care more about 3D
@@ -5184,7 +5203,7 @@
      */
     if (info->textureSize < 0) {
 	info->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
- 	                    - 2 * width_bytes - 16384 - info->FbSecureSize;
+		- 2 * width_bytes - 16384 - RADEON_PCIGART_TABLE_SIZE;
     }
 
     /* Check to see if there is more room available after the 8192nd
@@ -5267,7 +5286,7 @@
     info->backY = info->backOffset / width_bytes;
     info->backX = (info->backOffset - (info->backY * width_bytes)) / cpp;
 
-    scanlines = (info->FbMapSize-info->FbSecureSize) / width_bytes;
+    scanlines = info->FbMapSize / width_bytes;
     if (scanlines > 8191)
 	scanlines = 8191;
 
@@ -5352,7 +5371,6 @@
 
     info->depthPitchOffset = (((info->depthPitch * cpp / 64) << 22) |
 			      ((info->depthOffset + info->fbLocation) >> 10));
-    return TRUE;
 }
 #endif /* XF86DRI */
 
@@ -5413,7 +5431,6 @@
 		       "Largest offscreen area available: %d x %d\n",
 		       width, height);
 	}
-	return TRUE;
     }    
 }
 #endif /* USE_XAA */
diff -urN xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_exa.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_exa.c
--- xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_exa.c	2005-10-10 15:42:37.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_exa.c	2005-10-11 09:31:46.000000000 +1000
@@ -35,6 +35,7 @@
 
 #include "radeon.h"
 #include "radeon_reg.h"
+#include "r300_reg.h"
 #ifdef XF86DRI
 #include "radeon_dri.h"
 #endif
@@ -42,6 +43,7 @@
 #include "radeon_probe.h"
 #include "radeon_version.h"
 #ifdef XF86DRI
+#include "radeon_common.h"
 #include "radeon_sarea.h"
 #endif
 
@@ -52,7 +54,7 @@
 #define RINFO_FROM_SCREEN(pScr) ScrnInfoPtr pScrn =  xf86Screens[pScr->myNum]; \
     RADEONInfoPtr info   = RADEONPTR(pScrn)
 
-#define RADEON_TRACE_FALL 0
+#define RADEON_TRACE_FALL 1
 #define RADEON_TRACE_DRAW 0
 
 #if RADEON_TRACE_FALL
@@ -200,8 +202,18 @@
     int bpp, rc, soff;
     CARD32 size, flags;
 
+#if RADEON_FB_PROTECT
+    if ((unsigned char *)pPix->devPrivate.ptr >= info->FB &&
+	(unsigned char *)pPix->devPrivate.ptr < (info->FB + info->FbMapSize)) {
+	offset = (unsigned char *)pPix->devPrivate.ptr - info->FB;
+	pPix->devPrivate.ptr = info->RealFB + offset;
+    } else
+	ErrorF("RADEONPrepareAccess: got pixmap @ %p !\n",
+	       pPix->devPrivate.ptr);
+#endif
+
     /* Front buffer is always set with proper swappers */
-    if (offset == 0)
+    if (offset == 0) 
         return TRUE;
 
     /* If same bpp as front buffer, just do nothing as the main
@@ -242,6 +254,9 @@
 	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
 		       "drm: could not allocate surface for access"
 		       " swapper, err: %d!\n", rc);
+#if RADEON_FB_PROTECT
+	    pPix->devPrivate.ptr = info->FB + offset;
+#endif
 	    return FALSE;
 	}
 	swapper_surfaces[index] = offset;
@@ -264,12 +279,23 @@
     CARD32 offset = exaGetPixmapOffset(pPix);
     int bpp, soff;
 
+#if RADEON_FB_PROTECT
+    if ((unsigned char *)pPix->devPrivate.ptr >= info->RealFB &&
+	(unsigned char *)pPix->devPrivate.ptr < (info->RealFB+info->FbMapSize)) {
+	offset = (unsigned char *)pPix->devPrivate.ptr - info->RealFB;
+	pPix->devPrivate.ptr = info->FB + offset;
+    } else
+	ErrorF("RADEONFinishAccess: got pixmap @ %p !\n",
+	       pPix->devPrivate.ptr);
+#endif
+
     /* Front buffer is always set with proper swappers */
     if (offset == 0)
         return;
 
     if (swapper_surfaces[index] == 0)
         return;
+
 #if defined(XF86DRI)
     if (info->directRenderingEnabled && info->allowColorTiling) {
 	drmRadeonSurfaceFree drmsurffree;
@@ -380,7 +406,7 @@
 	screen_size = pScrn->virtualY * byteStride;
 
     info->exa.card.memoryBase = info->FB + pScrn->fbOffset;
-    info->exa.card.memorySize = info->FbMapSize - info->FbSecureSize;
+    info->exa.card.memorySize = info->FbMapSize;
     info->exa.card.offScreenBase = screen_size;
 
     xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Allocating from a screen of %ld kb\n",
@@ -397,14 +423,17 @@
 	info->frontOffset = 0;
 	info->frontPitch = pScrn->displayWidth;
 
-	RADEONDRIAllocatePCIGARTTable(pScreen);
-	
-	if (info->cardType==CARD_PCIE)
-	  xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		     "Will use %d kb for PCI GART at offset 0x%08x\n",
-		     RADEON_PCIGART_TABLE_SIZE / 1024,
-		     (int)info->pciGartOffset);
-
+	if ((info->cardType==CARD_PCIE) && info->drmMinor >= 19) {
+	    info->pciGartSize = RADEON_PCIGART_TABLE_SIZE;
+	    info->pciGartOffset = RADEON_ALIGN(info->exa.card.offScreenBase,
+					       256);
+	    info->exa.card.offScreenBase = info->pciGartOffset +
+					   info->pciGartSize;
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "Will use %d kb for PCI GART at offset 0x%08x\n",
+		       RADEON_PCIGART_TABLE_SIZE / 1024,
+		       (int)info->pciGartOffset);
+	}
 	/* Reserve a static area for the back buffer the same size as the
 	 * visible screen.  XXX: This would be better initialized in ati_dri.c
 	 * when GLX is set up, but the offscreen memory manager's allocations
@@ -457,6 +486,13 @@
 	    /* Minimum texture size is for 2 256x256x32bpp textures */
 	    info->textureSize = 0;
 	}
+
+	/* Reserve 64 KB for the SG-DMA descriptors (DRM_RADEON_DMA_INIT is given 0x10000/16 entries) */
+	if ((info->exa.card.memorySize - info->exa.card.offScreenBase) >
+	    0x10000) {
+		info->sgDMADescOffset = info->exa.card.offScreenBase;
+		info->exa.card.offScreenBase += 0x10000;
+	}
     }
 #endif /* XF86DRI */
 	
diff -urN xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_exa_funcs.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_exa_funcs.c
--- xc-COMMIT/programs/Xserver/hw/xfree86/drivers/ati/radeon_exa_funcs.c	2005-09-18 12:32:23.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati/radeon_exa_funcs.c	2005-10-15 17:22:12.000000000 +1000
@@ -56,6 +56,16 @@
 
 #include "fbdevhw.h"
 
+/*
+ * Ok, after measurements, it seems SG-DMA is not interesting
+ * for UTS (not _AT_ALL_ compared to HostDataBlit in fact). However,
+ * it makes a significant difference for DFS, especially on large images,
+ * I suspect that it's always good to do writes instead of read on any
+ * transfer direction...
+ */
+#undef UTS_USES_SG_DMA
+#define DFS_USES_SG_DMA
+
 static void
 FUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker)
 {
@@ -209,9 +219,7 @@
 FUNC_NAME(RADEONUploadToScreen)(PixmapPtr pDst, int x, int y, int w, int h,
 				char *src, int src_pitch)
 {
-#if X_BYTE_ORDER == X_BIG_ENDIAN || defined(ACCEL_CP)
     RINFO_FROM_SCREEN(pDst->drawable.pScreen);
-#endif
     CARD8	   *dst	     = pDst->devPrivate.ptr;
     unsigned int   dst_pitch = exaGetPixmapPitch(pDst);
     unsigned int   bpp	     = pDst->drawable.bitsPerPixel;
@@ -225,13 +233,85 @@
 	    ~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP |
 	      RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP);
 #endif
-
     TRACE;
 
-    if (bpp < 8)
+    if (bpp < 8) {
+	ErrorF("RADEON: UploadToScreen with %d bpp !\n", bpp);
 	return FALSE;
+    }
 
+#if !RADEON_FB_PROTECT
 #ifdef ACCEL_CP
+#ifdef UTS_USES_SG_DMA
+    /* Maybe threshold on pixmap size here */
+    if (info->sgDMA && info->directRenderingEnabled) {
+	drmRadeonDmaPrepare dmaPrep;
+	drmRadeonDmaKick dmaKick;
+	drmRadeonDmaFree dmaFree;
+	unsigned int fbdst = dst - info->FB + info->fbLocation;
+	int rc;
+
+	/* Prepare memory source; go through unsigned long so a 32-bit pointer is not sign-extended */
+	dmaPrep.mem_origin = (uint64_t)(unsigned long)src;
+	dmaPrep.mem_pitch = src_pitch;
+	dmaPrep.width = w;
+	dmaPrep.height = h;
+	dmaPrep.bpp = bpp;
+	dmaPrep.direction = DRM_RADEON_DMA_TO_FB;
+#if X_BYTE_ORDER == X_BIG_ENDIAN 
+	switch(bpp) {
+        case 32:
+	    dmaPrep.swap = DRM_RADEON_DMA_SWAP_32BITS;
+	    break;
+	case 16:
+	    dmaPrep.swap = DRM_RADEON_DMA_SWAP_16BITS;
+	    break;
+	default:
+	    dmaPrep.swap = DRM_RADEON_DMA_SWAP_NONE;
+	}
+#else
+	dmaPrep.swap = DRM_RADEON_DMA_SWAP_NONE;
+#endif
+	rc = drmCommandWriteRead(info->drmFD, DRM_RADEON_DMA_PREP,
+				 &dmaPrep, sizeof(drmRadeonDmaPrepare));
+	if (rc) {
+		ErrorF("UTS: DRM_RADEON_DMA_PREP failed, err: %d\n", rc);
+		goto fail;
+	}
+
+	/* Make sure we are idle */
+	exaWaitSync(pDst->drawable.pScreen);
+
+	/* Kick blit */
+	dmaKick.dma_id = dmaPrep.dma_id;
+	dmaKick.direction = DRM_RADEON_DMA_TO_FB;
+	dmaKick.fb_origin = fbdst + x * (bpp / 8) + y * dst_pitch;
+	dmaKick.fb_pitch = dst_pitch;
+	dmaKick.pix_x = 0;
+	dmaKick.pix_y = 0;
+	dmaKick.blit_w = w;
+	dmaKick.blit_h = h;
+	dmaKick.sync = 1;
+	dmaKick.autofree = 1;
+	rc = drmCommandWrite(info->drmFD, DRM_RADEON_DMA_KICK,
+			     &dmaKick, sizeof(drmRadeonDmaKick));
+
+	while (rc == -EINTR) {
+		drmRadeonDmaSync dmaSync;
+		dmaSync.dma_id = dmaPrep.dma_id;
+		rc = drmCommandWrite(info->drmFD, DRM_RADEON_DMA_SYNC,
+				     &dmaSync,
+				     sizeof(drmRadeonDmaSync));
+	}
+	if (rc == 0)
+		return TRUE;
+	ErrorF("UTS: DRM_RADEON_DMA_KICK failed, err: %d\n", rc);
+	dmaFree.dma_id = dmaPrep.dma_id;
+	drmCommandWrite(info->drmFD, DRM_RADEON_DMA_FREE,
+			&dmaFree, sizeof(drmRadeonDmaFree));	
+    }
+ fail:
+#endif
     if (info->directRenderingEnabled) {
 	CARD8 *buf;
 	int cpp = bpp / 8;
@@ -251,7 +331,7 @@
 	return TRUE;
   }
 #endif
-
+#endif
     /* Do we need that sync here ? probably not .... */
     exaWaitSync(pDst->drawable.pScreen);
 
@@ -270,9 +350,28 @@
     }
     OUTREG(RADEON_SURFACE_CNTL, swapper);
 #endif
+    
+#if RADEON_FB_PROTECT
+    if (dst >= info->FB && dst < (info->FB + info->FbMapSize)) {
+	unsigned long offset;
+	offset = dst - info->FB;
+	dst = info->RealFB + offset;	/* the CPU copy below must go through the real mapping */
+	if ((offset + ((y + h) * dst_pitch)) >
+	    info->FbMapSize) {
+		ErrorF("RADEONUploadToScreen() out of screen boundaries !!!\n");
+		ErrorF("offset: %08lx, x=%d, y=%d, w=%d, h=%d, pitch=%u, bpp=%u\n",
+		       offset, x, y, w, h, dst_pitch, bpp);
+	}
+    } else
+	ErrorF("RADEONUploadToScreen: got pixmap @ %p !\n", (void *)dst);
+#endif
+
     w *= bpp / 8;
     dst += (x * bpp / 8) + (y * dst_pitch);
 
+    if ((x * bpp / 8) + w > dst_pitch)	/* compare in bytes: w is already scaled, x is still in pixels */
+	    ErrorF("x + w > pitch ! x=%d, w=%d, dst_pitch=%u\n", x, w, dst_pitch);
+
     while (h--) {
 	memcpy(dst, src, w);
 	src += src_pitch;
@@ -291,23 +390,99 @@
 FUNC_NAME(RADEONDownloadFromScreen)(PixmapPtr pSrc, int x, int y, int w, int h,
 				    char *dst, int dst_pitch)
 {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
     RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
+    unsigned char *src	     = pSrc->devPrivate.ptr;
+    int		   src_pitch = exaGetPixmapPitch(pSrc);
+    int		   bpp	     = pSrc->drawable.bitsPerPixel;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
     unsigned char *RADEONMMIO = info->MMIO;
     unsigned int swapper = info->ModeReg.surface_cntl &
 	    ~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP |
 	      RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP);
 #endif
-    unsigned char *src	     = pSrc->devPrivate.ptr;
-    int		   src_pitch = exaGetPixmapPitch(pSrc);
-    int		   bpp	     = pSrc->drawable.bitsPerPixel;
 
     TRACE;
 
-    /*
-     * This is currently done without DMA until I have ironed out the
-     * various endian issues with R300 among others
-     */
+    if (bpp < 8) {
+	ErrorF("RADEON: DownloadFromScreen with %d bpp !\n", bpp);
+	return FALSE;
+    }
+
+#if !RADEON_FB_PROTECT
+#ifdef ACCEL_CP
+#ifdef DFS_USES_SG_DMA
+    /* Maybe threshold on pixmap size here */
+    if (info->sgDMA && info->directRenderingEnabled) {
+	drmRadeonDmaPrepare dmaPrep;
+	drmRadeonDmaKick dmaKick;
+	drmRadeonDmaFree dmaFree;
+	unsigned int fbsrc = src - info->FB + info->fbLocation;
+	int rc;
+
+	/* Prepare memory destination; go through unsigned long so a 32-bit pointer is not sign-extended */
+	dmaPrep.mem_origin = (uint64_t)(unsigned long)dst;
+	dmaPrep.mem_pitch = dst_pitch;
+	dmaPrep.width = w;
+	dmaPrep.height = h;
+	dmaPrep.bpp = bpp;
+	dmaPrep.direction = DRM_RADEON_DMA_FROM_FB;
+#if X_BYTE_ORDER == X_BIG_ENDIAN 
+	switch(bpp) {
+        case 32:
+	    dmaPrep.swap = DRM_RADEON_DMA_SWAP_32BITS;
+	    break;
+	case 16:
+	    dmaPrep.swap = DRM_RADEON_DMA_SWAP_16BITS;
+	    break;
+	default:
+	    dmaPrep.swap = DRM_RADEON_DMA_SWAP_NONE;
+	}
+#else
+	dmaPrep.swap = DRM_RADEON_DMA_SWAP_NONE;
+#endif
+	rc = drmCommandWriteRead(info->drmFD, DRM_RADEON_DMA_PREP,
+				 &dmaPrep, sizeof(drmRadeonDmaPrepare));
+	if (rc) {
+		ErrorF("DFS: DRM_RADEON_DMA_PREP failed, err: %d\n", rc);
+		goto fail;
+	}
+
+	/* Make sure we are idle */
+	exaWaitSync(pSrc->drawable.pScreen);
+
+	/* Kick blit */
+	dmaKick.dma_id = dmaPrep.dma_id;
+	dmaKick.direction = DRM_RADEON_DMA_FROM_FB;
+	dmaKick.fb_origin = fbsrc + x * (bpp / 8) + y * src_pitch;
+	dmaKick.fb_pitch = src_pitch;
+	dmaKick.pix_x = 0;
+	dmaKick.pix_y = 0;
+	dmaKick.blit_w = w;
+	dmaKick.blit_h = h;
+	dmaKick.sync = 1;
+	dmaKick.autofree = 1;
+	rc = drmCommandWrite(info->drmFD, DRM_RADEON_DMA_KICK,
+			     &dmaKick, sizeof(drmRadeonDmaKick));
+
+	while (rc == -EINTR) {
+		drmRadeonDmaSync dmaSync;
+		dmaSync.dma_id = dmaPrep.dma_id;
+		rc = drmCommandWrite(info->drmFD, DRM_RADEON_DMA_SYNC,
+				     &dmaSync,
+				     sizeof(drmRadeonDmaSync));
+	}
+	if (rc == 0)
+		return TRUE;
+	ErrorF("DFS: DRM_RADEON_DMA_KICK failed, err: %d\n", rc);
+	dmaFree.dma_id = dmaPrep.dma_id;
+	drmCommandWrite(info->drmFD, DRM_RADEON_DMA_FREE,
+			&dmaFree, sizeof(drmRadeonDmaFree));	
+    }
+ fail:
+#endif
+#endif
+#endif
+
     exaWaitSync(pSrc->drawable.pScreen);
 
 #if X_BYTE_ORDER == X_BIG_ENDIAN
@@ -326,6 +501,17 @@
     OUTREG(RADEON_SURFACE_CNTL, swapper);
 #endif
 
+#if RADEON_FB_PROTECT
+    if (src >= info->FB && src < (info->FB + info->FbMapSize)) {
+	unsigned long offset;
+	offset = src - info->FB;
+	src = info->RealFB + offset;
+    } else {
+	ErrorF("RADEONDownloadFromScreen: got pixmap @ %p !\n", src);
+	xf86SigHandler(11);
+    }
+#endif
+
     src += (x * bpp / 8) + (y * src_pitch);
     w *= bpp / 8;
 
