diff -urN drm-HEAD.orig/linux-core/Makefile drm-HEAD/linux-core/Makefile
--- drm-HEAD.orig/linux-core/Makefile	2005-10-07 14:01:27.000000000 +1000
+++ drm-HEAD/linux-core/Makefile	2005-10-09 10:08:07.000000000 +1000
@@ -78,7 +78,8 @@
 R128SHARED = r128_drv.h r128_drm.h r128_cce.c r128_state.c r128_irq.c
 RADEONHEADERS = radeon_drv.h radeon_drm.h r300_reg.h $(DRMHEADERS)
 RADEONSHARED = radeon_drv.h radeon_drm.h radeon_cp.c radeon_irq.c \
-	radeon_mem.c radeon_state.c r300_cmdbuf.c r300_reg.h
+	radeon_mem.c radeon_state.c radeon_dma.c \
+	r300_cmdbuf.c r300_reg.h
 MGAHEADERS = mga_drv.h mga_drm.h mga_ucode.h $(DRMHEADERS)
 MGASHARED = mga_dma.c mga_drm.h mga_drv.h mga_irq.c mga_state.c \
 	mga_ucode.h mga_warp.c
diff -urN drm-HEAD.orig/linux-core/Makefile.kernel drm-HEAD/linux-core/Makefile.kernel
--- drm-HEAD.orig/linux-core/Makefile.kernel	2005-10-07 14:01:27.000000000 +1000
+++ drm-HEAD/linux-core/Makefile.kernel	2005-10-09 10:09:00.000000000 +1000
@@ -18,7 +18,8 @@
 i810-objs   := i810_drv.o i810_dma.o
 i830-objs   := i830_drv.o i830_dma.o i830_irq.o
 i915-objs   := i915_drv.o i915_dma.o i915_irq.o i915_mem.o
-radeon-objs := radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o radeon_irq.o r300_cmdbuf.o
+radeon-objs := radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o radeon_irq.o \
+	radeon_dma.o r300_cmdbuf.o
 sis-objs    := sis_drv.o sis_ds.o sis_mm.o
 ffb-objs    := ffb_drv.o ffb_context.o
 savage-objs := savage_drv.o savage_bci.o savage_state.o
diff -urN drm-HEAD.orig/shared-core/radeon_cp.c drm-HEAD/shared-core/radeon_cp.c
--- drm-HEAD.orig/shared-core/radeon_cp.c	2005-10-07 14:01:27.000000000 +1000
+++ drm-HEAD/shared-core/radeon_cp.c	2005-10-15 16:41:39.000000000 +1000
@@ -1806,6 +1806,11 @@
 
 		/* deallocate kernel resources */
 		radeon_do_cleanup_cp(dev);
+
+		/* unmap DMA descriptor table */
+		if (dev_priv->dma_descs_cpu)
+			iounmap(dev_priv->dma_descs_cpu);
+		dev_priv->dma_descs_cpu = NULL;
 	}
 }
 
diff -urN drm-HEAD.orig/shared-core/radeon_dma.c drm-HEAD/shared-core/radeon_dma.c
--- /dev/null	2005-10-15 09:32:54.000000000 +1000
+++ drm-HEAD/shared-core/radeon_dma.c	2005-10-15 16:38:51.000000000 +1000
@@ -0,0 +1,547 @@
+/*
+ * TODO:
+ *
+ * - Use a separate map for the DMA descriptors to prevent random users
+ *   from tapping them
+ * - Add a queue of prepared DMAs so a client can "cache" prepared pixmaps
+ * - Add a max amount of wired memory, both per client and global (maybe
+ *   auto-unprepare pixmaps, LRU ?)
+ * - Fix all XXX :)
+ * - Add sanity checking of arguments all over the place
+ * - Become king of the world
+ * - Balance doing the DMA mapping of the sglist at prepare time vs. at
+ *   kick time. The latter would allow using the exact transfer direction
+ *   but would add some overhead per transfer. It may be a good idea on
+ *   machines that have a small IOMMU area.
+ */
+
+#undef DEBUG
+
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_drv.h"
+
+#ifdef DEBUG
+#define DBG(fmt...)	do { printk(fmt); } while(0)
+#else
+#define DBG(fmt...)	do { } while(0)
+#endif
+
+static struct radeon_dma_info *radeon_dma_find(drm_device_t * dev,
+					       DRMFILE filp, unsigned int id)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	struct radeon_dma_info *dma;
+
+	/* Only one DMA context pending for now */
+	if (id != 1)
+		return NULL;
+	dma = &dev_priv->pending_dma;
+	if (dma->filp != filp)
+		return NULL;
+	return dma;
+}
+
+static int radeon_dma_build_sgtable(drm_device_t * dev,
+				    struct radeon_dma_info *dma)
+{
+	int i, result = 0;
+	int count = dma->page_count;
+
+	/* Allocate pages array */
+	dma->user_pages = kzalloc(sizeof(struct page *) * count, GFP_KERNEL);
+	if (dma->user_pages == NULL)
+		return -ENOMEM;
+
+	/* Get user pages. They must be mapped writable whenever the
+	 * transfer may target memory, i.e. anything but plain TO_FB.
+	 */
+	down_read(&current->mm->mmap_sem);
+	count = get_user_pages(current, current->mm,
+			       dma->param.mem_origin & PAGE_MASK, count,
+			       dma->param.direction != DRM_RADEON_DMA_TO_FB,
+			       0, dma->user_pages, NULL);
+	up_read(&current->mm->mmap_sem);
+	if (count < 0) {
+		result = count;
+		goto bail_free;
+	}
+	dma->page_count = count;
+
+	/* Allocate sg table */
+	dma->sg = kmalloc(sizeof(struct scatterlist) * count, GFP_KERNEL);
+	if (dma->sg == NULL) {
+		result = -ENOMEM;
+		goto bail_put;
+	}
+
+	/* Fill sg table */
+	for (i = 0; i < count; i++) {
+		dma->sg[i].page = dma->user_pages[i];
+		dma->sg[i].offset = 0;
+		dma->sg[i].length = PAGE_SIZE;
+	}
+
+	/* Map for DMA. We don't know yet which direction transfers will
+	 * take, so we map bidirectionally for now.
+	 */
+	count = dma_map_sg(&dev->pdev->dev, dma->sg, count,
+			   DMA_BIDIRECTIONAL);
+	if (count <= 0) {
+		result = -EIO;
+		goto bail_freesg;
+	}
+	dma->dma_entries = count;
+
+	return result;
+
+ bail_freesg:
+	kfree(dma->sg);
+	dma->sg = NULL;
+ bail_put:
+	for (i = 0; i < dma->page_count; i++)
+		page_cache_release(dma->user_pages[i]);
+ bail_free:
+	kfree(dma->user_pages);
+	dma->user_pages = NULL;
+
+	return result;
+}
+
+static void radeon_dma_free_sgtable(drm_device_t * dev,
+				    struct radeon_dma_info *dma)
+{
+	int i;
+
+	/* Unmap DMA */
+	if (dma->dma_entries) {
+		/* NOTE: The "nents" passed to dma_unmap_sg is _not_ the
+		 * one that was returned by dma_map_sg, but rather the one
+		 * we passed in the first place. The direction must also
+		 * match what was used at map time, i.e. bidirectional.
+		 */
+		dma_unmap_sg(&dev->pdev->dev, dma->sg, dma->page_count,
+			     DMA_BIDIRECTIONAL);
+		dma->dma_entries = 0;
+	}
+
+	/* Free scatterlist */
+	kfree(dma->sg);
+	dma->sg = NULL;
+
+	/* Free user pages */
+	for (i = 0; i < dma->page_count; i++) {
+		if (dma->user_pages[i] == NULL)
+			continue;
+		if (dma->dirty && !PageReserved(dma->user_pages[i]))
+			set_page_dirty_lock(dma->user_pages[i]);
+		page_cache_release(dma->user_pages[i]);
+	}
+	kfree(dma->user_pages);
+	dma->user_pages = NULL;
+}
+
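+/* Lifecycle (as implemented below): PREP pins the user pages and builds
+ * the sg table, KICK writes the descriptor list and starts the engine,
+ * SYNC waits for the completion interrupt, and FREE (or autofree from
+ * sync, or a client close via radeon_dma_release()) unpins everything.
+ * Only a single prepared context (dma_id 1) exists for now.
+ */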
+static void radeon_do_dma_prepare(drm_device_t * dev,
+				  drm_radeon_dma_prepare_t *pb)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	struct radeon_dma_info *dma;
+	unsigned long ms, me;
+	int count, rc, result;
+
+	/* only one pending for now, id always 1 */
+	dma = &dev_priv->pending_dma;
+	if (dma->filp != 0) {
+		result = -EBUSY;
+		goto bail;
+	}
+
+	/* Only one DMA context for now */
+	result = 1;
+
+	memset(dma, 0, sizeof(struct radeon_dma_info));
+	dma->param = *pb;
+	dma->filp = dev->lock.filp;
+
+	/* Ok, we need to map user pages in. We could be ultra-smart and
+	 * get exactly the pages that are needed, but it's complicated.
+	 * Instead, assume that memory-based pixmaps rarely have a pitch
+	 * much larger than the width, and treat the whole pixmap as a
+	 * single contiguous chunk.
+	 */
+	ms = pb->mem_origin & PAGE_MASK;
+	me = PAGE_ALIGN(pb->mem_origin + pb->height * pb->mem_pitch);
+	dma->page_count = count = (me - ms) / PAGE_SIZE;
+
+	DBG("%d pages to be prepared\n", dma->page_count);
+
+	/* Build SG table & prepare memory for DMA */
+	rc = radeon_dma_build_sgtable(dev, dma);
+	if (rc < 0) {
+		result = rc;
+		goto bail_clear;
+	}
+
+	/* Ok, preparation is complete, we just return the ID to the caller */
+
+ bail_clear:
+	if (result < 0) {
+		dma->filp = NULL;
+		DBG("Prepare failed, err %d\n", result);
+	}
+ bail:
+	pb->dma_id = result;
+}
+
+int radeon_dma_prepare(DRM_IOCTL_ARGS)
+{
+	DRM_DEVICE;
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	drm_radeon_dma_prepare_t pb;
+
+	DBG(" -> radeon_dma_prepare()\n");
+
+	/* XXX is the lock only necessary for queuing the blit ? */
+	LOCK_TEST_WITH_RETURN(dev, filp);
+
+	if (!dev_priv) {
+		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
+		return DRM_ERR(EINVAL);
+	}
+
+	DRM_COPY_FROM_USER_IOCTL(pb, (drm_radeon_dma_prepare_t __user *) data,
+				 sizeof(pb));
+
+	DBG(" params:\n");
+	DBG("  mem_origin : 0x%08lx\n", (unsigned long)pb.mem_origin);
+	DBG("  mem_pitch  : 0x%08lx\n", (unsigned long)pb.mem_pitch);
+	DBG("  width      : %d\n", pb.width);
+	DBG("  height     : %d\n", pb.height);
+	DBG("  bpp        : %d\n", pb.bpp);
+	DBG("  swap       : %d\n", pb.swap);
+
+	radeon_do_dma_prepare(dev, &pb);
+
+	DBG(" result: %d\n", pb.dma_id);
+
+	if (DRM_COPY_TO_USER((drm_radeon_dma_prepare_t __user *) data, &pb,
+			     sizeof(pb))) {
+		DRM_ERROR("copy_to_user\n");
+		return DRM_ERR(EFAULT);
+	}
+	return 0;
+}
+
+int radeon_dma_init(DRM_IOCTL_ARGS)
+{
+	DRM_DEVICE;
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	drm_radeon_dma_init_t pb;
+	unsigned long addr, size;
+
+	if (!dev_priv) {
+		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
+		return DRM_ERR(EINVAL);
+	}
+
+	DRM_COPY_FROM_USER_IOCTL(pb, (drm_radeon_dma_init_t __user *) data,
+				 sizeof(pb));
+
+	if (dev_priv->dma_descs_cpu)
+		iounmap(dev_priv->dma_descs_cpu);
+	addr = drm_get_resource_start(dev, 0) + pb.offset;
+	size = pb.count * 16;	/* one 16-byte descriptor per entry */
+	dev_priv->dma_descs_cpu = ioremap(addr, size);
+	if (dev_priv->dma_descs_cpu == NULL)
+		return -ENOMEM;
+	dev_priv->dma_desc_count = pb.count;
+	dev_priv->dma_descs_card = dev_priv->fb_location + pb.offset;
+
+	DBG("DMA offset: %08x (%08x in card space, %08lx in CPU space, "
+	    "%p virtual)\n",
+	    pb.offset, dev_priv->dma_descs_card, addr,
+	    dev_priv->dma_descs_cpu);
+
+	return 0;
+}
+
+static void radeon_do_dma_free(drm_device_t * dev, struct radeon_dma_info *dma)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+
+	/* XXX Cancel pending DMA instead of waiting for it to finish */
+	if (dma->pending) {
+		while (RADEON_READ(RADEON_DMA_GUI_STATUS)
+		       & RADEON_DMA_GUI_STAT_ACTIVE) {
+			msleep(1);
+		}
+	}
+
+	radeon_dma_free_sgtable(dev, dma);
+	dma->filp = 0;
+}
+
+static int radeon_do_dma_sync(drm_device_t * dev,
+			      struct radeon_dma_info *dma)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	int rc = 0;
+
+	DRM_WAIT_ON(rc, dev_priv->dma_queue, 2 * DRM_HZ,
+		    !(RADEON_READ(RADEON_DMA_GUI_STATUS) &
+		      RADEON_DMA_GUI_STAT_ACTIVE));
+#if 0
+	udelay(10);
+	while (RADEON_READ(RADEON_DMA_GUI_STATUS)
+	       & RADEON_DMA_GUI_STAT_ACTIVE) {
+		msleep(1);
+	}
+#endif
+	DBG(" dma_sync result: %d\n", rc);
+	if (rc == 0)
+		dma->pending = 0;
+	if (dma->pending == 0 && dma->autofree) {
+		DBG(" autofreeing...\n");
+		radeon_do_dma_free(dev, dma);
+		DBG(" done\n");
+	}
+
+	return rc;
+}
+
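+/* Descriptor layout used below (inferred from this code, not from any
+ * hardware documentation): each descriptor is four little-endian 32-bit
+ * words, 16 bytes apart in the table mapped by radeon_dma_init():
+ *
+ *   word 0: bus address of the transfer source
+ *   word 1: bus address of the transfer destination
+ *   word 2: byte count in the low 21 bits, plus swap control and the
+ *           RADEON_DMA_GUI_CMD_EOL end-of-list bit
+ *   word 3: reserved, written as 0
+ *
+ * For DRM_RADEON_DMA_FROM_FB the address words are exchanged so that the
+ * frame buffer becomes the source; the byte swapper is always applied on
+ * the system-memory side of the transfer.
+ */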
+static int radeon_do_dma_kick(drm_device_t * dev,
+			      struct radeon_dma_info *dma,
+			      drm_radeon_dma_kick_t *pb)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	unsigned int x, y, cpp, len, dcount, sg_off, swap_bit;
+	unsigned long smem, mem, sg_mem;
+	struct scatterlist *sg;
+	int rc = 0;
+	u32 *desc;
+	u32 swapper;
+
+	if (dma->pending)
+		return -EBUSY;
+	if ((dma->param.direction != DRM_RADEON_DMA_BOTH &&
+	     pb->direction != dma->param.direction) ||
+	    pb->direction == DRM_RADEON_DMA_BOTH) {
+		printk(KERN_ERR "radeon: invalid DMA direction\n");
+		return -EINVAL;
+	}
+	if (dev_priv->dma_descs_cpu == NULL)
+		return -ENODEV;
+
+	/*
+	 * Build descriptors
+	 */
+
+	cpp = dma->param.bpp >> 3;
+
+	/* It looks like we need to use a different swapper depending on
+	 * the direction of the transfer
+	 */
+	swap_bit = (pb->direction == DRM_RADEON_DMA_FROM_FB) ?
+		RADEON_DMA_GUI_CMD_DST_SWAP_SHIFT :
+		RADEON_DMA_GUI_CMD_SRC_SWAP_SHIFT;
+
+	/* Adjust starting address */
+	smem = dma->param.mem_origin + pb->pix_y * dma->param.mem_pitch +
+		pb->pix_x * cpp;
+
+	/* Current pixel position */
+	x = y = 0;
+
+	/* Scatterlist cursor */
+	sg = dma->sg;
+	sg_mem = dma->param.mem_origin & PAGE_MASK;
+
+	/* Current DMA descriptor */
+	desc = dev_priv->dma_descs_cpu;
+	dcount = 1;
+
+	DBG(" descriptors:\n");
+
+	/* Endian swappers should be disabled while writing to the
+	 * descriptors in video memory ! Either that, or we should
+	 * "know" the current bpp depth and adjust the descriptor
+	 * swapping (as tweaking SURFACE_CNTL may be slow)
+	 */
+	swapper = RADEON_READ(RADEON_SURFACE_CNTL);
+	RADEON_WRITE(RADEON_SURFACE_CNTL, swapper &
+		     ~(RADEON_NONSURF_AP0_SWP_MASK |
+		       RADEON_NONSURF_AP1_SWP_MASK));
+	(void)RADEON_READ(RADEON_SURFACE_CNTL);
+
+	while (y < pb->blit_h) {
+		if (dcount > dev_priv->dma_desc_count) {
+			printk(KERN_ERR "radeon: too many descs ! (%d vs. %d)\n",
+			       dcount, dev_priv->dma_desc_count);
+			RADEON_WRITE(RADEON_SURFACE_CNTL, swapper);
+			return -ENXIO;
+		}
+
+		/* Current user memory position */
+		mem = smem + y * dma->param.mem_pitch + x * cpp;
+
+		/* Find it in the scatterlist */
+		WARN_ON(mem < sg_mem);
+		while (mem > (sg_mem + sg_dma_len(sg) - 1)) {
+			sg_mem += sg_dma_len(sg);
+			sg++;
+		}
+		sg_off = mem - sg_mem;
+
+		/* Remaining length. Note: we assume that we are always
+		 * dealing with multiples of cpp, which should hopefully
+		 * be fine
+		 */
+		len = min(sg_dma_len(sg) - sg_off, (pb->blit_w - x) * cpp);
+		if (len % cpp)
+			printk(KERN_WARNING "radeon: DMA len unaligned "
+			       "(%d)\n", len);
+
+		if (pb->direction == DRM_RADEON_DMA_FROM_FB) {
+			desc[1] = cpu_to_le32(sg_dma_address(sg) + sg_off);
+			desc[0] = cpu_to_le32(pb->fb_origin +
+					      (y * pb->fb_pitch) + x * cpp);
+		} else {
+			desc[0] = cpu_to_le32(sg_dma_address(sg) + sg_off);
+			desc[1] = cpu_to_le32(pb->fb_origin +
+					      (y * pb->fb_pitch) + x * cpp);
+		}
+		desc[2] = cpu_to_le32(len | (dma->param.swap << swap_bit));
+		desc[3] = 0;
+		DBG("  %08x %08x %08x\n", le32_to_cpu(desc[0]),
+		    le32_to_cpu(desc[1]), le32_to_cpu(desc[2]));
+		desc += 4;
+		dcount++;
+
+		/* Next chunk */
+		x += len / cpp;
+		if (x >= pb->blit_w) {
+			x = 0;	/* each row restarts at the blit origin */
+			y++;
+		}
+	}
+
+	/* Mark the last descriptor */
+	desc -= 4;
+	desc[2] |= cpu_to_le32(RADEON_DMA_GUI_CMD_EOL);
+
+	DBG(" kicking...\n");
+
+	/* Feed the DMA engine. We could use the ring for that, but for
+	 * now we just don't bother. Writing the descriptor table address
+	 * to DMA_GUI_TABLE_ADDR is what starts the engine; the status
+	 * reads flush the posted writes.
+	 */
+	dma->pending = 1;
+	dma->autofree = pb->autofree;
+	mb();
+	RADEON_WRITE(RADEON_DMA_GUI_STATUS, 0);
+	(void)RADEON_READ(RADEON_DMA_GUI_STATUS);
+	RADEON_WRITE(RADEON_DMA_GUI_TABLE_ADDR, dev_priv->dma_descs_card);
+	(void)RADEON_READ(RADEON_DMA_GUI_STATUS);
+
+	/* Restore swapper control */
+	RADEON_WRITE(RADEON_SURFACE_CNTL, swapper);
+
+	if (pb->sync)
+		rc = radeon_do_dma_sync(dev, dma);
+	return rc;
+}
+
+int radeon_dma_kick(DRM_IOCTL_ARGS)
+{
+	DRM_DEVICE;
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	drm_radeon_dma_kick_t pb;
+	struct radeon_dma_info *dma;
+
+	DBG(" -> radeon_dma_kick()\n");
+
+	LOCK_TEST_WITH_RETURN(dev, filp);
+
+	if (!dev_priv) {
+		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
+		return DRM_ERR(EINVAL);
+	}
+
+	DRM_COPY_FROM_USER_IOCTL(pb,
+				 (drm_radeon_dma_kick_t __user *) data,
+				 sizeof(pb));
+	DBG(" params:\n");
+	DBG("  dma_id    : %d\n", pb.dma_id);
+	DBG("  direction : %d\n", pb.direction);
+	DBG("  fb_origin : 0x%08x\n", pb.fb_origin);
+	DBG("  fb_pitch  : 0x%08x\n", pb.fb_pitch);
+	DBG("  pix_x     : %d\n", pb.pix_x);
+	DBG("  pix_y     : %d\n", pb.pix_y);
+	DBG("  blit_w    : %d\n", pb.blit_w);
+	DBG("  blit_h    : %d\n", pb.blit_h);
+	DBG("  sync      : %d\n", pb.sync);
+	DBG("  autofree  : %d\n", pb.autofree);
+
+	dma = radeon_dma_find(dev, filp, pb.dma_id);
+	if (dma == NULL)
+		return -EINVAL;
+	return radeon_do_dma_kick(dev, dma, &pb);
+}
+
+int radeon_dma_sync(DRM_IOCTL_ARGS)
+{
+	DRM_DEVICE;
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	drm_radeon_dma_sync_t pb;
+	struct radeon_dma_info *dma;
+
+	DBG(" -> radeon_dma_sync()\n");
+
+	LOCK_TEST_WITH_RETURN(dev, filp);
+
+	if (!dev_priv) {
+		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
+		return DRM_ERR(EINVAL);
+	}
+
+	DRM_COPY_FROM_USER_IOCTL(pb,
+				 (drm_radeon_dma_sync_t __user *) data,
+				 sizeof(pb));
+	dma = radeon_dma_find(dev, filp, pb.dma_id);
+	if (dma == NULL)
+		return -EINVAL;
+	return radeon_do_dma_sync(dev, dma);
+}
+
+int radeon_dma_free(DRM_IOCTL_ARGS)
+{
+	DRM_DEVICE;
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+	drm_radeon_dma_free_t pb;
+	struct radeon_dma_info *dma;
+
+	DBG(" -> radeon_dma_free()\n");
+
+	if (!dev_priv) {
+		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
+		return DRM_ERR(EINVAL);
+	}
+
+	DRM_COPY_FROM_USER_IOCTL(pb, (drm_radeon_dma_free_t __user *) data,
+				 sizeof(pb));
+
+	dma = radeon_dma_find(dev, filp, pb.dma_id);
+	if (dma == NULL)
+		return -EINVAL;
+	radeon_do_dma_free(dev, dma);
+
+	return 0;
+}
+
+void radeon_dma_release(DRMFILE filp, drm_device_t * dev)
+{
+	drm_radeon_private_t *dev_priv = dev->dev_private;
+
+	/* only one pending for now, id is always 1 */
+	if (dev_priv->pending_dma.filp == filp)
+		radeon_do_dma_free(dev, &dev_priv->pending_dma);
+}
+
diff -urN drm-HEAD.orig/shared-core/radeon_drm.h drm-HEAD/shared-core/radeon_drm.h
--- drm-HEAD.orig/shared-core/radeon_drm.h	2005-10-07 14:01:28.000000000 +1000
+++ drm-HEAD/shared-core/radeon_drm.h	2005-10-09 10:04:10.000000000 +1000
@@ -464,6 +464,11 @@
 #define DRM_RADEON_SETPARAM   0x19
 #define DRM_RADEON_SURF_ALLOC 0x1a
 #define DRM_RADEON_SURF_FREE  0x1b
+#define DRM_RADEON_DMA_PREP   0x1c
+#define DRM_RADEON_DMA_KICK   0x1d
+#define DRM_RADEON_DMA_SYNC   0x1e
+#define DRM_RADEON_DMA_FREE   0x1f
+#define DRM_RADEON_DMA_INIT   0x20
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -492,6 +497,11 @@
 #define DRM_IOCTL_RADEON_SETPARAM   DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SETPARAM, drm_radeon_setparam_t)
 #define DRM_IOCTL_RADEON_SURF_ALLOC DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_ALLOC, drm_radeon_surface_alloc_t)
 #define DRM_IOCTL_RADEON_SURF_FREE  DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_FREE, drm_radeon_surface_free_t)
+#define DRM_IOCTL_RADEON_DMA_PREP   DRM_IORW(DRM_COMMAND_BASE + DRM_RADEON_DMA_PREP, drm_radeon_dma_prepare_t)
+#define DRM_IOCTL_RADEON_DMA_KICK   DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_DMA_KICK, drm_radeon_dma_kick_t)
+#define DRM_IOCTL_RADEON_DMA_SYNC   DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_DMA_SYNC, drm_radeon_dma_sync_t)
+#define DRM_IOCTL_RADEON_DMA_FREE   DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_DMA_FREE, drm_radeon_dma_free_t)
+#define DRM_IOCTL_RADEON_DMA_INIT   DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_DMA_INIT, drm_radeon_dma_init_t)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -691,6 +701,7 @@
 #define RADEON_SETPARAM_FB_LOCATION      1	/* determined framebuffer location */
 #define RADEON_SETPARAM_SWITCH_TILING    2	/* enable/disable color tiling */
 #define RADEON_SETPARAM_PCIGART_LOCATION 3	/* PCI Gart Location */
+#define RADEON_SETPARAM_MAX_SURFACE      4	/* Set max surface register */
 
 /* 1.14: Clients can allocate/free a surface
  */
@@ -704,5 +715,50 @@
 	unsigned int address;
 } drm_radeon_surface_free_t;
 
+/* 1.20: DMA engine control
+ */
+
+typedef struct drm_radeon_dma_prepare {
+	uint64_t mem_origin;
+	unsigned int mem_pitch;
+	unsigned int width;
+	unsigned int height;
+	unsigned int bpp;
+	unsigned int swap;
+#define DRM_RADEON_DMA_SWAP_NONE	0
+#define DRM_RADEON_DMA_SWAP_16BITS	1
+#define DRM_RADEON_DMA_SWAP_32BITS	2
+#define DRM_RADEON_DMA_SWAP_HDW		3
+	unsigned int direction;
+#define DRM_RADEON_DMA_TO_FB		0
+#define DRM_RADEON_DMA_FROM_FB		1
+#define DRM_RADEON_DMA_BOTH		2
+	/* result */
+	int dma_id;
+} drm_radeon_dma_prepare_t;
+
+typedef struct drm_radeon_dma_kick {
+	unsigned int dma_id;
+	unsigned int direction;
+	unsigned int fb_origin;
+	unsigned int fb_pitch;
+	unsigned int pix_x, pix_y;
+	unsigned int blit_w, blit_h;
+	unsigned int sync;
+	unsigned int autofree;
+} drm_radeon_dma_kick_t;
+
+typedef struct drm_radeon_dma_sync {
+	unsigned int dma_id;
+} drm_radeon_dma_sync_t;
+
+typedef struct drm_radeon_dma_free {
+	unsigned int dma_id;
+} drm_radeon_dma_free_t;
+
+typedef struct drm_radeon_dma_init {
+	unsigned int offset;
+	unsigned int count;
+} drm_radeon_dma_init_t;
+
 #endif
diff -urN drm-HEAD.orig/shared-core/radeon_drv.h drm-HEAD/shared-core/radeon_drv.h
--- drm-HEAD.orig/shared-core/radeon_drv.h	2005-10-07 14:01:28.000000000 +1000
+++ drm-HEAD/shared-core/radeon_drv.h	2005-10-09 12:25:57.000000000 +1000
@@ -188,6 +188,18 @@
 	DRMFILE filp;
 };
 
+#define RADEON_MAX_DMA_PAGES	256
+
+struct radeon_dma_info {
+	drm_radeon_dma_prepare_t param;
+	struct page **user_pages;
+	unsigned int page_count;
+	struct scatterlist *sg;
+	unsigned int dma_entries;
+	int dirty, pending, autofree;
+	DRMFILE filp;
+};
+
 typedef struct drm_radeon_private {
 
 	drm_radeon_ring_buffer_t ring;
@@ -266,9 +278,17 @@
 	unsigned long pcigart_offset;
 	drm_ati_pcigart_info gart_info;
+
+	/* only one for now */
+	struct radeon_dma_info pending_dma;
+	u32 *dma_descs_cpu;
+	u32 dma_descs_card;
+	unsigned int dma_desc_count;
+	wait_queue_head_t dma_queue;
+
 	/* starting from here on, data is preserved accross an open */
 	uint32_t flags;		/* see radeon_chip_flags */
-	
+
 } drm_radeon_private_t;
 
 typedef struct drm_radeon_buf_priv {
 	u32 age;
@@ -322,6 +342,14 @@
 extern long radeon_compat_ioctl(struct file *filp, unsigned int cmd,
 				unsigned long arg);
+extern int radeon_dma_prepare(DRM_IOCTL_ARGS);
+extern int radeon_dma_kick(DRM_IOCTL_ARGS);
+extern int radeon_dma_sync(DRM_IOCTL_ARGS);
+extern int radeon_dma_free(DRM_IOCTL_ARGS);
+extern int radeon_dma_init(DRM_IOCTL_ARGS);
+extern void radeon_dma_release(DRMFILE filp, drm_device_t * dev);
+
+
 /* r300_cmdbuf.c */
 extern void r300_init_reg_flags(void);
 
@@ -367,6 +395,23 @@
 #define RADEON_CRTC2_OFFSET		0x0324
 #define RADEON_CRTC2_OFFSET_CNTL	0x0328
 
+#define RADEON_DMA_GUI_TABLE_ADDR	0x780
+#define RADEON_DMA_GUI_COMMAND		0x78c
+# define RADEON_DMA_GUI_CMD_COUNT_MASK		0x001fffffu
+# define RADEON_DMA_GUI_CMD_SRC_SWAP_MASK	0x00c00000u
+# define RADEON_DMA_GUI_CMD_SRC_SWAP_SHIFT	22
+# define RADEON_DMA_GUI_CMD_DST_SWAP_MASK	0x03000000u
+# define RADEON_DMA_GUI_CMD_DST_SWAP_SHIFT	24
+# define RADEON_DMA_GUI_CMD_SAS			0x04000000u
+# define RADEON_DMA_GUI_CMD_DAS			0x08000000u
+# define RADEON_DMA_GUI_CMD_SAIC		0x10000000u
+# define RADEON_DMA_GUI_CMD_DAIC		0x20000000u
+# define RADEON_DMA_GUI_CMD_INTDIS		0x40000000u
+# define RADEON_DMA_GUI_CMD_EOL			0x80000000u
+#define RADEON_DMA_GUI_STATUS		0x790
+# define RADEON_DMA_GUI_STAT_ABORT_EN	(1 << 20)
+# define RADEON_DMA_GUI_STAT_ACTIVE	(1 << 21)
+# define RADEON_DMA_GUI_STAT_SWAP_MASK	(3 << 22)
+
 #define RADEON_PCIE_INDEX		0x0030
 #define RADEON_PCIE_DATA		0x0034
 #define RADEON_PCIE_TX_GART_CNTL	0x10
@@ -445,6 +490,8 @@
 # define RADEON_SW_INT_TEST		(1 << 25)
 # define RADEON_SW_INT_TEST_ACK		(1 << 25)
 # define RADEON_SW_INT_FIRE		(1 << 26)
+# define RADEON_GUI_DMA_INT		(1 << 30)
+# define RADEON_GUI_DMA_INT_ACK		(1 << 30)
 
 #define RADEON_HOST_PATH_CNTL		0x0130
 # define RADEON_HDP_SOFT_RESET		(1 << 26)
diff -urN drm-HEAD.orig/shared-core/radeon_irq.c drm-HEAD/shared-core/radeon_irq.c
--- drm-HEAD.orig/shared-core/radeon_irq.c	2005-06-06 21:35:43.000000000 +1000
+++ drm-HEAD/shared-core/radeon_irq.c	2005-10-15 16:23:21.000000000 +1000
@@ -71,8 +71,9 @@
 	/* Only consider the bits we're interested in - others could be used
 	 * outside the DRM
 	 */
-	stat = radeon_acknowledge_irqs(dev_priv, (RADEON_SW_INT_TEST_ACK |
-						  RADEON_CRTC_VBLANK_STAT));
+	stat = radeon_acknowledge_irqs(dev_priv, RADEON_SW_INT_TEST_ACK |
+				       RADEON_CRTC_VBLANK_STAT_ACK |
+				       RADEON_GUI_DMA_INT_ACK);
 	if (!stat)
 		return IRQ_NONE;
 
@@ -88,6 +89,13 @@
 		drm_vbl_send_signals(dev);
 	}
 
+	/* DMA interrupt */
+	if (stat & RADEON_GUI_DMA_INT) {
+		if (!dev_priv->pending_dma.pending)
+			printk(KERN_WARNING "radeon: stale DMA irq !\n");
+		DRM_WAKEUP(&dev_priv->dma_queue);
+	}
+
 	return IRQ_HANDLED;
 }
 
@@ -214,21 +222,30 @@
 	RADEON_WRITE(RADEON_GEN_INT_CNTL, 0);
 
 	/* Clear bits if they're already high */
-	radeon_acknowledge_irqs(dev_priv, (RADEON_SW_INT_TEST_ACK |
-					   RADEON_CRTC_VBLANK_STAT));
+	radeon_acknowledge_irqs(dev_priv, RADEON_SW_INT_TEST_ACK |
+				RADEON_CRTC_VBLANK_STAT_ACK |
+				RADEON_GUI_DMA_INT_ACK);
 }
 
 void radeon_driver_irq_postinstall(drm_device_t * dev)
 {
 	drm_radeon_private_t *dev_priv =
 	    (drm_radeon_private_t *) dev->dev_private;
-	
+
 	atomic_set(&dev_priv->swi_emitted, 0);
 	DRM_INIT_WAITQUEUE(&dev_priv->swi_queue);
+	DRM_INIT_WAITQUEUE(&dev_priv->dma_queue);
+
+	/* Clear bits if they're already high */
+	radeon_acknowledge_irqs(dev_priv, RADEON_SW_INT_TEST_ACK |
+				RADEON_CRTC_VBLANK_STAT_ACK |
+				RADEON_GUI_DMA_INT_ACK);
+
 	/* Turn on SW, VBL and DMA ints */
 	RADEON_WRITE(RADEON_GEN_INT_CNTL,
-		     RADEON_CRTC_VBLANK_MASK | RADEON_SW_INT_ENABLE);
+		     RADEON_CRTC_VBLANK_MASK | RADEON_SW_INT_ENABLE |
+		     RADEON_GUI_DMA_INT);
 }
 
 void radeon_driver_irq_uninstall(drm_device_t * dev)
diff -urN drm-HEAD.orig/shared-core/radeon_state.c drm-HEAD/shared-core/radeon_state.c
--- drm-HEAD.orig/shared-core/radeon_state.c	2005-09-11 18:51:23.000000000 +1000
+++ drm-HEAD/shared-core/radeon_state.c	2005-10-09 10:06:19.000000000 +1000
@@ -3032,6 +3032,7 @@
 	if (dev_priv->page_flipping) {
 		radeon_do_cleanup_pageflip(dev);
 	}
+	radeon_dma_release(filp, dev);
 	radeon_mem_release(filp, dev_priv->gart_heap);
 	radeon_mem_release(filp, dev_priv->fb_heap);
 	radeon_surfaces_release(filp, dev_priv);
@@ -3100,7 +3101,12 @@
 	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
 	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
 	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
-	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
+	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH},
+	[DRM_IOCTL_NR(DRM_RADEON_DMA_PREP)] = {radeon_dma_prepare, DRM_AUTH},
+	[DRM_IOCTL_NR(DRM_RADEON_DMA_KICK)] = {radeon_dma_kick, DRM_AUTH},
+	[DRM_IOCTL_NR(DRM_RADEON_DMA_SYNC)] = {radeon_dma_sync, DRM_AUTH},
+	[DRM_IOCTL_NR(DRM_RADEON_DMA_FREE)] = {radeon_dma_free, DRM_AUTH},
+	[DRM_IOCTL_NR(DRM_RADEON_DMA_INIT)] = {radeon_dma_init, DRM_AUTH},
 };
 
 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
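
For reference, the intended user-space calling sequence looks roughly like
the sketch below. This is an illustration only, not part of the patch: "fd"
is assumed to be an authenticated DRM file descriptor whose client holds the
DRM lock (prepare/kick/sync are LOCK_TEST_WITH_RETURN-protected), the
blit_to_fb() helper and its parameters are hypothetical, error handling is
minimal, and it assumes DRM_IOCTL_RADEON_DMA_INIT has already been issued
(presumably by the DDX) to point the engine at a descriptor table.

	#include <string.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include "radeon_drm.h"

	/* Hypothetical helper: copy a w x h, 32bpp pixmap from user
	 * memory into the frame buffer using the new DMA ioctls. */
	static int blit_to_fb(int fd, void *pixels, unsigned int pitch,
			      unsigned int w, unsigned int h,
			      unsigned int fb_offset, unsigned int fb_pitch)
	{
		drm_radeon_dma_prepare_t prep;
		drm_radeon_dma_kick_t kick;

		/* Pin the pixmap and build its scatter/gather table */
		memset(&prep, 0, sizeof(prep));
		prep.mem_origin = (uint64_t)(unsigned long)pixels;
		prep.mem_pitch = pitch;
		prep.width = w;
		prep.height = h;
		prep.bpp = 32;
		prep.swap = DRM_RADEON_DMA_SWAP_NONE;
		prep.direction = DRM_RADEON_DMA_TO_FB;
		if (ioctl(fd, DRM_IOCTL_RADEON_DMA_PREP, &prep) < 0)
			return -1;
		if (prep.dma_id < 0)	/* kernel reports errors here too */
			return prep.dma_id;

		/* Kick the transfer; sync=1 waits for completion and,
		 * with autofree=1, unpins the pages afterwards */
		memset(&kick, 0, sizeof(kick));
		kick.dma_id = prep.dma_id;
		kick.direction = DRM_RADEON_DMA_TO_FB;
		kick.fb_origin = fb_offset;
		kick.fb_pitch = fb_pitch;
		kick.pix_x = 0;
		kick.pix_y = 0;
		kick.blit_w = w;
		kick.blit_h = h;
		kick.sync = 1;
		kick.autofree = 1;
		return ioctl(fd, DRM_IOCTL_RADEON_DMA_KICK, &kick);
	}

A FROM_FB read-back would be symmetric: prepare with direction
DRM_RADEON_DMA_FROM_FB (or DRM_RADEON_DMA_BOTH) and kick with the matching
direction; without autofree, the same prepared pixmap can be kicked
repeatedly and released later with DRM_IOCTL_RADEON_DMA_FREE.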