Port the pSeries XICS interrupt controller code to the new code. A single remapper host is created for the whole machine regardless of how many interrupt presentation and source controllers are found and it's set to match any device node that isn't a 8259. That works fine on pSeries and avoid having to deal with some of the complexities of split source controllers vs. presentation controllers in the pSeries device trees. Signed-off-by: Benjamin Herrenschmidt Index: linux-irq-work/arch/powerpc/platforms/pseries/xics.c =================================================================== --- linux-irq-work.orig/arch/powerpc/platforms/pseries/xics.c 2006-07-03 09:49:11.000000000 +1000 +++ linux-irq-work/arch/powerpc/platforms/pseries/xics.c 2006-07-03 09:59:35.000000000 +1000 @@ -8,6 +8,9 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + +#undef DEBUG + #include #include #include @@ -19,6 +22,7 @@ #include #include #include + #include #include #include @@ -31,9 +35,6 @@ #include "xics.h" -/* This is used to map real irq numbers to virtual */ -static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC); - #define XICS_IPI 2 #define XICS_IRQ_SPURIOUS 0 @@ -64,12 +65,12 @@ static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS]; -static int xics_irq_8259_cascade = 0; -static int xics_irq_8259_cascade_real = 0; static unsigned int default_server = 0xFF; static unsigned int default_distrib_server = 0; static unsigned int interrupt_server_size = 8; +static struct irq_host *xics_host; + /* * XICS only has a single IPI, so encode the messages per CPU */ @@ -85,7 +86,7 @@ /* Direct HW low level accessors */ -static inline int direct_xirr_info_get(int n_cpu) +static inline unsigned int direct_xirr_info_get(int n_cpu) { return in_be32(&xics_per_cpu[n_cpu]->xirr.word); } @@ -130,7 +131,7 @@ return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy); } -static inline int lpar_xirr_info_get(int n_cpu) +static inline unsigned int lpar_xirr_info_get(int n_cpu) { unsigned long lpar_rc; unsigned long return_value; @@ -138,7 +139,7 @@ lpar_rc = plpar_xirr(&return_value); if (lpar_rc != H_SUCCESS) panic(" bad return code xirr - rc = %lx \n", lpar_rc); - return (int)return_value; + return (unsigned int)return_value; } static inline void lpar_xirr_info_set(int n_cpu, int value) @@ -175,11 +176,11 @@ #ifdef CONFIG_SMP -static int get_irq_server(unsigned int irq) +static int get_irq_server(unsigned int virq) { unsigned int server; /* For the moment only implement delivery to all cpus or one cpu */ - cpumask_t cpumask = irq_desc[irq].affinity; + cpumask_t cpumask = irq_desc[virq].affinity; cpumask_t tmp = CPU_MASK_NONE; if (!distribute_irqs) @@ -200,7 +201,7 @@ } #else -static int get_irq_server(unsigned int irq) +static int get_irq_server(unsigned int virq) { return default_server; } @@ -213,9 +214,11 @@ int call_status; unsigned int server; - irq = virt_irq_to_real(irq_offset_down(virq)); - WARN_ON(irq == NO_IRQ); - if (irq == XICS_IPI || irq == NO_IRQ) + pr_debug("xics: unmask virq %d\n", virq); + + irq = (unsigned int)irq_map[virq].hwirq; + pr_debug(" -> map to hwirq 0x%x\n", irq); + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) return; server = get_irq_server(virq); @@ -267,75 +270,57 @@ { unsigned int irq; - irq = virt_irq_to_real(irq_offset_down(virq)); - WARN_ON(irq == NO_IRQ); - if (irq != NO_IRQ) - xics_mask_real_irq(irq); + pr_debug("xics: mask virq %d\n", virq); + + irq = (unsigned int)irq_map[virq].hwirq; + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) + return; + xics_mask_real_irq(irq); } -static void xics_set_irq_revmap(unsigned int virq) +static unsigned int xics_startup(unsigned int virq) { unsigned int irq; - irq = irq_offset_down(virq); - if (radix_tree_insert(&irq_map, virt_irq_to_real(irq), - &virt_irq_to_real_map[irq]) == -ENOMEM) - printk(KERN_CRIT "Out of memory creating real -> virtual" - " IRQ mapping for irq %u (real 0x%x)\n", - virq, virt_irq_to_real(irq)); -} + /* force a reverse mapping of the interrupt so it gets in the cache */ + irq = (unsigned int)irq_map[virq].hwirq; + irq_radix_revmap(xics_host, irq); -static unsigned int xics_startup(unsigned int virq) -{ - xics_set_irq_revmap(virq); + /* unmask it */ xics_unmask_irq(virq); return 0; } -static unsigned int real_irq_to_virt(unsigned int real_irq) -{ - unsigned int *ptr; - - ptr = radix_tree_lookup(&irq_map, real_irq); - if (ptr == NULL) - return NO_IRQ; - return ptr - virt_irq_to_real_map; -} - -static void xics_eoi_direct(unsigned int irq) +static void xics_eoi_direct(unsigned int virq) { int cpu = smp_processor_id(); + unsigned int irq = (unsigned int)irq_map[virq].hwirq; iosync(); - direct_xirr_info_set(cpu, ((0xff << 24) | - (virt_irq_to_real(irq_offset_down(irq))))); + direct_xirr_info_set(cpu, (0xff << 24) | irq); } -static void xics_eoi_lpar(unsigned int irq) +static void xics_eoi_lpar(unsigned int virq) { int cpu = smp_processor_id(); + unsigned int irq = (unsigned int)irq_map[virq].hwirq; iosync(); - lpar_xirr_info_set(cpu, ((0xff << 24) | - (virt_irq_to_real(irq_offset_down(irq))))); - + lpar_xirr_info_set(cpu, (0xff << 24) | irq); } -static inline int xics_remap_irq(int vec) +static inline unsigned int xics_remap_irq(unsigned int vec) { - int irq; + unsigned int irq; vec &= 0x00ffffff; if (vec == XICS_IRQ_SPURIOUS) return NO_IRQ; - - irq = real_irq_to_virt(vec); - if (irq == NO_IRQ) - irq = real_irq_to_virt_slowpath(vec); + irq = irq_radix_revmap(xics_host, vec); if (likely(irq != NO_IRQ)) - return irq_offset_up(irq); + return irq; printk(KERN_ERR "Interrupt %u (real) is invalid," " disabling it.\n", vec); @@ -343,14 +328,14 @@ return NO_IRQ; } -static int xics_get_irq_direct(struct pt_regs *regs) +static unsigned int xics_get_irq_direct(struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); return xics_remap_irq(direct_xirr_info_get(cpu)); } -static int xics_get_irq_lpar(struct pt_regs *regs) +static unsigned int xics_get_irq_lpar(struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); @@ -437,8 +422,8 @@ unsigned long newmask; cpumask_t tmp = CPU_MASK_NONE; - irq = virt_irq_to_real(irq_offset_down(virq)); - if (irq == XICS_IPI || irq == NO_IRQ) + irq = (unsigned int)irq_map[virq].hwirq; + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) return; status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); @@ -469,6 +454,24 @@ } } +void xics_setup_cpu(void) +{ + int cpu = smp_processor_id(); + + xics_set_cpu_priority(cpu, 0xff); + + /* + * Put the calling processor into the GIQ. This is really only + * necessary from a secondary thread as the OF start-cpu interface + * performs this function for us on primary threads. + * + * XXX: undo of teardown on kexec needs this too, as may hotplug + */ + rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, + (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); +} + + static struct irq_chip xics_pic_direct = { .typename = " XICS ", .startup = xics_startup, @@ -489,90 +492,245 @@ }; -void xics_setup_cpu(void) +static int xics_host_match(struct irq_host *h, struct device_node *node) { - int cpu = smp_processor_id(); + /* IBM machines have interrupt parents of various funky types for things + * like vdevices, events, etc... The trick we use here is to match + * everything here except the legacy 8259 which is compatible "chrp,iic" + */ + return !device_is_compatible(node, "chrp,iic"); +} - xics_set_cpu_priority(cpu, 0xff); +static int xics_host_map_direct(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags) +{ + unsigned int sense = flags & IRQ_TYPE_SENSE_MASK; - /* - * Put the calling processor into the GIQ. This is really only - * necessary from a secondary thread as the OF start-cpu interface - * performs this function for us on primary threads. - * - * XXX: undo of teardown on kexec needs this too, as may hotplug + pr_debug("xics: map_direct virq %d, hwirq 0x%lx, flags: 0x%x\n", + virq, hw, flags); + + if (sense && sense != IRQ_TYPE_LEVEL_LOW) + printk(KERN_WARNING "xics: using unsupported sense 0x%x" + " for irq %d (h: 0x%lx)\n", flags, virq, hw); + + get_irq_desc(virq)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(virq, &xics_pic_direct, handle_fasteoi_irq); + return 0; +} + +static int xics_host_map_lpar(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw, unsigned int flags) +{ + unsigned int sense = flags & IRQ_TYPE_SENSE_MASK; + + pr_debug("xics: map_lpar virq %d, hwirq 0x%lx, flags: 0x%x\n", + virq, hw, flags); + + if (sense && sense != IRQ_TYPE_LEVEL_LOW) + printk(KERN_WARNING "xics: using unsupported sense 0x%x" + " for irq %d (h: 0x%lx)\n", flags, virq, hw); + + get_irq_desc(virq)->status |= IRQ_LEVEL; + set_irq_chip_and_handler(virq, &xics_pic_lpar, handle_fasteoi_irq); + return 0; +} + +static int xics_host_xlate(struct irq_host *h, struct device_node *ct, + u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + +{ + /* Current xics implementation translates everything + * to level. It is not technically right for MSIs but this + * is irrelevant at this point. We might get smarter in the future */ - rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, - (1UL << interrupt_server_size) - 1 - default_distrib_server, 1); + *out_hwirq = intspec[0]; + *out_flags = IRQ_TYPE_LEVEL_LOW; + + return 0; +} + +static struct irq_host_ops xics_host_direct_ops = { + .match = xics_host_match, + .map = xics_host_map_direct, + .xlate = xics_host_xlate, +}; + +static struct irq_host_ops xics_host_lpar_ops = { + .match = xics_host_match, + .map = xics_host_map_lpar, + .xlate = xics_host_xlate, +}; + +static void __init xics_init_host(void) +{ + struct irq_host_ops *ops; + + if (firmware_has_feature(FW_FEATURE_LPAR)) + ops = &xics_host_lpar_ops; + else + ops = &xics_host_direct_ops; + xics_host = irq_alloc_host(IRQ_HOST_MAP_TREE, 0, ops, + XICS_IRQ_SPURIOUS); + BUG_ON(xics_host == NULL); + irq_set_default_host(xics_host); } -void xics_init_IRQ(void) +static void __init xics_map_one_cpu(int hw_id, unsigned long addr, + unsigned long size) { +#ifdef CONFIG_SMP int i; - unsigned long intr_size = 0; - struct device_node *np; - uint *ireg, ilen, indx = 0; - unsigned long intr_base = 0; - struct xics_interrupt_node { - unsigned long addr; - unsigned long size; - } intnodes[NR_CPUS]; - struct irq_chip *chip; - ppc64_boot_msg(0x20, "XICS Init"); + /* This may look gross but it's good enough for now, we don't quite + * have a hard -> linux processor id matching. + */ + for_each_possible_cpu(i) { + if (!cpu_present(i)) + continue; + if (hw_id == get_hard_smp_processor_id(i)) { + xics_per_cpu[i] = ioremap(addr, size); + return; + } + } +#else + if (hw_id != 0) + return; + xics_per_cpu[0] = ioremap(addr, size); +#endif /* CONFIG_SMP */ +} - ibm_get_xive = rtas_token("ibm,get-xive"); - ibm_set_xive = rtas_token("ibm,set-xive"); - ibm_int_on = rtas_token("ibm,int-on"); - ibm_int_off = rtas_token("ibm,int-off"); +static void __init xics_init_one_node(struct device_node *np, + unsigned int *indx) +{ + unsigned int ilen; + u32 *ireg; - np = of_find_node_by_type(NULL, "PowerPC-External-Interrupt-Presentation"); - if (!np) - panic("xics_init_IRQ: can't find interrupt presentation"); + /* This code does the theorically broken assumption that the interrupt + * server numbers are the same as the hard CPU numbers. + * This happens to be the case so far but we are playing with fire... + * should be fixed one of these days. -BenH. + */ + ireg = (u32 *)get_property(np, "ibm,interrupt-server-ranges", NULL); -nextnode: - ireg = (uint *)get_property(np, "ibm,interrupt-server-ranges", NULL); + /* Do that ever happen ? we'll know soon enough... but even good'old + * f80 does have that property .. + */ + WARN_ON(ireg == NULL); if (ireg) { /* * set node starting index for this node */ - indx = *ireg; + *indx = *ireg; } - - ireg = (uint *)get_property(np, "reg", &ilen); + ireg = (u32 *)get_property(np, "reg", &ilen); if (!ireg) panic("xics_init_IRQ: can't find interrupt reg property"); - while (ilen) { - intnodes[indx].addr = (unsigned long)*ireg++ << 32; - ilen -= sizeof(uint); - intnodes[indx].addr |= *ireg++; - ilen -= sizeof(uint); - intnodes[indx].size = (unsigned long)*ireg++ << 32; - ilen -= sizeof(uint); - intnodes[indx].size |= *ireg++; - ilen -= sizeof(uint); - indx++; - if (indx >= NR_CPUS) break; + while (ilen >= (4 * sizeof(u32))) { + unsigned long addr, size; + + /* XXX Use proper OF parsing code here !!! */ + addr = (unsigned long)*ireg++ << 32; + ilen -= sizeof(u32); + addr |= *ireg++; + ilen -= sizeof(u32); + size = (unsigned long)*ireg++ << 32; + ilen -= sizeof(u32); + size |= *ireg++; + ilen -= sizeof(u32); + xics_map_one_cpu(*indx, addr, size); + (*indx)++; + } +} + + +static void __init xics_setup_8259_cascade(void) +{ + struct device_node *np, *old, *found = NULL; + int cascade, naddr; + u32 *addrp; + unsigned long intack = 0; + + for_each_node_by_type(np, "interrupt-controller") + if (device_is_compatible(np, "chrp,iic")) { + found = np; + break; + } + if (found == NULL) { + printk(KERN_DEBUG "xics: no ISA interrupt controller\n"); + return; + } + cascade = irq_of_parse_and_map(found, 0); + if (cascade == NO_IRQ) { + printk(KERN_ERR "xics: failed to map cascade interrupt"); + return; + } + pr_debug("xics: cascade mapped to irq %d\n", cascade); + + for (old = of_node_get(found); old != NULL ; old = np) { + np = of_get_parent(old); + of_node_put(old); + if (np == NULL) + break; + if (strcmp(np->name, "pci") != 0) + continue; + addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge", NULL); + if (addrp == NULL) + continue; + naddr = prom_n_addr_cells(np); + intack = addrp[naddr-1]; + if (naddr > 1) + intack |= ((unsigned long)addrp[naddr-2]) << 32; + } + if (intack) + printk(KERN_DEBUG "xics: PCI 8259 intack at 0x%016lx\n", intack); + i8259_init(found, intack); + of_node_put(found); + set_irq_chained_handler(cascade, pseries_8259_cascade); +} + +void __init xics_init_IRQ(void) +{ + int i; + struct device_node *np; + u32 *ireg, ilen, indx = 0; + int found = 0; + + ppc64_boot_msg(0x20, "XICS Init"); + + ibm_get_xive = rtas_token("ibm,get-xive"); + ibm_set_xive = rtas_token("ibm,set-xive"); + ibm_int_on = rtas_token("ibm,int-on"); + ibm_int_off = rtas_token("ibm,int-off"); + + for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") { + found = 1; + if (firmware_has_feature(FW_FEATURE_LPAR)) + break; + xics_init_one_node(np, &indx); } + if (found == 0) + return; - np = of_find_node_by_type(np, "PowerPC-External-Interrupt-Presentation"); - if ((indx < NR_CPUS) && np) goto nextnode; + xics_init_host(); /* Find the server numbers for the boot cpu. */ for (np = of_find_node_by_type(NULL, "cpu"); np; np = of_find_node_by_type(np, "cpu")) { - ireg = (uint *)get_property(np, "reg", &ilen); + ireg = (u32 *)get_property(np, "reg", &ilen); if (ireg && ireg[0] == get_hard_smp_processor_id(boot_cpuid)) { - ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s", - &ilen); + ireg = (u32 *)get_property(np, + "ibm,ppc-interrupt-gserver#s", + &ilen); i = ilen / sizeof(int); if (ireg && i > 0) { default_server = ireg[0]; - default_distrib_server = ireg[i-1]; /* take last element */ + /* take last element */ + default_distrib_server = ireg[i-1]; } - ireg = (uint *)get_property(np, + ireg = (u32 *)get_property(np, "ibm,interrupt-server#-size", NULL); if (ireg) interrupt_server_size = *ireg; @@ -581,102 +739,47 @@ } of_node_put(np); - intr_base = intnodes[0].addr; - intr_size = intnodes[0].size; - - if (firmware_has_feature(FW_FEATURE_LPAR)) { - ppc_md.get_irq = xics_get_irq_lpar; - chip = &xics_pic_lpar; - } else { -#ifdef CONFIG_SMP - for_each_possible_cpu(i) { - int hard_id; - - /* FIXME: Do this dynamically! --RR */ - if (!cpu_present(i)) - continue; - - hard_id = get_hard_smp_processor_id(i); - xics_per_cpu[i] = ioremap(intnodes[hard_id].addr, - intnodes[hard_id].size); - } -#else - xics_per_cpu[0] = ioremap(intr_base, intr_size); -#endif /* CONFIG_SMP */ + if (firmware_has_feature(FW_FEATURE_LPAR)) + ppc_md.get_irq = xics_get_irq_lpar; + else ppc_md.get_irq = xics_get_irq_direct; - chip = &xics_pic_direct; - - } - - for (i = irq_offset_value(); i < NR_IRQS; ++i) { - /* All IRQs on XICS are level for now. MSI code may want to modify - * that for reporting purposes - */ - get_irq_desc(i)->status |= IRQ_LEVEL; - set_irq_chip_and_handler(i, chip, handle_fasteoi_irq); - } xics_setup_cpu(); - ppc64_boot_msg(0x21, "XICS Done"); -} + xics_setup_8259_cascade(); -static int xics_setup_8259_cascade(void) -{ - struct device_node *np; - uint *ireg; - - np = of_find_node_by_type(NULL, "interrupt-controller"); - if (np == NULL) { - printk(KERN_WARNING "xics: no ISA interrupt controller\n"); - xics_irq_8259_cascade_real = -1; - xics_irq_8259_cascade = -1; - return 0; - } - - ireg = (uint *) get_property(np, "interrupts", NULL); - if (!ireg) - panic("xics_init_IRQ: can't find ISA interrupts property"); - - xics_irq_8259_cascade_real = *ireg; - xics_irq_8259_cascade = irq_offset_up - (virt_irq_create_mapping(xics_irq_8259_cascade_real)); - i8259_init(0, 0); - of_node_put(np); - - xics_set_irq_revmap(xics_irq_8259_cascade); - set_irq_chained_handler(xics_irq_8259_cascade, pSeries_8259_cascade); - - return 0; + ppc64_boot_msg(0x21, "XICS Done"); } -arch_initcall(xics_setup_8259_cascade); #ifdef CONFIG_SMP void xics_request_IPIs(void) { - virt_irq_to_real_map[XICS_IPI] = XICS_IPI; + unsigned int ipi; + + ipi = irq_create_mapping(xics_host, XICS_IPI, 0); + BUG_ON(ipi == NO_IRQ); /* * IPIs are marked IRQF_DISABLED as they must run with irqs * disabled */ - set_irq_handler(irq_offset_up(XICS_IPI), handle_percpu_irq); + set_irq_handler(ipi, handle_percpu_irq); if (firmware_has_feature(FW_FEATURE_LPAR)) - request_irq(irq_offset_up(XICS_IPI), xics_ipi_action_lpar, - SA_INTERRUPT, "IPI", NULL); + request_irq(ipi, xics_ipi_action_lpar, IRQF_DISABLED, + "IPI", NULL); else - request_irq(irq_offset_up(XICS_IPI), xics_ipi_action_direct, - SA_INTERRUPT, "IPI", NULL); + request_irq(ipi, xics_ipi_action_direct, IRQF_DISABLED, + "IPI", NULL); } -#endif /* CONFIG_SMP */ void xics_teardown_cpu(int secondary) { - struct irq_desc *desc = get_irq_desc(irq_offset_up(XICS_IPI)); int cpu = smp_processor_id(); + unsigned int ipi; + struct irq_desc *desc; - xics_set_cpu_priority(cpu, 0); + xics_set_cpu_priority(cpu, 0); /* * we need to EOI the IPI if we got here from kexec down IPI @@ -685,6 +788,11 @@ * should we be flagging idle loop instead? * or creating some task to be scheduled? */ + + ipi = irq_find_mapping(xics_host, XICS_IPI); + if (ipi == XICS_IRQ_SPURIOUS) + return; + desc = get_irq_desc(ipi); if (desc->chip && desc->chip->eoi) desc->chip->eoi(XICS_IPI); @@ -694,8 +802,8 @@ */ if (secondary) rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, - (1UL << interrupt_server_size) - 1 - - default_distrib_server, 0); + (1UL << interrupt_server_size) - 1 - + default_distrib_server, 0); } #ifdef CONFIG_HOTPLUG_CPU @@ -723,15 +831,15 @@ unsigned long flags; /* We cant set affinity on ISA interrupts */ - if (virq < irq_offset_value()) + if (virq < NUM_ISA_INTERRUPTS) continue; - - desc = get_irq_desc(virq); - irq = virt_irq_to_real(irq_offset_down(virq)); - + if (irq_map[virq].host != xics_host) + continue; + irq = (unsigned int)irq_map[virq].hwirq; /* We need to get IPIs still. */ - if (irq == XICS_IPI || irq == NO_IRQ) + if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) continue; + desc = get_irq_desc(virq); /* We only need to migrate enabled IRQS */ if (desc == NULL || desc->chip == NULL Index: linux-irq-work/arch/powerpc/platforms/pseries/xics.h =================================================================== --- linux-irq-work.orig/arch/powerpc/platforms/pseries/xics.h 2006-07-03 09:46:47.000000000 +1000 +++ linux-irq-work/arch/powerpc/platforms/pseries/xics.h 2006-07-03 09:50:44.000000000 +1000 @@ -31,7 +31,7 @@ extern struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned; struct irq_desc; -extern void pSeries_8259_cascade(unsigned int irq, struct irq_desc *desc, +extern void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs); #endif /* _POWERPC_KERNEL_XICS_H */