Index: linux-work/arch/powerpc/mm/hash_low_64.S =================================================================== --- linux-work.orig/arch/powerpc/mm/hash_low_64.S 2005-11-08 11:00:17.000000000 +1100 +++ linux-work/arch/powerpc/mm/hash_low_64.S 2005-12-13 10:32:33.000000000 +1100 @@ -54,8 +54,9 @@ /* - * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local) + * _hash_page_4K(unsigned long ea, unsigned long access, + * unsigned long vsid, pte_t *ptep, unsigned long trap, + * int ssize, int local) * * Adds a 4K page to the hash table in a segment of 4K pages only */ @@ -67,6 +68,7 @@ _GLOBAL(__hash_page_4K) /* Save all params that we need after a function call */ std r6,STK_PARM(r6)(r1) std r8,STK_PARM(r8)(r1) + std r9,STK_PARM(r9)(r1) /* Add _PAGE_PRESENT to access */ ori r4,r4,_PAGE_PRESENT @@ -118,14 +120,25 @@ _GLOBAL(__hash_page_4K) * r4 (access) is re-useable, we use it for the new HPTE flags */ - /* Calc va and put it in r29 */ - rldicr r29,r5,28,63-28 - rldicl r3,r3,0,36 + /* Calc va and put it in r29, then calculate the hash value for + * primary slot and store it in r28 + */ +BEGIN_FTR_SECTION + cmpi cr0,r8,0 + beq 1f + sldi r29,r5,40 + rldicl r3,r3,0,64-40 + or r29,r3,r29 + rldicl r5,r5,0,37 /* vsid & 0x0000000007ffffff */ + rldicl r0,r3,64-12,36 /* (ea >> 12) & 0xfffffff */ + b 2f +END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +1: sldi r29,r5,28 + rldicl r3,r3,0,64-28 or r29,r3,r29 - - /* Calculate hash value for primary slot and store it in r28 */ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */ rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */ +2: xor r28,r5,r0 /* Convert linux PTE bits into HW equivalents */ @@ -183,6 +196,7 @@ htab_insert_pte: mr r4,r29 /* Retreive va */ li r7,0 /* !bolted, !secondary */ li r8,MMU_PAGE_4K /* page size */ + ld r9,STK_PARM(r8)(r1) /* get "ssize" param */ _GLOBAL(htab_call_hpte_insert1) bl . /* Patched by htab_finish_init() */ cmpdi 0,r3,0 @@ -205,6 +219,7 @@ _GLOBAL(htab_call_hpte_insert1) mr r4,r29 /* Retreive va */ li r7,HPTE_V_SECONDARY /* !bolted, secondary */ li r8,MMU_PAGE_4K /* page size */ + ld r9,STK_PARM(r8)(r1) /* get "ssize" param */ _GLOBAL(htab_call_hpte_insert2) bl . /* Patched by htab_finish_init() */ cmpdi 0,r3,0 @@ -273,7 +288,8 @@ htab_modify_pte: /* Call ppc_md.hpte_updatepp */ mr r5,r29 /* va */ li r6,MMU_PAGE_4K /* page size */ - ld r7,STK_PARM(r8)(r1) /* get "local" param */ + ld r7,STK_PARM(r8)(r1) /* get "ssize" param */ + ld r8,STK_PARM(r9)(r1) /* get "local" param */ _GLOBAL(htab_call_hpte_updatepp) bl . 
/* Patched by htab_finish_init() */ @@ -311,8 +327,10 @@ htab_pte_insert_failure: * * *****************************************************************************/ -/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local) +/* + * _hash_page_4K(unsigned long ea, unsigned long access, + * unsigned long vsid, pte_t *ptep, unsigned long trap, + * int ssize, int local) */ /* @@ -325,6 +343,7 @@ _GLOBAL(__hash_page_4K) /* Save all params that we need after a function call */ std r6,STK_PARM(r6)(r1) std r8,STK_PARM(r8)(r1) + std r9,STK_PARM(r9)(r1) /* Add _PAGE_PRESENT to access */ ori r4,r4,_PAGE_PRESENT @@ -382,14 +401,25 @@ _GLOBAL(__hash_page_4K) /* Load the hidx index */ rldicl r25,r3,64-12,60 - /* Calc va and put it in r29 */ - rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */ - rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */ - or r29,r3,r29 /* r29 = va - - /* Calculate hash value for primary slot and store it in r28 */ + /* Calc va and put it in r29, then calculate the hash value for + * primary slot and store it in r28 + */ +BEGIN_FTR_SECTION + cmpi cr0,r8,0 + beq 1f + sldi r29,r5,40 + rldicl r3,r3,0,64-40 + or r29,r3,r29 + rldicl r5,r5,0,37 /* vsid & 0x0000000007ffffff */ + rldicl r0,r3,64-12,36 /* (ea >> 12) & 0xfffffff */ + b 2f +END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +1: sldi r29,r5,28 + rldicl r3,r3,0,64-28 + or r29,r3,r29 rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */ rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */ +2: xor r28,r5,r0 /* Convert linux PTE bits into HW equivalents */ @@ -449,6 +479,7 @@ htab_insert_pte: mr r4,r29 /* Retreive va */ li r7,0 /* !bolted, !secondary */ li r8,MMU_PAGE_4K /* page size */ + ld r9,STK_PARM(r8)(r1) /* get "ssize" param */ _GLOBAL(htab_call_hpte_insert1) bl . /* patched by htab_finish_init() */ cmpdi 0,r3,0 @@ -473,6 +504,7 @@ _GLOBAL(htab_call_hpte_insert1) mr r4,r29 /* Retreive va */ li r7,HPTE_V_SECONDARY /* !bolted, secondary */ li r8,MMU_PAGE_4K /* page size */ + ld r9,STK_PARM(r8)(r1) /* get "ssize" param */ _GLOBAL(htab_call_hpte_insert2) bl . /* patched by htab_finish_init() */ cmpdi 0,r3,0 @@ -558,7 +590,8 @@ htab_modify_pte: /* Call ppc_md.hpte_updatepp */ mr r5,r29 /* va */ li r6,MMU_PAGE_4K /* page size */ - ld r7,STK_PARM(r8)(r1) /* get "local" param */ + ld r7,STK_PARM(r8)(r1) /* get "ssize" param */ + ld r8,STK_PARM(r9)(r1) /* get "local" param */ _GLOBAL(htab_call_hpte_updatepp) bl . 
/* patched by htab_finish_init() */ @@ -596,6 +629,11 @@ htab_pte_insert_failure: * * *****************************************************************************/ +/* + * _hash_page_64K(unsigned long ea, unsigned long access, + * unsigned long vsid, pte_t *ptep, unsigned long trap, + * int ssize, int local) + */ _GLOBAL(__hash_page_64K) mflr r0 std r0,16(r1) @@ -603,6 +641,7 @@ _GLOBAL(__hash_page_64K) /* Save all params that we need after a function call */ std r6,STK_PARM(r6)(r1) std r8,STK_PARM(r8)(r1) + std r9,STK_PARM(r9)(r1) /* Add _PAGE_PRESENT to access */ ori r4,r4,_PAGE_PRESENT @@ -653,14 +692,25 @@ _GLOBAL(__hash_page_64K) * r4 (access) is re-useable, we use it for the new HPTE flags */ - /* Calc va and put it in r29 */ - rldicr r29,r5,28,63-28 - rldicl r3,r3,0,36 + /* Calc va and put it in r29, then calculate the hash value for + * primary slot and store it in r28 + */ +BEGIN_FTR_SECTION + cmpi cr0,r8,0 + beq 1f + sldi r29,r5,40 + rldicl r3,r3,0,64-40 + or r29,r3,r29 + rldicl r5,r5,0,37 /* vsid & 0x0000000007ffffff */ + rldicl r0,r3,64-16,40 /* (ea >> 16) & 0xffffff */ + b 2f +END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +1: sldi r29,r5,28 + rldicl r3,r3,0,64-28 or r29,r3,r29 - - /* Calculate hash value for primary slot and store it in r28 */ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */ rldicl r0,r3,64-16,52 /* (ea >> 16) & 0xfff */ +2: xor r28,r5,r0 /* Convert linux PTE bits into HW equivalents */ @@ -718,6 +768,7 @@ ht64_insert_pte: mr r4,r29 /* Retreive va */ li r7,0 /* !bolted, !secondary */ li r8,MMU_PAGE_64K + ld r9,STK_PARM(r8)(r1) /* get "ssize" param */ _GLOBAL(ht64_call_hpte_insert1) bl . /* patched by htab_finish_init() */ cmpdi 0,r3,0 @@ -740,6 +791,7 @@ _GLOBAL(ht64_call_hpte_insert1) mr r4,r29 /* Retreive va */ li r7,HPTE_V_SECONDARY /* !bolted, secondary */ li r8,MMU_PAGE_64K + ld r9,STK_PARM(r8)(r1) /* get "ssize" param */ _GLOBAL(ht64_call_hpte_insert2) bl . /* patched by htab_finish_init() */ cmpdi 0,r3,0 @@ -808,7 +860,8 @@ ht64_modify_pte: /* Call ppc_md.hpte_updatepp */ mr r5,r29 /* va */ li r6,MMU_PAGE_64K - ld r7,STK_PARM(r8)(r1) /* get "local" param */ + ld r7,STK_PARM(r8)(r1) /* get "ssize" param */ + ld r8,STK_PARM(r9)(r1) /* get "local" param */ _GLOBAL(ht64_call_hpte_updatepp) bl . 
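
For reference while reading the feature sections above: the 1T path builds the VA as (vsid << 40) | (ea & (2^40 - 1)) and hashes a 40-bit segment offset, where the 256MB path uses a shift of 28. The C helpers this patch adds to include/asm-powerpc/mmu.h (further down in this diff) express the same computation; a condensed copy, for reference only:

static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
                                   int ssize)
{
        if (ssize == MMU_SEGSIZE_1T)
                return (vsid << 40) | (ea & 0xffffffffffUL);
        return (vsid << 28) | (ea & 0xfffffffUL);
}

static inline unsigned long hpt_hash(unsigned long va, unsigned int shift,
                                     int ssize)
{
        /* low VSID bits XOR page index within the segment */
        if (ssize == MMU_SEGSIZE_1T)
                return ((va >> 40) & 0x7ffffffUL) ^
                       ((va & 0xffffffffffUL) >> shift);
        return ((va >> 28) & 0x7fffffffffUL) ^
               ((va & 0xfffffffUL) >> shift);
}
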
/* patched by htab_finish_init() */ Index: linux-work/arch/powerpc/mm/hash_native_64.c =================================================================== --- linux-work.orig/arch/powerpc/mm/hash_native_64.c 2005-11-08 11:00:17.000000000 +1100 +++ linux-work/arch/powerpc/mm/hash_native_64.c 2005-12-13 10:32:33.000000000 +1100 @@ -37,7 +37,7 @@ static DEFINE_SPINLOCK(native_tlbie_lock); -static inline void __tlbie(unsigned long va, unsigned int psize) +static inline void __tlbie(unsigned long va, int psize, int ssize) { unsigned int penc; @@ -47,18 +47,20 @@ static inline void __tlbie(unsigned long switch (psize) { case MMU_PAGE_4K: va &= ~0xffful; + va |= (ssize << 8); asm volatile("tlbie %0,0" : : "r" (va) : "memory"); break; default: penc = mmu_psize_defs[psize].penc; va &= ~((1ul << mmu_psize_defs[psize].shift) - 1); va |= (0x7f >> (8 - penc)) << 12; + va |= (ssize << 8); asm volatile("tlbie %0,1" : : "r" (va) : "memory"); break; } } -static inline void __tlbiel(unsigned long va, unsigned int psize) +static inline void __tlbiel(unsigned long va, int psize, int ssize) { unsigned int penc; @@ -68,6 +70,7 @@ static inline void __tlbiel(unsigned lon switch (psize) { case MMU_PAGE_4K: va &= ~0xffful; + va |= (ssize << 8); asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" : : "r"(va) : "memory"); break; @@ -75,6 +78,7 @@ static inline void __tlbiel(unsigned lon penc = mmu_psize_defs[psize].penc; va &= ~((1ul << mmu_psize_defs[psize].shift) - 1); va |= (0x7f >> (8 - penc)) << 12; + va |= (ssize << 8); asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)" : : "r"(va) : "memory"); break; @@ -82,7 +86,7 @@ static inline void __tlbiel(unsigned lon } -static inline void tlbie(unsigned long va, int psize, int local) +static inline void tlbie(unsigned long va, int psize, int ssize, int local) { unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL); int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); @@ -93,10 +97,10 @@ static inline void tlbie(unsigned long v spin_lock(&native_tlbie_lock); asm volatile("ptesync": : :"memory"); if (use_local) { - __tlbiel(va, psize); + __tlbiel(va, psize, ssize); asm volatile("ptesync": : :"memory"); } else { - __tlbie(va, psize); + __tlbie(va, psize, ssize); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) @@ -125,7 +129,7 @@ static inline void native_unlock_hpte(hp long native_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long pa, unsigned long rflags, - unsigned long vflags, int psize) + unsigned long vflags, int psize, int ssize) { hpte_t *hptep = htab_address + hpte_group; unsigned long hpte_v, hpte_r; @@ -152,7 +156,7 @@ long native_hpte_insert(unsigned long hp if (i == HPTES_PER_GROUP) return -1; - hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID; + hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID; hpte_r = hpte_encode_r(pa, psize) | rflags; if (!(vflags & HPTE_V_BOLTED)) { @@ -214,13 +218,14 @@ static long native_hpte_remove(unsigned } static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long va, int psize, int local) + unsigned long va, int psize, int ssize, + int local) { hpte_t *hptep = htab_address + slot; unsigned long hpte_v, want_v; int ret = 0; - want_v = hpte_encode_v(va, psize); + want_v = hpte_encode_v(va, psize, ssize); DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)", va, want_v & HPTE_V_AVPN, slot, newpp); @@ -243,12 +248,12 @@ static long native_hpte_updatepp(unsigne } /* Ensure it is out 
of the tlb too. */ - tlbie(va, psize, local); + tlbie(va, psize, ssize, local); return ret; } -static long native_hpte_find(unsigned long va, int psize) +static long native_hpte_find(unsigned long va, int psize, int ssize) { hpte_t *hptep; unsigned long hash; @@ -256,8 +261,8 @@ static long native_hpte_find(unsigned lo long slot; unsigned long want_v, hpte_v; - hash = hpt_hash(va, mmu_psize_defs[psize].shift); - want_v = hpte_encode_v(va, psize); + hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize); + want_v = hpte_encode_v(va, psize, ssize); for (j = 0; j < 2; j++) { slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; @@ -289,16 +294,16 @@ static long native_hpte_find(unsigned lo * No need to lock here because we should be the only user. */ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, - int psize) + int psize, int ssize) { unsigned long vsid, va; long slot; hpte_t *hptep; - vsid = get_kernel_vsid(ea); - va = (vsid << 28) | (ea & 0x0fffffff); + vsid = get_kernel_vsid(ea, ssize); + va = hpt_va(ea, vsid, ssize); - slot = native_hpte_find(va, psize); + slot = native_hpte_find(va, psize, ssize); if (slot == -1) panic("could not find page to bolt\n"); hptep = htab_address + slot; @@ -308,11 +313,11 @@ static void native_hpte_updateboltedpp(u (newpp & (HPTE_R_PP | HPTE_R_N)); /* Ensure it is out of the tlb too. */ - tlbie(va, psize, 0); + tlbie(va, psize, ssize, 0); } static void native_hpte_invalidate(unsigned long slot, unsigned long va, - int psize, int local) + int psize, int ssize, int local) { hpte_t *hptep = htab_address + slot; unsigned long hpte_v; @@ -323,7 +328,7 @@ static void native_hpte_invalidate(unsig DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot); - want_v = hpte_encode_v(va, psize); + want_v = hpte_encode_v(va, psize, ssize); native_lock_hpte(hptep); hpte_v = hptep->v; @@ -335,7 +340,7 @@ static void native_hpte_invalidate(unsig hptep->v = 0; /* Invalidate the TLB */ - tlbie(va, psize, local); + tlbie(va, psize, ssize, local); local_irq_restore(flags); } @@ -375,7 +380,7 @@ static unsigned long slot2va(unsigned lo * TODO: add batching support when enabled. remember, no dynamic memory here, * athough there is the control page available... * - * XXX FIXME: 4k only for now ! + * XXX FIXME: 4k only and 256Mb segments only for now ! 
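
Both __tlbie() and __tlbiel() above now fold the segment size into the operand at bit 8, next to the existing page-size encoding. A small sketch that restates just the operand construction from those two functions (tlbie_rb() is a hypothetical helper used for illustration, not part of the patch):

static unsigned long tlbie_rb(unsigned long va, int psize, int ssize)
{
        unsigned long rb = va;
        unsigned int penc;

        if (psize == MMU_PAGE_4K) {
                rb &= ~0xffful;                 /* clear 4K page offset */
        } else {
                penc = mmu_psize_defs[psize].penc;
                rb &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
                rb |= (0x7f >> (8 - penc)) << 12;   /* page size encoding */
        }
        rb |= (unsigned long)ssize << 8;            /* segment size bits */
        return rb;
}
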
*/ static void native_hpte_clear(void) { @@ -405,7 +410,7 @@ static void native_hpte_clear(void) if (hpte_v & HPTE_V_VALID) { hptep->v = 0; - tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K, 0); + tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K, 0, 0); } } @@ -426,7 +431,8 @@ static void native_flush_hash_range(unsi unsigned long flags; real_pte_t pte; struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); - unsigned long psize = batch->psize; + int psize = batch->psize; + int ssize = batch->ssize; int i; local_irq_save(flags); @@ -436,14 +442,14 @@ static void native_flush_hash_range(unsi pte = batch->pte[i]; pte_iterate_hashed_subpages(pte, psize, va, index, shift) { - hash = hpt_hash(va, shift); + hash = hpt_hash(va, shift, ssize); hidx = __rpte_to_hidx(pte, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; hptep = htab_address + slot; - want_v = hpte_encode_v(va, psize); + want_v = hpte_encode_v(va, psize, ssize); native_lock_hpte(hptep); hpte_v = hptep->v; if (!HPTE_V_COMPARE(hpte_v, want_v) || @@ -463,7 +469,7 @@ static void native_flush_hash_range(unsi pte_iterate_hashed_subpages(pte, psize, va, index, shift) { - __tlbiel(va, psize); + __tlbiel(va, psize, ssize); } pte_iterate_hashed_end(); } asm volatile("ptesync":::"memory"); @@ -480,7 +486,7 @@ static void native_flush_hash_range(unsi pte_iterate_hashed_subpages(pte, psize, va, index, shift) { - __tlbie(va, psize); + __tlbie(va, psize, ssize); } pte_iterate_hashed_end(); } asm volatile("eieio; tlbsync; ptesync":::"memory"); Index: linux-work/arch/powerpc/mm/hash_utils_64.c =================================================================== --- linux-work.orig/arch/powerpc/mm/hash_utils_64.c 2005-12-12 17:33:39.000000000 +1100 +++ linux-work/arch/powerpc/mm/hash_utils_64.c 2005-12-13 11:19:29.000000000 +1100 @@ -135,7 +135,8 @@ struct mmu_psize_def mmu_psize_defaults_ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, - unsigned long pstart, unsigned long mode, int psize) + unsigned long pstart, unsigned long mode, + int psize, int ssize) { unsigned long vaddr, paddr; unsigned int step, shift; @@ -148,8 +149,8 @@ int htab_bolt_mapping(unsigned long vsta for (vaddr = vstart, paddr = pstart; vaddr < vend; vaddr += step, paddr += step) { unsigned long vpn, hash, hpteg; - unsigned long vsid = get_kernel_vsid(vaddr); - unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff); + unsigned long vsid = get_kernel_vsid(vaddr, ssize); + unsigned long va = hpt_va(vaddr, vsid, ssize); vpn = va >> shift; tmp_mode = mode; @@ -158,7 +159,7 @@ int htab_bolt_mapping(unsigned long vsta if (!in_kernel_text(vaddr)) tmp_mode = mode | HPTE_R_N; - hash = hpt_hash(va, shift); + hash = hpt_hash(va, shift, ssize); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); /* The crap below can be cleaned once ppd_md.probe() can @@ -171,7 +172,7 @@ int htab_bolt_mapping(unsigned long vsta virt_to_abs(paddr), tmp_mode, HPTE_V_BOLTED, - psize); + psize, ssize); else #endif #ifdef CONFIG_PPC_PSERIES @@ -180,14 +181,14 @@ int htab_bolt_mapping(unsigned long vsta virt_to_abs(paddr), tmp_mode, HPTE_V_BOLTED, - psize); + psize, ssize); else #endif #ifdef CONFIG_PPC_MULTIPLATFORM ret = native_hpte_insert(hpteg, va, virt_to_abs(paddr), tmp_mode, HPTE_V_BOLTED, - psize); + psize, ssize); #endif if (ret < 0) break; @@ -195,6 +196,35 @@ int htab_bolt_mapping(unsigned long vsta return ret < 0 ? 
ret : 0; } +static int __init htab_dt_scan_seg_sizes(unsigned long node, + const char *uname, int depth, + void *data) +{ + char *type = of_get_flat_dt_prop(node, "device_type", NULL); + u32 *prop; + unsigned long size = 0; + + /* We are scanning "cpu" nodes only */ + if (type == NULL || strcmp(type, "cpu") != 0) + return 0; + + prop = (u32 *)of_get_flat_dt_prop(node, + "ibm,processor-segment-sizes", &size); + if (prop != NULL && size >= 8) { + if (prop[0] == 0x1c && prop[1] == 0x28) { + DBG("1T segment support detected\n"); + cur_cpu_spec->cpu_features |= CPU_FTR_1T_SEGMENT; + } + return 1; + } + return 0; +} + +static void __init htab_init_seg_sizes(void) +{ + of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL); +} + static int __init htab_dt_scan_page_sizes(unsigned long node, const char *uname, int depth, void *data) @@ -274,7 +304,6 @@ static int __init htab_dt_scan_page_size return 0; } - static void __init htab_init_page_sizes(void) { int rc; @@ -336,10 +365,12 @@ static void __init htab_init_page_sizes( /* Calculate HPAGE_SHIFT and sanity check it */ if (mmu_psize_defs[mmu_huge_psize].shift > MIN_HUGEPTE_SHIFT && - mmu_psize_defs[mmu_huge_psize].shift < SID_SHIFT) + mmu_psize_defs[mmu_huge_psize].shift < SID_SHIFT_256M) HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift; else HPAGE_SHIFT = 0; /* No huge pages dude ! */ + + printk(KERN_INFO "Page orders: huge pages = %d\n", HPAGE_SHIFT); #endif /* CONFIG_HUGETLB_PAGE */ } @@ -403,15 +434,25 @@ void __init htab_initialize(void) unsigned long pteg_count; unsigned long mode_rw; unsigned long base = 0, size = 0; + int ssize = MMU_SEGSIZE_256M; int i; extern unsigned long tce_alloc_start, tce_alloc_end; DBG(" -> htab_initialize()\n"); + /* Initialize segments sizes */ + htab_init_seg_sizes(); + /* Initialize page sizes */ htab_init_page_sizes(); + if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { + ssize = MMU_SEGSIZE_1T; + printk(KERN_INFO "Using 1TB segments\n"); + } else + printk(KERN_INFO "Using 256MB segments\n"); + /* * Calculate the required size of the htab. We want the number of * PTEGs to equal one half the number of real pages. @@ -459,7 +500,8 @@ void __init htab_initialize(void) base = lmb.memory.region[i].base + KERNELBASE; size = lmb.memory.region[i].size; - DBG("creating mapping for region: %lx : %lx\n", base, size); + DBG("creating mapping for region: %lx : %lx (psize %d)\n", + base, size, mmu_linear_psize); #ifdef CONFIG_U3_DART /* Do not map the DART space. 
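
htab_dt_scan_seg_sizes() above keys off the "ibm,processor-segment-sizes" property of the cpu nodes, whose cells are segment shifts. As a worked example of the prop[0]/prop[1] test (cell values are illustrative, not taken from real firmware):

        /*
         * ibm,processor-segment-sizes = <0x1c 0x28>
         *
         *   prop[0] == 0x1c  ->  shift 28  ->  256MB segments
         *   prop[1] == 0x28  ->  shift 40  ->  1TB segments
         *
         * Only this exact pair in the leading cells sets
         * CPU_FTR_1T_SEGMENT; a missing property, other shifts or a
         * different ordering leaves the kernel in 256MB-only mode.
         */
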
Fortunately, it will be aligned @@ -476,18 +518,20 @@ void __init htab_initialize(void) if (base != dart_tablebase) BUG_ON(htab_bolt_mapping(base, dart_tablebase, base, mode_rw, - mmu_linear_psize)); + mmu_linear_psize, + ssize)); if ((base + size) > (dart_tablebase + 16*MB)) BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB, base + size, dart_tablebase+16*MB, mode_rw, - mmu_linear_psize)); + mmu_linear_psize, + ssize)); continue; } #endif /* CONFIG_U3_DART */ BUG_ON(htab_bolt_mapping(base, base + size, base, - mode_rw, mmu_linear_psize)); + mode_rw, mmu_linear_psize, ssize)); } /* @@ -506,7 +550,7 @@ void __init htab_initialize(void) BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end, tce_alloc_start, mode_rw, - mmu_linear_psize)); + mmu_linear_psize, ssize)); } DBG(" <- htab_initialize()\n"); @@ -556,6 +600,7 @@ int hash_page(unsigned long ea, unsigned pte_t *ptep; cpumask_t tmp; int rc, user_region = 0, local = 0; + int ssize = MMU_SEGSIZE_256M; DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", ea, access, trap); @@ -574,11 +619,17 @@ int hash_page(unsigned long ea, unsigned DBG_LOW(" user region with no mm !\n"); return 1; } - vsid = get_vsid(mm->context.id, ea); + if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { + BUG_ON(mm->context.segsize < 0); + ssize = mm->context.segsize; + } + vsid = get_vsid(mm->context.id, ea, ssize); break; case VMALLOC_REGION_ID: + if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) + ssize = MMU_SEGSIZE_1T; mm = &init_mm; - vsid = get_kernel_vsid(ea); + vsid = get_kernel_vsid(ea, ssize); break; default: /* Not a valid range @@ -586,7 +637,8 @@ int hash_page(unsigned long ea, unsigned */ return 1; } - DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid); + DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx, ssize=%d\n", + mm, mm->pgd, vsid, ssize); /* Get pgdir */ pgdir = mm->pgd; @@ -601,7 +653,7 @@ int hash_page(unsigned long ea, unsigned /* Handle hugepage regions */ if (unlikely(in_hugepage_area(mm->context, ea))) { DBG_LOW(" -> huge page !\n"); - return hash_huge_page(mm, access, ea, vsid, local, trap); + return hash_huge_page(mm, access, ea, vsid, ssize, local, trap); } /* Get PTE and page size from page tables */ @@ -627,12 +679,12 @@ int hash_page(unsigned long ea, unsigned /* Do actual hashing */ #ifndef CONFIG_PPC_64K_PAGES - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); + rc = __hash_page_4K(ea, access, vsid, ptep, trap, ssize, local); #else if (mmu_virtual_psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); + rc = __hash_page_64K(ea, access, vsid, ptep, trap,ssize,local); else - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); + rc = __hash_page_4K(ea, access, vsid, ptep, trap,ssize,local); #endif /* CONFIG_PPC_64K_PAGES */ #ifndef CONFIG_PPC_64K_PAGES @@ -654,6 +706,7 @@ void hash_preload(struct mm_struct *mm, cpumask_t mask; unsigned long flags; int local = 0; + int ssize = MMU_SEGSIZE_256M; /* We don't want huge pages prefaulted for now */ @@ -670,7 +723,11 @@ void hash_preload(struct mm_struct *mm, ptep = find_linux_pte(pgdir, ea); if (!ptep) return; - vsid = get_vsid(mm->context.id, ea); + if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { + BUG_ON(mm->context.segsize < 0); + ssize = mm->context.segsize; + } + vsid = get_vsid(mm->context.id, ea, ssize); /* Hash it in */ local_irq_save(flags); @@ -678,30 +735,31 @@ void hash_preload(struct mm_struct *mm, if (cpus_equal(mm->cpu_vm_mask, mask)) local = 1; #ifndef CONFIG_PPC_64K_PAGES - __hash_page_4K(ea, access, vsid, ptep, trap, local); + 
__hash_page_4K(ea, access, vsid, ptep, trap, ssize, local); #else if (mmu_virtual_psize == MMU_PAGE_64K) - __hash_page_64K(ea, access, vsid, ptep, trap, local); + __hash_page_64K(ea, access, vsid, ptep, trap, ssize, local); else - __hash_page_4K(ea, access, vsid, ptep, trap, local); + __hash_page_4K(ea, access, vsid, ptep, trap, ssize, local); #endif /* CONFIG_PPC_64K_PAGES */ local_irq_restore(flags); } -void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int local) +void flush_hash_page(unsigned long va, real_pte_t pte, + int psize, int ssize, int local) { unsigned long hash, index, shift, hidx, slot; DBG_LOW("flush_hash_page(va=%016x)\n", va); pte_iterate_hashed_subpages(pte, psize, va, index, shift) { - hash = hpt_hash(va, shift); + hash = hpt_hash(va, shift, ssize); hidx = __rpte_to_hidx(pte, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx); - ppc_md.hpte_invalidate(slot, va, psize, local); + ppc_md.hpte_invalidate(slot, va, psize, ssize, local); } pte_iterate_hashed_end(); } @@ -716,7 +774,7 @@ void flush_hash_range(unsigned long numb for (i = 0; i < number; i++) flush_hash_page(batch->vaddr[i], batch->pte[i], - batch->psize, local); + batch->psize, batch->ssize, local); } } Index: linux-work/arch/powerpc/mm/tlb_64.c =================================================================== --- linux-work.orig/arch/powerpc/mm/tlb_64.c 2005-11-24 17:03:09.000000000 +1100 +++ linux-work/arch/powerpc/mm/tlb_64.c 2005-12-13 12:48:34.000000000 +1100 @@ -132,6 +132,7 @@ void hpte_update(struct mm_struct *mm, u struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); unsigned long vsid; unsigned int psize = mmu_virtual_psize; + int ssize = MMU_SEGSIZE_256M; int i; i = batch->index; @@ -150,6 +151,17 @@ void hpte_update(struct mm_struct *mm, u #endif } + if (addr < KERNELBASE) { + ssize = mm->context.segsize; + WARN_ON(ssize < 0); + vsid = get_vsid(mm->context.id, addr, mm->context.segsize); + WARN_ON(vsid == 0); + } else { + if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) + ssize = MMU_SEGSIZE_1T; + vsid = get_kernel_vsid(addr, ssize); + } + /* * This can happen when we are in the middle of a TLB batch and * we encounter memory pressure (eg copy_page_range when it tries @@ -160,20 +172,18 @@ void hpte_update(struct mm_struct *mm, u * We also need to ensure only one page size is present in a given * batch */ - if (i != 0 && (mm != batch->mm || batch->psize != psize)) { + if (i != 0 && (mm != batch->mm || batch->psize != psize || + batch->ssize != ssize)) { flush_tlb_pending(); i = 0; } if (i == 0) { batch->mm = mm; batch->psize = psize; + batch->ssize = ssize; } - if (addr < KERNELBASE) { - vsid = get_vsid(mm->context.id, addr); - WARN_ON(vsid == 0); - } else - vsid = get_kernel_vsid(addr); - batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff); + + batch->vaddr[i] = hpt_va(addr, vsid, ssize); batch->pte[i] = __real_pte(__pte(pte), ptep); batch->index = ++i; if (i >= PPC64_TLB_BATCH_NR) @@ -197,7 +207,7 @@ void __flush_tlb_pending(struct ppc64_tl if (i == 1) flush_hash_page(batch->vaddr[0], batch->pte[0], - batch->psize, local); + batch->psize, batch->ssize, local); else flush_hash_range(i, local); batch->index = 0; Index: linux-work/include/asm-powerpc/cputable.h =================================================================== --- linux-work.orig/include/asm-powerpc/cputable.h 2005-11-21 11:53:15.000000000 +1100 +++ 
linux-work/include/asm-powerpc/cputable.h 2005-12-13 10:32:33.000000000 +1100 @@ -106,6 +106,7 @@ extern void do_cpu_ftr_fixups(unsigned l #define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0000040000000000) #define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000) #define CPU_FTR_CI_LARGE_PAGE ASM_CONST(0x0000100000000000) +#define CPU_FTR_1T_SEGMENT ASM_CONST(0x0000200000000000) #else /* ensure on 32b processors the flags are available for compiling but * don't do anything */ @@ -121,6 +122,7 @@ extern void do_cpu_ftr_fixups(unsigned l #define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0) #define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0) #define CPU_FTR_CI_LARGE_PAGE ASM_CONST(0x0) +#define CPU_FTR_1T_SEGMENT ASM_CONST(0x0) #endif #ifndef __ASSEMBLY__ @@ -347,7 +349,7 @@ enum { #ifdef __powerpc64__ CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_CELL | - CPU_FTR_CI_LARGE_PAGE | + CPU_FTR_CI_LARGE_PAGE | CPU_FTR_1T_SEGMENT | #endif 0, Index: linux-work/include/asm-powerpc/machdep.h =================================================================== --- linux-work.orig/include/asm-powerpc/machdep.h 2005-11-15 13:31:58.000000000 +1100 +++ linux-work/include/asm-powerpc/machdep.h 2005-12-13 10:32:33.000000000 +1100 @@ -47,22 +47,22 @@ struct machdep_calls { #ifdef CONFIG_PPC64 void (*hpte_invalidate)(unsigned long slot, unsigned long va, - int psize, + int psize, int ssize, int local); long (*hpte_updatepp)(unsigned long slot, unsigned long newpp, unsigned long va, - int pize, + int psize, int ssize, int local); void (*hpte_updateboltedpp)(unsigned long newpp, unsigned long ea, - int psize); + int psize, int ssize); long (*hpte_insert)(unsigned long hpte_group, unsigned long va, unsigned long prpn, unsigned long rflags, unsigned long vflags, - int psize); + int psize, int ssize); long (*hpte_remove)(unsigned long hpte_group); void (*flush_hash_range)(unsigned long number, int local); Index: linux-work/arch/powerpc/kernel/process.c =================================================================== --- linux-work.orig/arch/powerpc/kernel/process.c 2005-12-05 14:33:46.000000000 +1100 +++ linux-work/arch/powerpc/kernel/process.c 2005-12-13 13:14:56.000000000 +1100 @@ -459,6 +459,31 @@ void exit_thread(void) discard_lazy_cpu_state(); } +#ifdef CONFIG_PPC64 +static void set_mm_segsize(struct task_struct *tsk) +{ + struct thread_info *t = tsk->thread_info; + struct mm_struct *mm = tsk->mm; + int ssize; + + if (mm == NULL) + return; + if ((t->flags & _TIF_32BIT) || !cpu_has_feature(CPU_FTR_1T_SEGMENT)) + ssize = MMU_SEGSIZE_256M; + else + ssize = MMU_SEGSIZE_1T; + WARN_ON (mm->context.segsize != -1 && mm->context.segsize != ssize); + if (mm->context.segsize == -1) { + preempt_disable(); + mm->context.segsize = ssize; + if (current == tsk) + get_paca()->context.segsize = ssize; + preempt_enable(); + } +} +#endif /* CONFIG_PPC64 */ + + void flush_thread(void) { #ifdef CONFIG_PPC64 @@ -466,7 +491,8 @@ void flush_thread(void) if (t->flags & _TIF_ABI_PENDING) t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT); -#endif + set_mm_segsize(current); +#endif /* CONFIG_PPC64 */ discard_lazy_cpu_state(); @@ -548,12 +574,14 @@ int copy_thread(int nr, unsigned long cl p->thread.ksp = sp; #ifdef CONFIG_PPC64 + set_mm_segsize(p); + if (cpu_has_feature(CPU_FTR_SLB)) { - unsigned long sp_vsid = get_kernel_vsid(sp); + int ssize = cpu_has_feature(CPU_FTR_1T_SEGMENT) ? 
+ MMU_SEGSIZE_1T : MMU_SEGSIZE_256M; unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; - - sp_vsid <<= SLB_VSID_SHIFT; - sp_vsid |= SLB_VSID_KERNEL | llp; + unsigned long sp_vsid = + slb_mk_v(sp, SLB_VSID_KERNEL | llp, ssize); p->thread.ksp_vsid = sp_vsid; } Index: linux-work/arch/powerpc/platforms/pseries/lpar.c =================================================================== --- linux-work.orig/arch/powerpc/platforms/pseries/lpar.c 2005-12-12 17:33:39.000000000 +1100 +++ linux-work/arch/powerpc/platforms/pseries/lpar.c 2005-12-13 10:32:33.000000000 +1100 @@ -279,7 +279,7 @@ void vpa_init(int cpu) long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long pa, unsigned long rflags, unsigned long vflags, - int psize) + int psize, int ssize) { unsigned long lpar_rc; unsigned long flags; @@ -287,16 +287,16 @@ long pSeries_lpar_hpte_insert(unsigned l unsigned long hpte_v, hpte_r; unsigned long dummy0, dummy1; - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " - "rflags=%lx, vflags=%lx, psize=%d)\n", - hpte_group, va, pa, rflags, vflags, psize); +if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " + "rflags=%lx, vflags=%lx, psize=%d, ssize: %d)\n", + hpte_group, va, pa, rflags, vflags, psize, ssize); - hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID; + hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID; hpte_r = hpte_encode_r(pa, psize) | rflags; - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); +if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); /* Now fill in the actual HPTE */ /* Set CEC cookie to 0 */ @@ -313,8 +313,8 @@ long pSeries_lpar_hpte_insert(unsigned l lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v, hpte_r, &slot, &dummy0, &dummy1); if (unlikely(lpar_rc == H_PTEG_Full)) { - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" full\n"); +if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" full\n"); return -1; } @@ -324,12 +324,12 @@ long pSeries_lpar_hpte_insert(unsigned l * or we will loop forever, so return -2 in this case. */ if (unlikely(lpar_rc != H_Success)) { - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" lpar err %d\n", lpar_rc); +if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" lpar err %d\n", lpar_rc); return -2; } - if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" -> slot: %d\n", slot & 7); +if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" -> slot: %d\n", slot & 7); /* Because of iSeries, we have to pass down the secondary * bucket bit here as well @@ -386,16 +386,17 @@ static void pSeries_lpar_hptab_clear(voi static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long va, - int psize, int local) + int psize, int ssize, int local) { unsigned long lpar_rc; unsigned long flags = (newpp & 7) | H_AVPN; unsigned long want_v; - want_v = hpte_encode_v(va, psize); + want_v = hpte_encode_v(va, psize, ssize); - DBG_LOW(" update: avpnv=%016lx, hash=%016lx, f=%x, psize: %d ... ", - want_v & HPTE_V_AVPN, slot, flags, psize); + DBG_LOW(" update: avpnv=%016lx, hash=%016lx, f=%x, " + "psize: %d, ssize: %d ... 
", + want_v & HPTE_V_AVPN, slot, flags, psize, ssize); lpar_rc = plpar_pte_protect(flags, slot, want_v & HPTE_V_AVPN); @@ -430,15 +431,15 @@ static unsigned long pSeries_lpar_hpte_g return dword0; } -static long pSeries_lpar_hpte_find(unsigned long va, int psize) +static long pSeries_lpar_hpte_find(unsigned long va, int psize, int ssize) { unsigned long hash; unsigned long i, j; long slot; unsigned long want_v, hpte_v; - hash = hpt_hash(va, mmu_psize_defs[psize].shift); - want_v = hpte_encode_v(va, psize); + hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize); + want_v = hpte_encode_v(va, psize, ssize); for (j = 0; j < 2; j++) { slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; @@ -463,14 +464,14 @@ static long pSeries_lpar_hpte_find(unsig static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, - int psize) + int psize, int ssize) { unsigned long lpar_rc, slot, vsid, va, flags; - vsid = get_kernel_vsid(ea); + vsid = get_kernel_vsid(ea, ssize); va = (vsid << 28) | (ea & 0x0fffffff); - slot = pSeries_lpar_hpte_find(va, psize); + slot = pSeries_lpar_hpte_find(va, psize, ssize); BUG_ON(slot == -1); flags = newpp & 7; @@ -480,7 +481,7 @@ static void pSeries_lpar_hpte_updatebolt } static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, - int psize, int local) + int psize, int ssize, int local) { unsigned long want_v; unsigned long lpar_rc; @@ -489,7 +490,7 @@ static void pSeries_lpar_hpte_invalidate DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d", slot, va, psize, local); - want_v = hpte_encode_v(va, psize); + want_v = hpte_encode_v(va, psize, ssize); lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v & HPTE_V_AVPN, &dummy1, &dummy2); if (lpar_rc == H_Not_Found) @@ -514,7 +515,7 @@ void pSeries_lpar_flush_hash_range(unsig for (i = 0; i < number; i++) flush_hash_page(batch->vaddr[i], batch->pte[i], - batch->psize, local); + batch->psize, batch->ssize, local); if (lock_tlbie) spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); Index: linux-work/arch/powerpc/kernel/head_64.S =================================================================== --- linux-work.orig/arch/powerpc/kernel/head_64.S 2005-11-11 10:14:48.000000000 +1100 +++ linux-work/arch/powerpc/kernel/head_64.S 2005-12-13 10:32:33.000000000 +1100 @@ -984,7 +984,8 @@ _GLOBAL(slb_miss_realmode) .machine push .machine "power4" mtcrf 0x80,r9 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ + mtcrf 0x40,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0, cr1 and cr7 */ .machine pop #ifdef CONFIG_PPC_ISERIES @@ -1270,7 +1271,7 @@ _GLOBAL(do_stab_bolted) /* Calculate VSID */ /* This is a kernel address, so protovsid = ESID */ - ASM_VSID_SCRAMBLE(r11, r9) + ASM_VSID_SCRAMBLE(r11, r9, 256M) rldic r9,r11,12,16 /* r9 = vsid << 12 */ /* Search the primary group for a free entry */ Index: linux-work/arch/powerpc/mm/hugetlbpage.c =================================================================== --- linux-work.orig/arch/powerpc/mm/hugetlbpage.c 2005-12-12 17:33:39.000000000 +1100 +++ linux-work/arch/powerpc/mm/hugetlbpage.c 2005-12-13 13:01:38.000000000 +1100 @@ -27,7 +27,7 @@ #include -#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) +#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT_256M) #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) /* Modelled after find_linux_pte() */ @@ -174,7 +174,7 @@ static void flush_low_segments(void *par if (! 
(fi->newareas & (1U << i))) continue; asm volatile("slbie %0" - : : "r" ((i << SID_SHIFT) | SLBIE_C)); + : : "r" ((i << SID_SHIFT_256M) | SLBIE_C)); } asm volatile("isync" : : : "memory"); } @@ -184,7 +184,6 @@ static void flush_high_segments(void *pa struct slb_flush_info *fi = parm; unsigned long i, j; - BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS); if (current->active_mm != fi->mm) @@ -200,18 +199,26 @@ static void flush_high_segments(void *pa for (i = 0; i < NUM_HIGH_AREAS; i++) { if (! (fi->newareas & (1U << i))) continue; - for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) - asm volatile("slbie %0" - :: "r" (((i << HTLB_AREA_SHIFT) - + (j << SID_SHIFT)) | SLBIE_C)); + if (fi->mm->context.segsize == MMU_SEGSIZE_1T) { + asm volatile("slbie %0" :: "r" + ((i << HTLB_AREA_SHIFT) | + (MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT) | + SLBIE_C)); + continue; + } + + for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT_256M)); j++) + asm volatile("slbie %0" :: "r" + (((i << HTLB_AREA_SHIFT) + + (j << SID_SHIFT_256M)) | SLBIE_C)); } asm volatile("isync" : : : "memory"); } static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) { - unsigned long start = area << SID_SHIFT; - unsigned long end = (area+1) << SID_SHIFT; + unsigned long start = area << SID_SHIFT_256M; + unsigned long end = (area+1) << SID_SHIFT_256M; struct vm_area_struct *vma; BUG_ON(area >= NUM_LOW_AREAS); @@ -254,6 +261,10 @@ static int open_low_hpage_areas(struct m BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); + WARN_ON(mm->context.segsize < 0); + if (mm->context.segsize == MMU_SEGSIZE_1T) + return -EBUSY; + newareas &= ~(mm->context.low_htlb_areas); if (! newareas) return 0; /* The segments we want are already open */ @@ -285,6 +296,9 @@ static int open_high_hpage_areas(struct BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) != NUM_HIGH_AREAS); + WARN_ON(mm->context.segsize < 0); + if (mm->context.segsize == MMU_SEGSIZE_1T && (newareas & 1)) + return -EBUSY; newareas &= ~(mm->context.high_htlb_areas); if (! newareas) return 0; /* The areas we want are already open */ @@ -399,7 +413,7 @@ full_search: BUG_ON(vma && (addr >= vma->vm_end)); if (touches_hugepage_low_range(mm, addr, len)) { - addr = ALIGN(addr+1, 1<= vma->vm_end)); /* invariant */ if (! 
__within_hugepage_low_range(addr, len, segmask)) { - addr = ALIGN(addr+1, 1<mm, addr); continue; } @@ -692,8 +706,8 @@ static unsigned int hash_huge_page_do_la } int hash_huge_page(struct mm_struct *mm, unsigned long access, - unsigned long ea, unsigned long vsid, int local, - unsigned long trap) + unsigned long ea, unsigned long vsid, int ssize, + int local, unsigned long trap) { pte_t *ptep; unsigned long old_pte, new_pte; @@ -704,7 +718,7 @@ int hash_huge_page(struct mm_struct *mm, ptep = huge_pte_offset(mm, ea); /* Search the Linux page table for a match with va */ - va = (vsid << 28) | (ea & 0x0fffffff); + va = hpt_va(ea, vsid, ssize); /* * If no pte found or not present, send the problem up to @@ -755,19 +769,19 @@ int hash_huge_page(struct mm_struct *mm, /* There MIGHT be an HPTE for this pte */ unsigned long hash, slot; - hash = hpt_hash(va, HPAGE_SHIFT); + hash = hpt_hash(va, HPAGE_SHIFT, ssize); if (old_pte & _PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & _PAGE_F_GIX) >> 12; if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, - local) == -1) + ssize, local) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } if (likely(!(old_pte & _PAGE_HASHPTE))) { - unsigned long hash = hpt_hash(va, HPAGE_SHIFT); + unsigned long hash = hpt_hash(va, HPAGE_SHIFT, ssize); unsigned long hpte_group; pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; @@ -786,7 +800,7 @@ repeat: /* Insert into the hash table, primary slot */ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, - mmu_huge_psize); + mmu_huge_psize, ssize); /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { @@ -795,7 +809,7 @@ repeat: HPTES_PER_GROUP) & ~0x7UL; slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, HPTE_V_SECONDARY, - mmu_huge_psize); + mmu_huge_psize, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * Index: linux-work/arch/powerpc/mm/pgtable_64.c =================================================================== --- linux-work.orig/arch/powerpc/mm/pgtable_64.c 2005-11-21 11:53:14.000000000 +1100 +++ linux-work/arch/powerpc/mm/pgtable_64.c 2005-12-13 10:32:33.000000000 +1100 @@ -116,6 +116,11 @@ static int map_io_page(unsigned long ea, set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); } else { + int ssize = MMU_SEGSIZE_256M; + + if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) + ssize = MMU_SEGSIZE_1T; + /* * If the mm subsystem is not fully up, we cannot create a * linux page table entry for this mapping. 
Simply bolt an @@ -123,7 +128,7 @@ static int map_io_page(unsigned long ea, * */ if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags, - mmu_virtual_psize)) { + mmu_virtual_psize, ssize)) { printk(KERN_ERR "Failed to do bolted mapping IO " "memory at %016lx !\n", pa); return -ENOMEM; Index: linux-work/arch/powerpc/mm/slb.c =================================================================== --- linux-work.orig/arch/powerpc/mm/slb.c 2005-11-08 11:00:17.000000000 +1100 +++ linux-work/arch/powerpc/mm/slb.c 2005-12-13 10:32:33.000000000 +1100 @@ -41,22 +41,17 @@ static void slb_allocate(unsigned long e slb_allocate_realmode(ea); } -static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot) +static inline void create_slbe(unsigned long ea, unsigned long flags, + unsigned long entry, int ssize) { - return (ea & ESID_MASK) | SLB_ESID_V | slot; -} + unsigned long v = slb_mk_v(ea, flags, ssize); + unsigned long e = slb_mk_e(ea, entry, ssize); -static inline unsigned long mk_vsid_data(unsigned long ea, unsigned long flags) -{ - return (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | flags; -} + DBG("create_slbe(%016lx %016lx\n", e, v); -static inline void create_slbe(unsigned long ea, unsigned long flags, - unsigned long entry) -{ asm volatile("slbmte %0,%1" : - : "r" (mk_vsid_data(ea, flags)), - "r" (mk_esid_data(ea, entry)) + : "r" (v), + "r" (e) : "memory" ); } @@ -66,6 +61,7 @@ static void slb_flush_and_rebolt(void) * appropriately too. */ unsigned long linear_llp, virtual_llp, lflags, vflags; unsigned long ksp_esid_data; + int ssize; WARN_ON(!irqs_disabled()); @@ -74,9 +70,20 @@ static void slb_flush_and_rebolt(void) lflags = SLB_VSID_KERNEL | linear_llp; vflags = SLB_VSID_KERNEL | virtual_llp; - ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); - if ((ksp_esid_data & ESID_MASK) == KERNELBASE) - ksp_esid_data &= ~SLB_ESID_V; + + if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { + ssize = MMU_SEGSIZE_1T; + ksp_esid_data = slb_mk_e(get_paca()->kstack, 2, + MMU_SEGSIZE_1T); + if ((ksp_esid_data & ESID_MASK_1T) == KERNELBASE) + ksp_esid_data &= ~SLB_ESID_V; + } else { + ssize = MMU_SEGSIZE_256M; + ksp_esid_data = slb_mk_e(get_paca()->kstack, 2, + MMU_SEGSIZE_256M); + if ((ksp_esid_data & ESID_MASK_256M) == KERNELBASE) + ksp_esid_data &= ~SLB_ESID_V; + } /* We need to do this all in asm, so we're sure we don't touch * the stack between the slbia and rebolting it. 
*/ @@ -87,9 +94,9 @@ static void slb_flush_and_rebolt(void) /* Slot 2 - kernel stack */ "slbmte %2,%3\n" "isync" - :: "r"(mk_vsid_data(VMALLOCBASE, vflags)), - "r"(mk_esid_data(VMALLOCBASE, 1)), - "r"(mk_vsid_data(ksp_esid_data, lflags)), + :: "r"(slb_mk_v(VMALLOCBASE, vflags, ssize)), + "r"(slb_mk_e(VMALLOCBASE, 1, ssize)), + "r"(slb_mk_v(ksp_esid_data, lflags, ssize)), "r"(ksp_esid_data) : "memory"); } @@ -102,19 +109,26 @@ void switch_slb(struct task_struct *tsk, unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; + int ssize; + unsigned long smask; if (offset <= SLB_CACHE_ENTRIES) { int i; + int ssize = get_paca()->context.segsize; + asm volatile("isync" : : : "memory"); for (i = 0; i < offset; i++) { + /* We use the 256M shift as this is how it's stored + * in the cache for both segment sizes + */ esid_data = ((unsigned long)get_paca()->slb_cache[i] - << SID_SHIFT) | SLBIE_C; + << SID_SHIFT_256M) | SLBIE_C | + ssize << SLBIE_SSIZE_SHIFT; asm volatile("slbie %0" : : "r" (esid_data)); } asm volatile("isync" : : : "memory"); - } else { + } else slb_flush_and_rebolt(); - } /* Workaround POWER5 < DD2.1 issue */ if (offset == 1 || offset > SLB_CACHE_ENTRIES) @@ -126,6 +140,14 @@ void switch_slb(struct task_struct *tsk, get_paca()->pgdir = mm->pgd; #endif /* CONFIG_PPC_64K_PAGES */ + /* Don't preload if the segment size for this context has not been + * chosen yet + */ + ssize = mm->context.segsize; + if (ssize == -1) + return; + smask = ssize ? ESID_MASK_1T : ESID_MASK_256M; + /* * preload some userspace segments into the SLB. */ @@ -138,20 +160,13 @@ void switch_slb(struct task_struct *tsk, return; slb_allocate(pc); - if (GET_ESID(pc) == GET_ESID(stack)) - return; - - if (stack >= KERNELBASE) - return; - slb_allocate(stack); + if (stack < KERNELBASE && ((stack ^ pc) & smask)) + slb_allocate(stack); - if ((GET_ESID(pc) == GET_ESID(unmapped_base)) - || (GET_ESID(stack) == GET_ESID(unmapped_base))) - return; - - if (unmapped_base >= KERNELBASE) - return; - slb_allocate(unmapped_base); + if (unmapped_base < KERNELBASE && + ((unmapped_base ^ pc) & smask) && + ((unmapped_base ^ stack) & smask)) + slb_allocate(unmapped_base); } static inline void patch_slb_encoding(unsigned int *insn_addr, @@ -205,6 +220,12 @@ void slb_initialize(void) #ifndef CONFIG_PPC_ISERIES { unsigned long lflags, vflags; + int ssize = MMU_SEGSIZE_256M; + + if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) + ssize = MMU_SEGSIZE_1T; + + DBG("SLB: segment size = %s\n", ssize ? "1TB" : "256MB"); lflags = SLB_VSID_KERNEL | linear_llp; vflags = SLB_VSID_KERNEL | virtual_llp; @@ -213,10 +234,10 @@ void slb_initialize(void) asm volatile("isync":::"memory"); asm volatile("slbmte %0,%0"::"r" (0) : "memory"); asm volatile("isync; slbia; isync":::"memory"); - create_slbe(KERNELBASE, lflags, 0); + create_slbe(KERNELBASE, lflags, 0, ssize); /* VMALLOC space has 4K pages always for now */ - create_slbe(VMALLOCBASE, vflags, 1); + create_slbe(VMALLOCBASE, vflags, 1, ssize); /* We don't bolt the stack for the time being - we're in boot, * so the stack is in the bolted segment. 
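
create_slbe() and the rebolt path above now go through slb_mk_e() and slb_mk_v(), whose definitions are not part of this excerpt. From the SLB_* and ESID_MASK_* constants the patch adds, they presumably look roughly like the sketch below (an assumption for illustration, not the patch's actual code):

static inline unsigned long slb_mk_e(unsigned long ea, unsigned long slot,
                                     int ssize)
{
        unsigned long mask = (ssize == MMU_SEGSIZE_1T) ?
                                ESID_MASK_1T : ESID_MASK_256M;

        return (ea & mask) | SLB_ESID_V | slot;
}

static inline unsigned long slb_mk_v(unsigned long ea, unsigned long flags,
                                     int ssize)
{
        unsigned long v = get_kernel_vsid(ea, ssize);

        if (ssize == MMU_SEGSIZE_1T)
                return (v << SLB_VSID_SHIFT_1T) | flags |
                       ((unsigned long)MMU_SEGSIZE_1T << SLB_VSID_SSIZE_SHIFT);
        return (v << SLB_VSID_SHIFT_256M) | flags;
}

The slb_cache invalidation in switch_slb() then tags each slbie operand with ssize << SLBIE_SSIZE_SHIFT, matching the B field programmed into the VSID word here.
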
By the time it goes Index: linux-work/arch/powerpc/mm/slb_low.S =================================================================== --- linux-work.orig/arch/powerpc/mm/slb_low.S 2005-11-08 11:00:17.000000000 +1100 +++ linux-work/arch/powerpc/mm/slb_low.S 2005-12-13 12:53:11.000000000 +1100 @@ -36,12 +36,18 @@ _GLOBAL(slb_allocate_realmode) /* r3 = faulting address */ srdi r9,r3,60 /* get region */ - srdi r10,r3,28 /* get esid */ cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ /* r3 = address, r10 = esid, cr7 = <>KERNELBASE */ blt cr7,0f /* user or kernel? */ +BEGIN_FTR_SECTION + srdi r10,r3,SID_SHIFT_256M /* get esid */ +END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT) +BEGIN_FTR_SECTION + srdi r10,r3,SID_SHIFT_1T /* get esid */ +END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) + /* kernel address: proto-VSID = ESID */ /* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but * this code will generate the protoVSID 0xfffffffff for the @@ -51,7 +57,7 @@ _GLOBAL(slb_allocate_realmode) /* Check if hitting the linear mapping of the vmalloc/ioremap * kernel space - */ + */ bne cr7,1f /* Linear mapping encoding bits, the "li" instruction below will @@ -59,23 +65,41 @@ _GLOBAL(slb_allocate_realmode) */ _GLOBAL(slb_miss_kernel_load_linear) li r11,0 - b slb_finish_load +BEGIN_FTR_SECTION + b slb_finish_load_256M +END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT) + b slb_finish_load_1T 1: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below * will be patched by the kernel at boot */ _GLOBAL(slb_miss_kernel_load_virtual) li r11,0 - b slb_finish_load +BEGIN_FTR_SECTION + b slb_finish_load_256M +END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT) + b slb_finish_load_1T + +0: /* User address */ + srdi r10,r3,SID_SHIFT_256M /* get 256M esid */ -0: /* user address: proto-VSID = context << 15 | ESID. First check + /* user address: proto-VSID = context << 15 | ESID. First check * if the address is within the boundaries of the user region */ srdi. r9,r10,USER_ESID_BITS bne- 8f /* invalid ea bits set */ - /* Figure out if the segment contains huge pages */ +BEGIN_FTR_SECTION + /* Copy 1T segment indication in cr1.eq */ + lwz r9,PACASEGSIZE(r13) + cmpi cr1,r9,MMU_SEGSIZE_1T +END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) + + /* Figure out if the segment contains huge pages, we leave to + * the C code the responsibility of not setting invalid combinations + * here for 1T segments 64 bits processes + */ #ifdef CONFIG_HUGETLB_PAGE BEGIN_FTR_SECTION b 1f @@ -87,7 +111,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE) blt 5f lhz r9,PACAHIGHHTLBAREAS(r13) - srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT) + srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT_256M) 5: srd r9,r9,r11 andi. 
r9,r9,1 @@ -104,12 +128,18 @@ _GLOBAL(slb_miss_user_load_normal) 2: ld r9,PACACONTEXTID(r13) rldimi r10,r9,USER_ESID_BITS,0 - b slb_finish_load +BEGIN_FTR_SECTION + beq cr1,3f +END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) + b slb_finish_load_256M +3: + srdi r10,r10,(VSID_BITS_256M - VSID_BITS_1T) + b slb_finish_load_1T 8: /* invalid EA */ li r10,0 /* BAD_VSID */ li r11,SLB_VSID_USER /* flags don't much matter */ - b slb_finish_load + b slb_finish_load_256M /* XXX FIXME */ #ifdef __DISABLED__ @@ -168,9 +198,82 @@ _GLOBAL(slb_allocate_user) * * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE */ -slb_finish_load: - ASM_VSID_SCRAMBLE(r10,r9) - rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */ +slb_finish_load_1T: + ASM_VSID_SCRAMBLE(r10,r9, 1T) + rldimi r11,r10,SLB_VSID_SHIFT_1T,16 /* combine VSID and flags */ + li r10,MMU_SEGSIZE_1T + rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */ +1: + /* r3 = EA, r11 = VSID data */ + /* + * Find a slot, round robin. Previously we tried to find a + * free slot first but that took too long. Unfortunately we + * dont have any LRU information to help us choose a slot. + */ +#ifdef CONFIG_PPC_ISERIES + /* + * On iSeries, the "bolted" stack segment can be cast out on + * shared processor switch so we need to check for a miss on + * it and restore it to the right slot. + */ + ld r9,PACAKSAVE(r13) + clrrdi r9,r9,SID_SHIFT_1T + clrrdi r3,r3,SID_SHIFT_1T + li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */ + cmpld r9,r3 + beq 3f +#endif /* CONFIG_PPC_ISERIES */ + + ld r10,PACASTABRR(r13) + addi r10,r10,1 + /* use a cpu feature mask if we ever change our slb size */ + cmpldi r10,SLB_NUM_ENTRIES + + blt+ 4f + li r10,SLB_NUM_BOLTED + +4: + std r10,PACASTABRR(r13) + +3: + rldimi r3,r10,0,64-SID_SHIFT_1T/* r3= EA[0:23] | entry */ + oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */ + + /* r3 = ESID data, r11 = VSID data */ + + /* + * No need for an isync before or after this slbmte. The exception + * we enter with and the rfid we exit with are context synchronizing. + */ + slbmte r11,r10 + + /* we're done for kernel addresses */ + crclr 4*cr0+eq /* set result to "success" */ + bgelr cr7 + + /* Update the slb cache */ + lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ + cmpldi r3,SLB_CACHE_ENTRIES + bge 1f + + /* still room in the slb cache */ + sldi r11,r3,1 /* r11 = offset * sizeof(u16) */ + rldicl r10,r10,36,28 /* get high 36 bits of the ESID */ + add r11,r11,r13 /* r11 = (u16 *)paca + offset */ + sth r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ + addi r3,r3,1 /* offset++ */ + b 2f +1: /* offset >= SLB_CACHE_ENTRIES */ + li r3,SLB_CACHE_ENTRIES+1 +2: + sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ + crclr 4*cr0+eq /* set result to "success" */ + blr + + +slb_finish_load_256M: + ASM_VSID_SCRAMBLE(r10,r9, 256M) + rldimi r11,r10,SLB_VSID_SHIFT_256M,16 /* combine VSID and flags */ /* r3 = EA, r11 = VSID data */ /* @@ -185,8 +288,8 @@ slb_finish_load: * it and restore it to the right slot. 
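
The user path above always extracts a 256MB-granular ESID, merges it with the context ID, and only narrows the proto-VSID for 1T segments by shifting out the difference in VSID widths before scrambling. In C, the rldimi/srdi sequence amounts to the following sketch (it uses the constants this patch introduces; the exact 1T values are not shown in this excerpt):

static unsigned long user_proto_vsid(unsigned long context,
                                     unsigned long ea, int ssize)
{
        /* validity of the ESID against USER_ESID_BITS is checked earlier */
        unsigned long proto = (context << USER_ESID_BITS) |
                              GET_ESID_256M(ea);

        if (ssize == MMU_SEGSIZE_1T)
                proto >>= VSID_BITS_256M - VSID_BITS_1T;
        return proto;
}
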
*/ ld r9,PACAKSAVE(r13) - clrrdi r9,r9,28 - clrrdi r3,r3,28 + clrrdi r9,r9,SID_SHIFT_256M + clrrdi r3,r3,SID_SHIFT_256M li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */ cmpld r9,r3 beq 3f @@ -204,7 +307,7 @@ slb_finish_load: std r10,PACASTABRR(r13) 3: - rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */ + rldimi r3,r10,0,64-SID_SHIFT_256M /* r3= EA[0:35] | entry */ oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */ /* r3 = ESID data, r11 = VSID data */ @@ -226,7 +329,7 @@ slb_finish_load: /* still room in the slb cache */ sldi r11,r3,1 /* r11 = offset * sizeof(u16) */ - rldicl r10,r10,36,28 /* get low 16 bits of the ESID */ + rldicl r10,r10,36,28 /* get high 36 bits of the ESID */ add r11,r11,r13 /* r11 = (u16 *)paca + offset */ sth r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ addi r3,r3,1 /* offset++ */ Index: linux-work/arch/powerpc/mm/stab.c =================================================================== --- linux-work.orig/arch/powerpc/mm/stab.c 2005-12-12 17:33:39.000000000 +1100 +++ linux-work/arch/powerpc/mm/stab.c 2005-12-13 10:32:33.000000000 +1100 @@ -40,10 +40,10 @@ static int make_ste(unsigned long stab, unsigned long entry, group, old_esid, castout_entry, i; unsigned int global_entry; struct stab_entry *ste, *castout_ste; - unsigned long kernel_segment = (esid << SID_SHIFT) >= KERNELBASE; + unsigned long kernel_segment = (esid << SID_SHIFT_256M) >= KERNELBASE; vsid_data = vsid << STE_VSID_SHIFT; - esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; + esid_data = esid << SID_SHIFT_256M | STE_ESID_KP | STE_ESID_V; if (! kernel_segment) esid_data |= STE_ESID_KS; @@ -83,7 +83,7 @@ static int make_ste(unsigned long stab, } /* Dont cast out the first kernel segment */ - if ((castout_ste->esid_data & ESID_MASK) != KERNELBASE) + if ((castout_ste->esid_data & ESID_MASK_256M) != KERNELBASE) break; castout_entry = (castout_entry + 1) & 0xf; @@ -96,7 +96,7 @@ static int make_ste(unsigned long stab, /* Force previous translations to complete. DRENG */ asm volatile("isync" : : : "memory"); - old_esid = castout_ste->esid_data >> SID_SHIFT; + old_esid = castout_ste->esid_data >> SID_SHIFT_256M; castout_ste->esid_data = 0; /* Invalidate old entry */ asm volatile("sync" : : : "memory"); /* Order update */ @@ -105,7 +105,7 @@ static int make_ste(unsigned long stab, asm volatile("eieio" : : : "memory"); /* Order update */ castout_ste->esid_data = esid_data; - asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); + asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT_256M)); /* Ensure completion of slbie */ asm volatile("sync" : : : "memory"); @@ -123,15 +123,15 @@ static int __ste_allocate(unsigned long /* Kernel or user address? */ if (ea >= KERNELBASE) { - vsid = get_kernel_vsid(ea); + vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M); } else { if ((ea >= TASK_SIZE_USER64) || (! 
mm)) return 1; - vsid = get_vsid(mm->context.id, ea); + vsid = get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M); } - stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); + stab_entry = make_ste(get_paca()->stab_addr, GET_ESID_256M(ea), vsid); if (ea < KERNELBASE) { offset = __get_cpu_var(stab_cache_ptr); @@ -189,7 +189,7 @@ void switch_stab(struct task_struct *tsk entry < (HW_PAGE_SIZE / sizeof(struct stab_entry)); entry++, ste++) { unsigned long ea; - ea = ste->esid_data & ESID_MASK; + ea = ste->esid_data & ESID_MASK_256M; if (ea < KERNELBASE) { ste->esid_data = 0; } @@ -212,13 +212,13 @@ void switch_stab(struct task_struct *tsk __ste_allocate(pc, mm); - if (GET_ESID(pc) == GET_ESID(stack)) + if (GET_ESID_256M(pc) == GET_ESID_256M(stack)) return; __ste_allocate(stack, mm); - if ((GET_ESID(pc) == GET_ESID(unmapped_base)) - || (GET_ESID(stack) == GET_ESID(unmapped_base))) + if ((GET_ESID_256M(pc) == GET_ESID_256M(unmapped_base)) + || (GET_ESID_256M(stack) == GET_ESID_256M(unmapped_base))) return; __ste_allocate(unmapped_base, mm); @@ -246,7 +246,7 @@ void stabs_alloc(void) continue; /* stab for CPU 0 is statically allocated */ newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE, - 1<> REGION_SHIFT) /* Segment size */ -#define SID_SHIFT 28 -#define SID_MASK 0xfffffffffUL -#define ESID_MASK 0xfffffffff0000000UL -#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK) +#define SID_SHIFT_256M 28 +#define SID_MASK_256M 0xfffffffffUL +#define ESID_MASK_256M 0xfffffffff0000000UL +#define GET_ESID_256M(x) (((x) >> SID_SHIFT_256M) & SID_MASK_256M) + +#define SID_SHIFT_1T 40 +#define SID_MASK_1T 0xffffffUL +#define ESID_MASK_1T 0xffffff0000000000UL +#define GET_ESID_1T(x) (((x) >> SID_SHIFT_1T) & SID_MASK_1T) #ifndef __ASSEMBLY__ #include @@ -104,8 +109,8 @@ extern unsigned int HPAGE_SHIFT; #define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) #define LOW_ESID_MASK(addr, len) \ - (((1U << (GET_ESID(min((addr)+(len)-1, 0x100000000UL))+1)) \ - - (1U << GET_ESID(min((addr), 0x100000000UL)))) & 0xffff) + (((1U << (GET_ESID_256M(min((addr)+(len)-1, 0x100000000UL))+1)) \ + - (1U << GET_ESID_256M(min((addr), 0x100000000UL)))) & 0xffff) #define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ - (1U << GET_HTLB_AREA(addr))) & 0xffff) @@ -142,7 +147,7 @@ extern unsigned int HPAGE_SHIFT; (cpu_has_feature(CPU_FTR_16M_PAGE) && \ ( ( (addr) >= 0x100000000UL) \ ? 
Index: linux-work/include/asm-powerpc/pgtable.h
===================================================================
--- linux-work.orig/include/asm-powerpc/pgtable.h	2005-11-21 11:53:15.000000000 +1100
+++ linux-work/include/asm-powerpc/pgtable.h	2005-12-13 10:32:33.000000000 +1100
@@ -39,7 +39,7 @@ struct mm_struct;
 #error TASK_SIZE_USER64 exceeds pagetable range
 #endif
-#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT))
+#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT_256M))
 #error TASK_SIZE_USER64 exceeds user VSID range
 #endif
Index: linux-work/include/asm-powerpc/mmu.h
===================================================================
--- linux-work.orig/include/asm-powerpc/mmu.h	2005-12-12 17:33:39.000000000 +1100
+++ linux-work/include/asm-powerpc/mmu.h	2005-12-13 10:32:33.000000000 +1100
@@ -50,7 +50,9 @@ extern char initial_stab[];
 #define SLB_ESID_V		ASM_CONST(0x0000000008000000) /* valid */
 /* Bits in the SLB VSID word */
-#define SLB_VSID_SHIFT		12
+#define SLB_VSID_SHIFT_1T	24
+#define SLB_VSID_SHIFT_256M	12
+#define SLB_VSID_SSIZE_SHIFT	62
 #define SLB_VSID_B		ASM_CONST(0xc000000000000000)
 #define SLB_VSID_B_256M		ASM_CONST(0x0000000000000000)
 #define SLB_VSID_B_1T		ASM_CONST(0x4000000000000000)
@@ -70,6 +72,7 @@ extern char initial_stab[];
 #define SLB_VSID_USER		(SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C)
 #define SLBIE_C			(0x08000000)
+#define SLBIE_SSIZE_SHIFT	25
 /*
  * Hash table
@@ -152,6 +155,13 @@ struct mmu_psize_def
 #define MMU_PAGE_16G		5	/* 16G */
 #define MMU_PAGE_COUNT		6
+/*
+ * Supported segment sizes
+ */
+#define MMU_SEGSIZE_256M	0x00
+#define MMU_SEGSIZE_1T		0x01
+
+
 #ifndef __ASSEMBLY__
 /*
@@ -173,13 +183,15 @@ extern int mmu_huge_psize;
  * This function sets the AVPN and L fields of the HPTE appropriately
  * for the page size
  */
-static inline unsigned long hpte_encode_v(unsigned long va, int psize)
+static inline unsigned long hpte_encode_v(unsigned long va,
+					  int psize, int ssize)
 {
 	unsigned long v =
 	v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
 	v <<= HPTE_V_AVPN_SHIFT;
 	if (psize != MMU_PAGE_4K)
 		v |= HPTE_V_LARGE;
+	v |= ((unsigned long)ssize) << 62;
 	return v;
 }
@@ -204,29 +216,50 @@ static inline unsigned long hpte_encode_
 }
 /*
- * This hashes a virtual address for a 256Mb segment only for now
  */
-static inline unsigned long hpt_hash(unsigned long va, unsigned int shift)
+static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
+				   int ssize)
 {
-	return ((va >> 28) & 0x7fffffffffUL) ^ ((va & 0x0fffffffUL) >> shift);
+	if (ssize == MMU_SEGSIZE_1T)
+		return (vsid << 40) | (ea & 0xffffffffffUL);
+	return (vsid << 28) | (ea & 0xfffffffUL);
+}
+
+/*
+ * This hashes a virtual address
+ */
+
+static inline unsigned long hpt_hash(unsigned long va,
+				     unsigned int shift, int ssize)
+{
+	if (ssize == MMU_SEGSIZE_1T)
+		return ((va >> 40) & 0x7ffffffUL) ^
+			((va & 0xffffffffffUL) >> shift);
+	return ((va >> 28) & 0x7fffffffffUL) ^
+		((va & 0xfffffffUL) >> shift);
 }
 extern int __hash_page_4K(unsigned long ea, unsigned long access,
 			  unsigned long vsid, pte_t *ptep, unsigned long trap,
-			  unsigned int local);
+			  int ssize, unsigned int local);
 extern int __hash_page_64K(unsigned long ea, unsigned long access,
 			   unsigned long vsid, pte_t *ptep, unsigned long trap,
-			   unsigned int local);
+			   int ssize, unsigned int local);
 struct mm_struct;
 extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
-			  unsigned long ea, unsigned long vsid, int local,
-			  unsigned long trap);
+			  unsigned long ea, unsigned long vsid, int ssize,
+			  int local, unsigned long trap);
+
+extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize,
+			    int ssize, int local);
+extern void flush_hash_range(unsigned long number, int local);
 extern void htab_finish_init(void);
 extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 			     unsigned long pstart, unsigned long mode,
-			     int psize);
+			     int psize, int ssize);
 extern void htab_initialize(void);
 extern void htab_initialize_secondary(void);
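hpt_va() and hpt_hash() are the two helpers the rest of the series keys off: for 1T segments the VA keeps the low 40 bits of the EA with the VSID placed at bit 40, and the primary hash folds the VSID against (EA >> page_shift) using the wider masks. The following is only a user-space restatement of those two inlines for experimenting with the values; it is not kernel code, and the example inputs are arbitrary.

/* Restatement of the new hpt_va()/hpt_hash() helpers for experimentation. */
#include <stdio.h>

#define MMU_SEGSIZE_256M	0x00
#define MMU_SEGSIZE_1T		0x01

static unsigned long hpt_va(unsigned long ea, unsigned long vsid, int ssize)
{
	if (ssize == MMU_SEGSIZE_1T)
		return (vsid << 40) | (ea & 0xffffffffffUL);
	return (vsid << 28) | (ea & 0xfffffffUL);
}

static unsigned long hpt_hash(unsigned long va, unsigned int shift, int ssize)
{
	if (ssize == MMU_SEGSIZE_1T)
		return ((va >> 40) & 0x7ffffffUL) ^
			((va & 0xffffffffffUL) >> shift);
	return ((va >> 28) & 0x7fffffffffUL) ^
		((va & 0xfffffffUL) >> shift);
}

int main(void)
{
	unsigned long va = hpt_va(0x10012345000UL, 0x123456UL, MMU_SEGSIZE_1T);

	/* shift 12 corresponds to 4K pages */
	printf("va=%lx hash=%lx\n", va, hpt_hash(va, 12, MMU_SEGSIZE_1T));
	return 0;
}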
@@ -238,17 +271,20 @@ extern void mm_init_ppc64(void);
 extern long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 				     unsigned long va, unsigned long prpn,
 				     unsigned long rflags,
-				     unsigned long vflags, int psize);
+				     unsigned long vflags,
+				     int psize, int ssize);
 extern long native_hpte_insert(unsigned long hpte_group,
 			       unsigned long va, unsigned long prpn,
 			       unsigned long rflags,
-			       unsigned long vflags, int psize);
+			       unsigned long vflags,
+			       int psize, int ssize);
 extern long iSeries_hpte_insert(unsigned long hpte_group,
 				unsigned long va, unsigned long prpn,
 				unsigned long rflags,
-				unsigned long vflags, int psize);
+				unsigned long vflags,
+				int psize, int ssize);
 extern void stabs_alloc(void);
 extern void slb_initialize(void);
@@ -307,14 +343,18 @@ extern void stab_initialize(unsigned lon
  * which are used by the iSeries firmware.
  */
-#define VSID_MULTIPLIER	ASM_CONST(200730139)	/* 28-bit prime */
-#define VSID_BITS	36
-#define VSID_MODULUS	((1UL<<VSID_BITS)-1)
 	 * 2^36-1+2^28-1.  That in particular means that if r3 >=	\
 	 * 2^36-1, then r3+1 has the 2^36 bit set.  So, if r3+1 has	\
@@ -345,7 +385,7 @@ extern void stab_initialize(unsigned lon
 	 * doesn't, the answer is the low 36 bits of r3+1.  So in all	\
 	 * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\
 	addi	rx,rt,1;						\
-	srdi	rx,rx,VSID_BITS;	/* extract 2^36 bit */		\
+	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^36 bit */		\
 	add	rt,rt,rx
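The scramble itself stays a multiply modulo a Mersenne-style modulus (2^VSID_BITS - 1); the assembler macro avoids the division by folding the high and low VSID_BITS-bit halves of the product, exactly as the comment in the hunk describes. Below is a small stand-alone check that the fold matches the plain modulus for proto-VSIDs below 2^VSID_BITS, using only the 256M constants visible in the removed lines (the per-size 1T constants are not visible in this hunk, so they are not used here).

/* Sanity check of the multiply-and-fold reduction used by the VSID scramble. */
#include <assert.h>

#define VSID_MULTIPLIER	200730139UL	/* 28-bit prime */
#define VSID_BITS	36
#define VSID_MODULUS	((1UL << VSID_BITS) - 1)

static unsigned long scramble_fold(unsigned long protovsid)
{
	unsigned long x = protovsid * VSID_MULTIPLIER;

	/* fold the product modulo 2^36-1 instead of dividing */
	x = (x >> VSID_BITS) + (x & VSID_MODULUS);
	return (x + ((x + 1) >> VSID_BITS)) & VSID_MODULUS;
}

int main(void)
{
	unsigned long p;

	for (p = 0; p < 1000000; p++)
		assert(scramble_fold(p) == (p * VSID_MULTIPLIER) % VSID_MODULUS);
	return 0;
}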
@@ -358,41 +398,73 @@ typedef struct {
 #ifdef CONFIG_HUGETLB_PAGE
 	u16 low_htlb_areas, high_htlb_areas;
 #endif
+	int segsize;
 } mm_context_t;
-static inline unsigned long vsid_scramble(unsigned long protovsid)
-{
+/* The code below is equivalent to this function for arguments
+ * < 2^VSID_BITS, which is all this should ever be called
+ * with. However gcc is not clever enough to compute the
+ * modulus (2^n-1) without a second multiply. */
+
 #if 0
-	/* The code below is equivalent to this function for arguments
-	 * < 2^VSID_BITS, which is all this should ever be called
-	 * with. However gcc is not clever enough to compute the
-	 * modulus (2^n-1) without a second multiply. */
-	return ((protovsid * VSID_MULTIPLIER) % VSID_MODULUS);
+#define vsid_scrample(protovsid, size) \
+	((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
 #else /* 1 */
-	unsigned long x;
-
-	x = protovsid * VSID_MULTIPLIER;
-	x = (x >> VSID_BITS) + (x & VSID_MODULUS);
-	return (x + ((x+1) >> VSID_BITS)) & VSID_MODULUS;
+#define vsid_scramble(protovsid, size) \
+	({								 \
+		unsigned long x;					 \
+		x = (protovsid) * VSID_MULTIPLIER_##size;		 \
+		x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
+		(x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
+	})
 #endif /* 1 */
-}
 /* This is only valid for addresses >= KERNELBASE */
-static inline unsigned long get_kernel_vsid(unsigned long ea)
+static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 {
-	return vsid_scramble(ea >> SID_SHIFT);
+	if (ssize == MMU_SEGSIZE_1T)
+		return vsid_scramble(ea >> SID_SHIFT_1T, 1T);
+	else
+		return vsid_scramble(ea >> SID_SHIFT_256M, 256M);
 }
 /* This is only valid for user addresses (which are below 2^41) */
-static inline unsigned long get_vsid(unsigned long context, unsigned long ea)
+static inline unsigned long get_vsid(unsigned long context,
+				     unsigned long ea, int ssize)
 {
-	return vsid_scramble((context << USER_ESID_BITS)
-			     | (ea >> SID_SHIFT));
+	unsigned long protovsid = (context << USER_ESID_BITS)
+		| (ea >> SID_SHIFT_256M);
+	if (ssize == MMU_SEGSIZE_1T)
+		return vsid_scramble((protovsid
+				      >> (VSID_BITS_256M - VSID_BITS_1T)), 1T);
+	else
+		return vsid_scramble((protovsid), 256M);
 }
-#define VSID_SCRAMBLE(pvsid)	(((pvsid) * VSID_MULTIPLIER) % VSID_MODULUS)
-#define KERNEL_VSID(ea)		VSID_SCRAMBLE(GET_ESID(ea))
+static inline unsigned long slb_mk_e(unsigned long ea,
+				     unsigned long slot, int ssize)
+{
+	if (ssize == MMU_SEGSIZE_1T)
+		return (ea & ESID_MASK_1T) | SLB_ESID_V | slot;
+	return (ea & ESID_MASK_256M) | SLB_ESID_V | slot;
+}
+
+static inline unsigned long slb_mk_v(unsigned long ea,
+				     unsigned long flags, int ssize)
+{
+	if (ssize == MMU_SEGSIZE_1T)
+		return (get_kernel_vsid(ea, ssize) << SLB_VSID_SHIFT_1T)
+			| (((unsigned long)ssize) << SLB_VSID_SSIZE_SHIFT)
+			| flags;
+	return (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT_256M)
+		| flags;
+}
+
+
+#define VSID_SCRAMBLE_ISERIES(pvsid) \
+	(((pvsid) * VSID_MULTIPLIER_256M) % VSID_MODULUS_256M)
+#define KERNEL_VSID_ISERIES(ea)	VSID_SCRAMBLE_ISERIES(GET_ESID(ea))
 #endif /* __ASSEMBLY */
Index: linux-work/include/asm-powerpc/tlbflush.h
===================================================================
--- linux-work.orig/include/asm-powerpc/tlbflush.h	2005-11-08 11:00:19.000000000 +1100
+++ linux-work/include/asm-powerpc/tlbflush.h	2005-12-13 10:32:33.000000000 +1100
@@ -33,7 +33,7 @@ struct ppc64_tlb_batch {
 	struct mm_struct	*mm;
 	real_pte_t		pte[PPC64_TLB_BATCH_NR];
 	unsigned long		vaddr[PPC64_TLB_BATCH_NR];
-	unsigned int		psize;
+	int			psize; int ssize;
 };
 DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
@@ -48,10 +48,6 @@ static inline void flush_tlb_pending(voi
 	put_cpu_var(ppc64_tlb_batch);
 }
-extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize,
-			    int local);
-extern void flush_hash_range(unsigned long number, int local);
-
 #else /* CONFIG_PPC64 */
 #include
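slb_mk_e()/slb_mk_v() centralise the SLB dword layout: the ESID word is the segment-aligned EA plus the valid bit and the slot index, and for 1T segments the VSID word gets the segment-size field at bit 62, which lines up with SLB_VSID_B_1T in the earlier hunk. Below is a stand-alone illustration of that layout; get_kernel_vsid() is not reimplemented here, so the VSID value and the flags are placeholders, and the helper names are invented for the sketch.

/* Illustration of the SLB ESID/VSID word layout; constants from the hunks above. */
#include <stdio.h>

#define SLB_ESID_V		0x0000000008000000UL	/* valid */
#define SLB_VSID_SHIFT_256M	12
#define SLB_VSID_SHIFT_1T	24
#define SLB_VSID_SSIZE_SHIFT	62
#define ESID_MASK_256M		0xfffffffff0000000UL
#define ESID_MASK_1T		0xffffff0000000000UL
#define MMU_SEGSIZE_256M	0x00
#define MMU_SEGSIZE_1T		0x01

static unsigned long mk_esid(unsigned long ea, unsigned long slot, int ssize)
{
	unsigned long mask = (ssize == MMU_SEGSIZE_1T) ? ESID_MASK_1T
						       : ESID_MASK_256M;
	return (ea & mask) | SLB_ESID_V | slot;
}

static unsigned long mk_vsid(unsigned long vsid, unsigned long flags, int ssize)
{
	if (ssize == MMU_SEGSIZE_1T)
		return (vsid << SLB_VSID_SHIFT_1T)
			| ((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT)
			| flags;
	return (vsid << SLB_VSID_SHIFT_256M) | flags;
}

int main(void)
{
	/* dummy VSID, no extra flags, bolted into slot 2, 1T segment */
	printf("esid=%016lx vsid=%016lx\n",
	       mk_esid(0xc000000000000000UL, 2, MMU_SEGSIZE_1T),
	       mk_vsid(0x123456UL, 0, MMU_SEGSIZE_1T));
	return 0;
}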
Index: linux-work/arch/powerpc/kernel/asm-offsets.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/asm-offsets.c	2005-11-15 13:31:57.000000000 +1100
+++ linux-work/arch/powerpc/kernel/asm-offsets.c	2005-12-13 10:32:33.000000000 +1100
@@ -124,6 +124,7 @@ int main(void)
 	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
 	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+	DEFINE(PACASEGSIZE, offsetof(struct paca_struct, context.segsize));
 #ifdef CONFIG_PPC_64K_PAGES
 	DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
 #endif
Index: linux-work/arch/powerpc/mm/mmu_context_64.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/mmu_context_64.c	2005-11-01 14:13:52.000000000 +1100
+++ linux-work/arch/powerpc/mm/mmu_context_64.c	2005-12-13 10:32:33.000000000 +1100
@@ -49,7 +49,7 @@ again:
 	}
 	mm->context.id = index;
-
+	mm->context.segsize = -1;
 	return 0;
 }
Index: linux-work/mm/vmalloc.c
===================================================================
--- linux-work.orig/mm/vmalloc.c	2005-11-08 11:00:20.000000000 +1100
+++ linux-work/mm/vmalloc.c	2005-12-13 10:32:33.000000000 +1100
@@ -94,6 +94,10 @@ static int vmap_pte_range(pmd_t *pmd, un
 		return -ENOMEM;
 	do {
 		struct page *page = **pages;
+		if (!pte_none(*pte)) {
+			udbg_printf("vmalloc error, addr: %lx, pte @ %p: %lx\n",
+				    addr, pte, pte_val(*pte));
+		}
 		WARN_ON(!pte_none(*pte));
 		if (!page)
 			return -ENOMEM;
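The last three hunks are plumbing: asm-offsets.c exports the new context.segsize field to assembly as PACASEGSIZE, mmu_context_64.c initialises it to -1 for a freshly allocated context, and the mm/vmalloc.c change is a debug aid that prints the offending PTE before the existing WARN_ON(!pte_none(*pte)) fires. How a consumer interprets the -1 "unset" value is not defined anywhere in this patch; the sketch below assumes a fall-back to 256M purely for illustration, and the struct and helper are invented.

/* Hypothetical consumer of context.segsize; the fall-back policy is assumed. */
#define MMU_SEGSIZE_256M	0x00
#define MMU_SEGSIZE_1T		0x01

struct mm_context { int segsize; };

static int context_segsize(const struct mm_context *ctx)
{
	/* treat the -1 "unset" value as the legacy 256M segment size */
	return (ctx->segsize < 0) ? MMU_SEGSIZE_256M : ctx->segsize;
}

int main(void)
{
	struct mm_context ctx = { .segsize = -1 };

	return context_segsize(&ctx);	/* 0 == MMU_SEGSIZE_256M */
}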