From: Linus Torvalds Date: Fri, 25 Jul 2008 18:08:17 +0000 (-0700) Subject: Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc X-Git-Tag: v2.6.27-rc1~222 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5047887caf1806f31652210df27fb62a7c43f27d;hp=996abf053eec4d67136be8b911bbaaf989cfb99c;p=linux-2.6 Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc * 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (34 commits) powerpc: Wireup new syscalls Move update_mmu_cache() declaration from tlbflush.h to pgtable.h powerpc/pseries: Remove kmalloc call in handling writes to lparcfg powerpc/pseries: Update arch vector to indicate support for CMO ibmvfc: Add support for collaborative memory overcommit ibmvscsi: driver enablement for CMO ibmveth: enable driver for CMO ibmveth: Automatically enable larger rx buffer pools for larger mtu powerpc/pseries: Verify CMO memory entitlement updates with virtual I/O powerpc/pseries: vio bus support for CMO powerpc/pseries: iommu enablement for CMO powerpc/pseries: Add CMO paging statistics powerpc/pseries: Add collaborative memory manager powerpc/pseries: Utilities to set firmware page state powerpc/pseries: Enable CMO feature during platform setup powerpc/pseries: Split retrieval of processor entitlement data into a helper routine powerpc/pseries: Add memory entitlement capabilities to /proc/ppc64/lparcfg powerpc/pseries: Split processor entitlement retrieval and gathering to helper routines powerpc/pseries: Remove extraneous error reporting for hcall failures in lparcfg powerpc: Fix compile error with binutils 2.15 ... Fixed up conflict in arch/powerpc/platforms/52xx/Kconfig manually. --- diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index ea1b70b357..99514ced82 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -59,6 +59,7 @@ Table of Contents p) Freescale Synchronous Serial Interface q) USB EHCI controllers r) MDIO on GPIOs + s) SPI busses VII - Marvell Discovery mv64[345]6x System Controller chips 1) The /system-controller node @@ -1883,6 +1884,62 @@ platforms are moved over to use the flattened-device-tree model. &qe_pio_c 6>; }; + s) SPI (Serial Peripheral Interface) busses + + SPI busses can be described with a node for the SPI master device + and a set of child nodes for each SPI slave on the bus. For this + discussion, it is assumed that the system's SPI controller is in + SPI master mode. This binding does not describe SPI controllers + in slave mode. + + The SPI master node requires the following properties: + - #address-cells - number of cells required to define a chip select + address on the SPI bus. + - #size-cells - should be zero. + - compatible - name of SPI bus controller following generic names + recommended practice. + No other properties are required in the SPI bus node. It is assumed + that a driver for an SPI bus device will understand that it is an SPI bus. + However, the binding does not attempt to define the specific method for + assigning chip select numbers. Since SPI chip select configuration is + flexible and non-standardized, it is left out of this binding with the + assumption that board specific platform code will be used to manage + chip selects. Individual drivers can define additional properties to + support describing the chip select layout. + + SPI slave nodes must be children of the SPI master node and can + contain the following properties. + - reg - (required) chip select address of device. + - compatible - (required) name of SPI device following generic names + recommended practice + - spi-max-frequency - (required) Maximum SPI clocking speed of device in Hz + - spi-cpol - (optional) Empty property indicating device requires + inverse clock polarity (CPOL) mode + - spi-cpha - (optional) Empty property indicating device requires + shifted clock phase (CPHA) mode + + SPI example for an MPC5200 SPI bus: + spi@f00 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,mpc5200b-spi","fsl,mpc5200-spi"; + reg = <0xf00 0x20>; + interrupts = <2 13 0 2 14 0>; + interrupt-parent = <&mpc5200_pic>; + + ethernet-switch@0 { + compatible = "micrel,ks8995m"; + spi-max-frequency = <1000000>; + reg = <0>; + }; + + codec@1 { + compatible = "ti,tlv320aic26"; + spi-max-frequency = <100000>; + reg = <1>; + }; + }; + VII - Marvell Discovery mv64[345]6x System Controller chips =========================================================== diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b936a1dd0a..25a052c167 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -23,6 +23,9 @@ struct cpu_spec* cur_cpu_spec = NULL; EXPORT_SYMBOL(cur_cpu_spec); +/* The platform string corresponding to the real PVR */ +const char *powerpc_base_platform; + /* NOTE: * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's * the responsibility of the appropriate CPU save/restore functions to @@ -1652,6 +1655,14 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr) } else *t = *s; *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; + + /* + * Set the base platform string once; assumes + * we're called with real pvr first. + */ + if (powerpc_base_platform == NULL) + powerpc_base_platform = t->platform; + #if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE) /* ppc64 and booke expect identify_cpu to also call * setup_cpu for that processor. I will consolidate diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index da52269aec..81c8324a4a 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -148,7 +148,7 @@ transfer_to_handler: /* Check to see if the dbcr0 register is set up to debug. Use the internal debug mode bit to do this. */ lwz r12,THREAD_DBCR0(r12) - andis. r12,r12,DBCR0_IDM@h + andis. r12,r12,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h beq+ 3f /* From user and task is ptraced - load up global dbcr0 */ li r12,-1 /* clear all pending debug events */ @@ -292,7 +292,7 @@ syscall_exit_cont: /* If the process has its own DBCR0 value, load it up. The internal debug mode bit tells us that dbcr0 should be loaded. */ lwz r0,THREAD+THREAD_DBCR0(r2) - andis. r10,r0,DBCR0_IDM@h + andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h bnel- load_dbcr0 #endif #ifdef CONFIG_44x @@ -720,7 +720,7 @@ restore_user: /* Check whether this process has its own DBCR0 value. The internal debug mode bit tells us that dbcr0 should be loaded. */ lwz r0,THREAD+THREAD_DBCR0(r2) - andis. r10,r0,DBCR0_IDM@h + andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h bnel- load_dbcr0 #endif diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 2385f68c17..550a19399b 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -49,6 +49,8 @@ static int novmerge = 1; static int protect4gb = 1; +static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); + static inline unsigned long iommu_num_pages(unsigned long vaddr, unsigned long slen) { @@ -191,6 +193,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, { unsigned long entry, flags; dma_addr_t ret = DMA_ERROR_CODE; + int build_fail; spin_lock_irqsave(&(tbl->it_lock), flags); @@ -205,9 +208,21 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ /* Put the TCEs in the HW table */ - ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK, - direction, attrs); + build_fail = ppc_md.tce_build(tbl, entry, npages, + (unsigned long)page & IOMMU_PAGE_MASK, + direction, attrs); + + /* ppc_md.tce_build() only returns non-zero for transient errors. + * Clean up the table bitmap in this case and return + * DMA_ERROR_CODE. For all other errors the functionality is + * not altered. + */ + if (unlikely(build_fail)) { + __iommu_free(tbl, ret, npages); + spin_unlock_irqrestore(&(tbl->it_lock), flags); + return DMA_ERROR_CODE; + } /* Flush/invalidate TLB caches if necessary */ if (ppc_md.tce_flush) @@ -276,7 +291,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, dma_addr_t dma_next = 0, dma_addr; unsigned long flags; struct scatterlist *s, *outs, *segstart; - int outcount, incount, i; + int outcount, incount, i, build_fail = 0; unsigned int align; unsigned long handle; unsigned int max_seg_size; @@ -337,8 +352,11 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, npages, entry, dma_addr); /* Insert into HW table */ - ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK, - direction, attrs); + build_fail = ppc_md.tce_build(tbl, entry, npages, + vaddr & IOMMU_PAGE_MASK, + direction, attrs); + if(unlikely(build_fail)) + goto failure; /* If we are in an open segment, try merging */ if (segstart != s) { diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 827a5726a0..9f856a0c3e 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -34,8 +34,9 @@ #include #include #include +#include -#define MODULE_VERS "1.7" +#define MODULE_VERS "1.8" #define MODULE_NAME "lparcfg" /* #define LPARCFG_DEBUG */ @@ -129,32 +130,46 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v) /* * Methods used to fetch LPAR data when running on a pSeries platform. */ -static void log_plpar_hcall_return(unsigned long rc, char *tag) +/** + * h_get_mpp + * H_GET_MPP hcall returns info in 7 parms + */ +int h_get_mpp(struct hvcall_mpp_data *mpp_data) { - switch(rc) { - case 0: - return; - case H_HARDWARE: - printk(KERN_INFO "plpar-hcall (%s) " - "Hardware fault\n", tag); - return; - case H_FUNCTION: - printk(KERN_INFO "plpar-hcall (%s) " - "Function not allowed\n", tag); - return; - case H_AUTHORITY: - printk(KERN_INFO "plpar-hcall (%s) " - "Not authorized to this function\n", tag); - return; - case H_PARAMETER: - printk(KERN_INFO "plpar-hcall (%s) " - "Bad parameter(s)\n",tag); - return; - default: - printk(KERN_INFO "plpar-hcall (%s) " - "Unexpected rc(0x%lx)\n", tag, rc); - } + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_GET_MPP, retbuf); + + mpp_data->entitled_mem = retbuf[0]; + mpp_data->mapped_mem = retbuf[1]; + + mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + mpp_data->pool_num = retbuf[2] & 0xffff; + + mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; + mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; + mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; + + mpp_data->pool_size = retbuf[4]; + mpp_data->loan_request = retbuf[5]; + mpp_data->backing_mem = retbuf[6]; + + return rc; } +EXPORT_SYMBOL(h_get_mpp); + +struct hvcall_ppp_data { + u64 entitlement; + u64 unallocated_entitlement; + u16 group_num; + u16 pool_num; + u8 capped; + u8 weight; + u8 unallocated_weight; + u16 active_procs_in_pool; + u16 active_system_procs; +}; /* * H_GET_PPP hcall returns info in 4 parms. @@ -176,27 +191,30 @@ static void log_plpar_hcall_return(unsigned long rc, char *tag) * XXXX - Active processors in Physical Processor Pool. * XXXX - Processors active on platform. */ -static unsigned int h_get_ppp(unsigned long *entitled, - unsigned long *unallocated, - unsigned long *aggregation, - unsigned long *resource) +static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) { unsigned long rc; unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; rc = plpar_hcall(H_GET_PPP, retbuf); - *entitled = retbuf[0]; - *unallocated = retbuf[1]; - *aggregation = retbuf[2]; - *resource = retbuf[3]; + ppp_data->entitlement = retbuf[0]; + ppp_data->unallocated_entitlement = retbuf[1]; + + ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + ppp_data->pool_num = retbuf[2] & 0xffff; - log_plpar_hcall_return(rc, "H_GET_PPP"); + ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01; + ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff; + ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff; + ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff; + ppp_data->active_system_procs = retbuf[3] & 0xffff; return rc; } -static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) +static unsigned h_pic(unsigned long *pool_idle_time, + unsigned long *num_procs) { unsigned long rc; unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; @@ -206,8 +224,87 @@ static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) *pool_idle_time = retbuf[0]; *num_procs = retbuf[1]; - if (rc != H_AUTHORITY) - log_plpar_hcall_return(rc, "H_PIC"); + return rc; +} + +/* + * parse_ppp_data + * Parse out the data returned from h_get_ppp and h_pic + */ +static void parse_ppp_data(struct seq_file *m) +{ + struct hvcall_ppp_data ppp_data; + int rc; + + rc = h_get_ppp(&ppp_data); + if (rc) + return; + + seq_printf(m, "partition_entitled_capacity=%ld\n", + ppp_data.entitlement); + seq_printf(m, "group=%d\n", ppp_data.group_num); + seq_printf(m, "system_active_processors=%d\n", + ppp_data.active_system_procs); + + /* pool related entries are apropriate for shared configs */ + if (lppaca[0].shared_proc) { + unsigned long pool_idle_time, pool_procs; + + seq_printf(m, "pool=%d\n", ppp_data.pool_num); + + /* report pool_capacity in percentage */ + seq_printf(m, "pool_capacity=%d\n", + ppp_data.active_procs_in_pool * 100); + + h_pic(&pool_idle_time, &pool_procs); + seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); + seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + } + + seq_printf(m, "unallocated_capacity_weight=%d\n", + ppp_data.unallocated_weight); + seq_printf(m, "capacity_weight=%d\n", ppp_data.weight); + seq_printf(m, "capped=%d\n", ppp_data.capped); + seq_printf(m, "unallocated_capacity=%ld\n", + ppp_data.unallocated_entitlement); +} + +/** + * parse_mpp_data + * Parse out data returned from h_get_mpp + */ +static void parse_mpp_data(struct seq_file *m) +{ + struct hvcall_mpp_data mpp_data; + int rc; + + rc = h_get_mpp(&mpp_data); + if (rc) + return; + + seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem); + + if (mpp_data.mapped_mem != -1) + seq_printf(m, "mapped_entitled_memory=%ld\n", + mpp_data.mapped_mem); + + seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num); + seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num); + + seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight); + seq_printf(m, "unallocated_entitled_memory_weight=%d\n", + mpp_data.unallocated_mem_weight); + seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n", + mpp_data.unallocated_entitlement); + + if (mpp_data.pool_size != -1) + seq_printf(m, "entitled_memory_pool_size=%ld bytes\n", + mpp_data.pool_size); + + seq_printf(m, "entitled_memory_loan_request=%ld\n", + mpp_data.loan_request); + + seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem); } #define SPLPAR_CHARACTERISTICS_TOKEN 20 @@ -313,6 +410,25 @@ static int lparcfg_count_active_processors(void) return count; } +static void pseries_cmo_data(struct seq_file *m) +{ + int cpu; + unsigned long cmo_faults = 0; + unsigned long cmo_fault_time = 0; + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return; + + for_each_possible_cpu(cpu) { + cmo_faults += lppaca[cpu].cmo_faults; + cmo_fault_time += lppaca[cpu].cmo_fault_time; + } + + seq_printf(m, "cmo_faults=%lu\n", cmo_faults); + seq_printf(m, "cmo_fault_time_usec=%lu\n", + cmo_fault_time / tb_ticks_per_usec); +} + static int pseries_lparcfg_data(struct seq_file *m, void *v) { int partition_potential_processors; @@ -334,60 +450,13 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) partition_active_processors = lparcfg_count_active_processors(); if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - unsigned long h_entitled, h_unallocated; - unsigned long h_aggregation, h_resource; - unsigned long pool_idle_time, pool_procs; - unsigned long purr; - - h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation, - &h_resource); - - seq_printf(m, "R4=0x%lx\n", h_entitled); - seq_printf(m, "R5=0x%lx\n", h_unallocated); - seq_printf(m, "R6=0x%lx\n", h_aggregation); - seq_printf(m, "R7=0x%lx\n", h_resource); - - purr = get_purr(); - /* this call handles the ibm,get-system-parameter contents */ parse_system_parameter_string(m); + parse_ppp_data(m); + parse_mpp_data(m); + pseries_cmo_data(m); - seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled); - - seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff); - - seq_printf(m, "system_active_processors=%ld\n", - (h_resource >> 0 * 8) & 0xffff); - - /* pool related entries are apropriate for shared configs */ - if (lppaca[0].shared_proc) { - - h_pic(&pool_idle_time, &pool_procs); - - seq_printf(m, "pool=%ld\n", - (h_aggregation >> 0 * 8) & 0xffff); - - /* report pool_capacity in percentage */ - seq_printf(m, "pool_capacity=%ld\n", - ((h_resource >> 2 * 8) & 0xffff) * 100); - - seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); - - seq_printf(m, "pool_num_procs=%ld\n", pool_procs); - } - - seq_printf(m, "unallocated_capacity_weight=%ld\n", - (h_resource >> 4 * 8) & 0xFF); - - seq_printf(m, "capacity_weight=%ld\n", - (h_resource >> 5 * 8) & 0xFF); - - seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01); - - seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated); - - seq_printf(m, "purr=%ld\n", purr); - + seq_printf(m, "purr=%ld\n", get_purr()); } else { /* non SPLPAR case */ seq_printf(m, "system_active_processors=%d\n", @@ -414,6 +483,83 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) return 0; } +static ssize_t update_ppp(u64 *entitlement, u8 *weight) +{ + struct hvcall_ppp_data ppp_data; + u8 new_weight; + u64 new_entitled; + ssize_t retval; + + /* Get our current parameters */ + retval = h_get_ppp(&ppp_data); + if (retval) + return retval; + + if (entitlement) { + new_weight = ppp_data.weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = ppp_data.entitlement; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %lu, current_weight = %u\n", + __FUNCTION__, ppp_data.entitlement, ppp_data.weight); + + pr_debug("%s: new_entitled = %lu, new_weight = %u\n", + __FUNCTION__, new_entitled, new_weight); + + retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight); + return retval; +} + +/** + * update_mpp + * + * Update the memory entitlement and weight for the partition. Caller must + * specify either a new entitlement or weight, not both, to be updated + * since the h_set_mpp call takes both entitlement and weight as parameters. + */ +static ssize_t update_mpp(u64 *entitlement, u8 *weight) +{ + struct hvcall_mpp_data mpp_data; + u64 new_entitled; + u8 new_weight; + ssize_t rc; + + if (entitlement) { + /* Check with vio to ensure the new memory entitlement + * can be handled. + */ + rc = vio_cmo_entitlement_update(*entitlement); + if (rc) + return rc; + } + + rc = h_get_mpp(&mpp_data); + if (rc) + return rc; + + if (entitlement) { + new_weight = mpp_data.mem_weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = mpp_data.entitled_mem; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %lu, current_weight = %u\n", + __FUNCTION__, mpp_data.entitled_mem, mpp_data.mem_weight); + + pr_debug("%s: new_entitled = %lu, new_weight = %u\n", + __FUNCTION__, new_entitled, new_weight); + + rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight); + return rc; +} + /* * Interface for changing system parameters (variable capacity weight * and entitled capacity). Format of input is "param_name=value"; @@ -427,35 +573,27 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) static ssize_t lparcfg_write(struct file *file, const char __user * buf, size_t count, loff_t * off) { - char *kbuf; + int kbuf_sz = 64; + char kbuf[kbuf_sz]; char *tmp; u64 new_entitled, *new_entitled_ptr = &new_entitled; u8 new_weight, *new_weight_ptr = &new_weight; - - unsigned long current_entitled; /* parameters for h_get_ppp */ - unsigned long dummy; - unsigned long resource; - u8 current_weight; - - ssize_t retval = -ENOMEM; + ssize_t retval; if (!firmware_has_feature(FW_FEATURE_SPLPAR) || firmware_has_feature(FW_FEATURE_ISERIES)) return -EINVAL; - kbuf = kmalloc(count, GFP_KERNEL); - if (!kbuf) - goto out; + if (count > kbuf_sz) + return -EINVAL; - retval = -EFAULT; if (copy_from_user(kbuf, buf, count)) - goto out; + return -EFAULT; - retval = -EINVAL; kbuf[count - 1] = '\0'; tmp = strchr(kbuf, '='); if (!tmp) - goto out; + return -EINVAL; *tmp++ = '\0'; @@ -463,34 +601,32 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, char *endp; *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); if (endp == tmp) - goto out; - new_weight_ptr = ¤t_weight; + return -EINVAL; + + retval = update_ppp(new_entitled_ptr, NULL); } else if (!strcmp(kbuf, "capacity_weight")) { char *endp; *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); if (endp == tmp) - goto out; - new_entitled_ptr = ¤t_entitled; - } else - goto out; - - /* Get our current parameters */ - retval = h_get_ppp(¤t_entitled, &dummy, &dummy, &resource); - if (retval) { - retval = -EIO; - goto out; - } - - current_weight = (resource >> 5 * 8) & 0xFF; + return -EINVAL; - pr_debug("%s: current_entitled = %lu, current_weight = %u\n", - __func__, current_entitled, current_weight); + retval = update_ppp(NULL, new_weight_ptr); + } else if (!strcmp(kbuf, "entitled_memory")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; - pr_debug("%s: new_entitled = %lu, new_weight = %u\n", - __func__, *new_entitled_ptr, *new_weight_ptr); + retval = update_mpp(new_entitled_ptr, NULL); + } else if (!strcmp(kbuf, "entitled_memory_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; - retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr, - *new_weight_ptr); + retval = update_mpp(NULL, new_weight_ptr); + } else + return -EINVAL; if (retval == H_SUCCESS || retval == H_CONSTRAINED) { retval = count; @@ -506,8 +642,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, retval = -EIO; } -out: - kfree(kbuf); return retval; } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 219f363411..db2497ccc1 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -47,6 +47,8 @@ #ifdef CONFIG_PPC64 #include #endif +#include +#include extern unsigned long _get_SP(void); @@ -239,6 +241,35 @@ void discard_lazy_cpu_state(void) } #endif /* CONFIG_SMP */ +void do_dabr(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + siginfo_t info; + + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return; + + if (debugger_dabr_match(regs)) + return; + + /* Clear the DAC and struct entries. One shot trigger */ +#if (defined(CONFIG_44x) || defined(CONFIG_BOOKE)) + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W + | DBCR0_IDM)); +#endif + + /* Clear the DABR */ + set_dabr(0); + + /* Deliver the signal to userspace */ + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = (void __user *)address; + force_sig_info(SIGTRAP, &info, current); +} + static DEFINE_PER_CPU(unsigned long, current_dabr); int set_dabr(unsigned long dabr) @@ -254,6 +285,11 @@ int set_dabr(unsigned long dabr) #if defined(CONFIG_PPC64) || defined(CONFIG_6xx) mtspr(SPRN_DABR, dabr); #endif + +#if defined(CONFIG_44x) || defined(CONFIG_BOOKE) + mtspr(SPRN_DAC1, dabr); +#endif + return 0; } @@ -337,6 +373,12 @@ struct task_struct *__switch_to(struct task_struct *prev, if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) set_dabr(new->thread.dabr); +#if defined(CONFIG_44x) || defined(CONFIG_BOOKE) + /* If new thread DAC (HW breakpoint) is the same then leave it */ + if (new->thread.dabr) + set_dabr(new->thread.dabr); +#endif + new_thread = &new->thread; old_thread = ¤t->thread; @@ -525,6 +567,10 @@ void flush_thread(void) if (current->thread.dabr) { current->thread.dabr = 0; set_dabr(0); + +#if defined(CONFIG_44x) || defined(CONFIG_BOOKE) + current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W); +#endif } } diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 1ea8c8d3ce..c4ab2195b9 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -643,6 +643,11 @@ static void __init early_cmdline_parse(void) #else #define OV5_MSI 0x00 #endif /* CONFIG_PCI_MSI */ +#ifdef CONFIG_PPC_SMLPAR +#define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */ +#else +#define OV5_CMO 0x00 +#endif /* * The architecture vector has an array of PVR mask/value pairs, @@ -687,10 +692,12 @@ static unsigned char ibm_architecture_vec[] = { 0, /* don't halt */ /* option vector 5: PAPR/OF options */ - 3 - 2, /* length */ + 5 - 2, /* length */ 0, /* don't ignore, don't halt */ OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_DONATE_DEDICATE_CPU | OV5_MSI, + 0, + OV5_CMO, }; /* Old method - ELF header with PT_NOTE sections */ diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 8feb93e789..a5d0e78779 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -703,7 +703,7 @@ void user_enable_single_step(struct task_struct *task) if (regs != NULL) { #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - task->thread.dbcr0 = DBCR0_IDM | DBCR0_IC; + task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; regs->msr |= MSR_DE; #else regs->msr |= MSR_SE; @@ -716,9 +716,16 @@ void user_disable_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; + +#if defined(CONFIG_44x) || defined(CONFIG_BOOKE) + /* If DAC then do not single step, skip */ + if (task->thread.dabr) + return; +#endif + if (regs != NULL) { #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - task->thread.dbcr0 = 0; + task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_IDM); regs->msr &= ~MSR_DE; #else regs->msr &= ~MSR_SE; @@ -727,22 +734,75 @@ void user_disable_single_step(struct task_struct *task) clear_tsk_thread_flag(task, TIF_SINGLESTEP); } -static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, +int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { - /* We only support one DABR and no IABRS at the moment */ + /* For ppc64 we support one DABR and no IABR's at the moment (ppc64). + * For embedded processors we support one DAC and no IAC's at the + * moment. + */ if (addr > 0) return -EINVAL; - /* The bottom 3 bits are flags */ if ((data & ~0x7UL) >= TASK_SIZE) return -EIO; - /* Ensure translation is on */ +#ifdef CONFIG_PPC64 + + /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. + * It was assumed, on previous implementations, that 3 bits were + * passed together with the data address, fitting the design of the + * DABR register, as follows: + * + * bit 0: Read flag + * bit 1: Write flag + * bit 2: Breakpoint translation + * + * Thus, we use them here as so. + */ + + /* Ensure breakpoint translation bit is set */ if (data && !(data & DABR_TRANSLATION)) return -EIO; + /* Move contents to the DABR register */ task->thread.dabr = data; + +#endif +#if defined(CONFIG_44x) || defined(CONFIG_BOOKE) + + /* As described above, it was assumed 3 bits were passed with the data + * address, but we will assume only the mode bits will be passed + * as to not cause alignment restrictions for DAC-based processors. + */ + + /* DAC's hold the whole address without any mode flags */ + task->thread.dabr = data & ~0x3UL; + + if (task->thread.dabr == 0) { + task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM); + task->thread.regs->msr &= ~MSR_DE; + return 0; + } + + /* Read or Write bits must be set */ + + if (!(data & 0x3UL)) + return -EINVAL; + + /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 + register */ + task->thread.dbcr0 = DBCR0_IDM; + + /* Check for write and read flags and set DBCR0 + accordingly */ + if (data & 0x1UL) + task->thread.dbcr0 |= DBSR_DAC1R; + if (data & 0x2UL) + task->thread.dbcr0 |= DBSR_DAC1W; + + task->thread.regs->msr |= MSR_DE; +#endif return 0; } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index ad55488939..7aada783ec 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -145,8 +145,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs) * user space. The DABR will have been cleared if it * triggered inside the kernel. */ - if (current->thread.dabr) + if (current->thread.dabr) { set_dabr(current->thread.dabr); +#if defined(CONFIG_44x) || defined(CONFIG_BOOKE) + mtspr(SPRN_DBCR0, current->thread.dbcr0); +#endif + } if (is32) { if (ka.sa.sa_flags & SA_SIGINFO) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index aba0ba95f0..800e5e9a08 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -529,7 +529,8 @@ static void register_nodes(void) #endif /* Only valid if CPU is present. */ -static ssize_t show_physical_id(struct sys_device *dev, char *buf) +static ssize_t show_physical_id(struct sys_device *dev, + struct sysdev_attribute *attr, char *buf) { struct cpu *cpu = container_of(dev, struct cpu, sysdev); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 878fbddb6a..81ccb8dd1a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1067,6 +1067,22 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) } _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); + } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { + regs->msr &= ~MSR_DE; + + if (user_mode(regs)) { + current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | + DBCR0_IDM); + } else { + /* Disable DAC interupts */ + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | + DBSR_DAC1W | DBCR0_IDM)); + + /* Clear the DAC event */ + mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W)); + } + /* Setup and send the trap to the handler */ + do_dabr(regs, mfspr(SPRN_DAC1), debug_status); } } #endif /* CONFIG_4xx || CONFIG_BOOKE */ diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index b77f8af7dd..ade8aeaa2e 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1,11 +1,12 @@ /* * IBM PowerPC Virtual I/O Infrastructure Support. * - * Copyright (c) 2003-2005 IBM Corp. + * Copyright (c) 2003,2008 IBM Corp. * Dave Engebretsen engebret@us.ibm.com * Santiago Leon santil@us.ibm.com * Hollis Blanchard * Stephen Rothwell + * Robert Jennings * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -46,6 +47,996 @@ static struct vio_dev vio_bus_device = { /* fake "parent" device */ .dev.bus = &vio_bus_type, }; +#ifdef CONFIG_PPC_SMLPAR +/** + * vio_cmo_pool - A pool of IO memory for CMO use + * + * @size: The size of the pool in bytes + * @free: The amount of free memory in the pool + */ +struct vio_cmo_pool { + size_t size; + size_t free; +}; + +/* How many ms to delay queued balance work */ +#define VIO_CMO_BALANCE_DELAY 100 + +/* Portion out IO memory to CMO devices by this chunk size */ +#define VIO_CMO_BALANCE_CHUNK 131072 + +/** + * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement + * + * @vio_dev: struct vio_dev pointer + * @list: pointer to other devices on bus that are being tracked + */ +struct vio_cmo_dev_entry { + struct vio_dev *viodev; + struct list_head list; +}; + +/** + * vio_cmo - VIO bus accounting structure for CMO entitlement + * + * @lock: spinlock for entire structure + * @balance_q: work queue for balancing system entitlement + * @device_list: list of CMO-enabled devices requiring entitlement + * @entitled: total system entitlement in bytes + * @reserve: pool of memory from which devices reserve entitlement, incl. spare + * @excess: pool of excess entitlement not needed for device reserves or spare + * @spare: IO memory for device hotplug functionality + * @min: minimum necessary for system operation + * @desired: desired memory for system operation + * @curr: bytes currently allocated + * @high: high water mark for IO data usage + */ +struct vio_cmo { + spinlock_t lock; + struct delayed_work balance_q; + struct list_head device_list; + size_t entitled; + struct vio_cmo_pool reserve; + struct vio_cmo_pool excess; + size_t spare; + size_t min; + size_t desired; + size_t curr; + size_t high; +} vio_cmo; + +/** + * vio_cmo_OF_devices - Count the number of OF devices that have DMA windows + */ +static int vio_cmo_num_OF_devs(void) +{ + struct device_node *node_vroot; + int count = 0; + + /* + * Count the number of vdevice entries with an + * ibm,my-dma-window OF property + */ + node_vroot = of_find_node_by_name(NULL, "vdevice"); + if (node_vroot) { + struct device_node *of_node; + struct property *prop; + + for_each_child_of_node(node_vroot, of_node) { + prop = of_find_property(of_node, "ibm,my-dma-window", + NULL); + if (prop) + count++; + } + } + of_node_put(node_vroot); + return count; +} + +/** + * vio_cmo_alloc - allocate IO memory for CMO-enable devices + * + * @viodev: VIO device requesting IO memory + * @size: size of allocation requested + * + * Allocations come from memory reserved for the devices and any excess + * IO memory available to all devices. The spare pool used to service + * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be + * made available. + * + * Return codes: + * 0 for successful allocation and -ENOMEM for a failure + */ +static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size) +{ + unsigned long flags; + size_t reserve_free = 0; + size_t excess_free = 0; + int ret = -ENOMEM; + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Determine the amount of free entitlement available in reserve */ + if (viodev->cmo.entitled > viodev->cmo.allocated) + reserve_free = viodev->cmo.entitled - viodev->cmo.allocated; + + /* If spare is not fulfilled, the excess pool can not be used. */ + if (vio_cmo.spare >= VIO_CMO_MIN_ENT) + excess_free = vio_cmo.excess.free; + + /* The request can be satisfied */ + if ((reserve_free + excess_free) >= size) { + vio_cmo.curr += size; + if (vio_cmo.curr > vio_cmo.high) + vio_cmo.high = vio_cmo.curr; + viodev->cmo.allocated += size; + size -= min(reserve_free, size); + vio_cmo.excess.free -= size; + ret = 0; + } + + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return ret; +} + +/** + * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices + * @viodev: VIO device freeing IO memory + * @size: size of deallocation + * + * IO memory is freed by the device back to the correct memory pools. + * The spare pool is replenished first from either memory pool, then + * the reserve pool is used to reduce device entitlement, the excess + * pool is used to increase the reserve pool toward the desired entitlement + * target, and then the remaining memory is returned to the pools. + * + */ +static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size) +{ + unsigned long flags; + size_t spare_needed = 0; + size_t excess_freed = 0; + size_t reserve_freed = size; + size_t tmp; + int balance = 0; + + spin_lock_irqsave(&vio_cmo.lock, flags); + vio_cmo.curr -= size; + + /* Amount of memory freed from the excess pool */ + if (viodev->cmo.allocated > viodev->cmo.entitled) { + excess_freed = min(reserve_freed, (viodev->cmo.allocated - + viodev->cmo.entitled)); + reserve_freed -= excess_freed; + } + + /* Remove allocation from device */ + viodev->cmo.allocated -= (reserve_freed + excess_freed); + + /* Spare is a subset of the reserve pool, replenish it first. */ + spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare; + + /* + * Replenish the spare in the reserve pool from the excess pool. + * This moves entitlement into the reserve pool. + */ + if (spare_needed && excess_freed) { + tmp = min(excess_freed, spare_needed); + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + vio_cmo.spare += tmp; + excess_freed -= tmp; + spare_needed -= tmp; + balance = 1; + } + + /* + * Replenish the spare in the reserve pool from the reserve pool. + * This removes entitlement from the device down to VIO_CMO_MIN_ENT, + * if needed, and gives it to the spare pool. The amount of used + * memory in this pool does not change. + */ + if (spare_needed && reserve_freed) { + tmp = min(spare_needed, min(reserve_freed, + (viodev->cmo.entitled - + VIO_CMO_MIN_ENT))); + + vio_cmo.spare += tmp; + viodev->cmo.entitled -= tmp; + reserve_freed -= tmp; + spare_needed -= tmp; + balance = 1; + } + + /* + * Increase the reserve pool until the desired allocation is met. + * Move an allocation freed from the excess pool into the reserve + * pool and schedule a balance operation. + */ + if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) { + tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size)); + + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + excess_freed -= tmp; + balance = 1; + } + + /* Return memory from the excess pool to that pool */ + if (excess_freed) + vio_cmo.excess.free += excess_freed; + + if (balance) + schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +/** + * vio_cmo_entitlement_update - Manage system entitlement changes + * + * @new_entitlement: new system entitlement to attempt to accommodate + * + * Increases in entitlement will be used to fulfill the spare entitlement + * and the rest is given to the excess pool. Decreases, if they are + * possible, come from the excess pool and from unused device entitlement + * + * Returns: 0 on success, -ENOMEM when change can not be made + */ +int vio_cmo_entitlement_update(size_t new_entitlement) +{ + struct vio_dev *viodev; + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t avail, delta, tmp; + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Entitlement increases */ + if (new_entitlement > vio_cmo.entitled) { + delta = new_entitlement - vio_cmo.entitled; + + /* Fulfill spare allocation */ + if (vio_cmo.spare < VIO_CMO_MIN_ENT) { + tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare)); + vio_cmo.spare += tmp; + vio_cmo.reserve.size += tmp; + delta -= tmp; + } + + /* Remaining new allocation goes to the excess pool */ + vio_cmo.entitled += delta; + vio_cmo.excess.size += delta; + vio_cmo.excess.free += delta; + + goto out; + } + + /* Entitlement decreases */ + delta = vio_cmo.entitled - new_entitlement; + avail = vio_cmo.excess.free; + + /* + * Need to check how much unused entitlement each device can + * sacrifice to fulfill entitlement change. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + if (avail >= delta) + break; + + viodev = dev_ent->viodev; + if ((viodev->cmo.entitled > viodev->cmo.allocated) && + (viodev->cmo.entitled > VIO_CMO_MIN_ENT)) + avail += viodev->cmo.entitled - + max_t(size_t, viodev->cmo.allocated, + VIO_CMO_MIN_ENT); + } + + if (delta <= avail) { + vio_cmo.entitled -= delta; + + /* Take entitlement from the excess pool first */ + tmp = min(vio_cmo.excess.free, delta); + vio_cmo.excess.size -= tmp; + vio_cmo.excess.free -= tmp; + delta -= tmp; + + /* + * Remove all but VIO_CMO_MIN_ENT bytes from devices + * until entitlement change is served + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + if (!delta) + break; + + viodev = dev_ent->viodev; + tmp = 0; + if ((viodev->cmo.entitled > viodev->cmo.allocated) && + (viodev->cmo.entitled > VIO_CMO_MIN_ENT)) + tmp = viodev->cmo.entitled - + max_t(size_t, viodev->cmo.allocated, + VIO_CMO_MIN_ENT); + viodev->cmo.entitled -= min(tmp, delta); + delta -= min(tmp, delta); + } + } else { + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return -ENOMEM; + } + +out: + schedule_delayed_work(&vio_cmo.balance_q, 0); + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return 0; +} + +/** + * vio_cmo_balance - Balance entitlement among devices + * + * @work: work queue structure for this operation + * + * Any system entitlement above the minimum needed for devices, or + * already allocated to devices, can be distributed to the devices. + * The list of devices is iterated through to recalculate the desired + * entitlement level and to determine how much entitlement above the + * minimum entitlement is allocated to devices. + * + * Small chunks of the available entitlement are given to devices until + * their requirements are fulfilled or there is no entitlement left to give. + * Upon completion sizes of the reserve and excess pools are calculated. + * + * The system minimum entitlement level is also recalculated here. + * Entitlement will be reserved for devices even after vio_bus_remove to + * accommodate reloading the driver. The OF tree is walked to count the + * number of devices present and this will remove entitlement for devices + * that have actually left the system after having vio_bus_remove called. + */ +static void vio_cmo_balance(struct work_struct *work) +{ + struct vio_cmo *cmo; + struct vio_dev *viodev; + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t avail = 0, level, chunk, need; + int devcount = 0, fulfilled; + + cmo = container_of(work, struct vio_cmo, balance_q.work); + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Calculate minimum entitlement and fulfill spare */ + cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT; + BUG_ON(cmo->min > cmo->entitled); + cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min)); + cmo->min += cmo->spare; + cmo->desired = cmo->min; + + /* + * Determine how much entitlement is available and reset device + * entitlements + */ + avail = cmo->entitled - cmo->spare; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + devcount++; + viodev->cmo.entitled = VIO_CMO_MIN_ENT; + cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT); + avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT); + } + + /* + * Having provided each device with the minimum entitlement, loop + * over the devices portioning out the remaining entitlement + * until there is nothing left. + */ + level = VIO_CMO_MIN_ENT; + while (avail) { + fulfilled = 0; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + + if (viodev->cmo.desired <= level) { + fulfilled++; + continue; + } + + /* + * Give the device up to VIO_CMO_BALANCE_CHUNK + * bytes of entitlement, but do not exceed the + * desired level of entitlement for the device. + */ + chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK); + chunk = min(chunk, (viodev->cmo.desired - + viodev->cmo.entitled)); + viodev->cmo.entitled += chunk; + + /* + * If the memory for this entitlement increase was + * already allocated to the device it does not come + * from the available pool being portioned out. + */ + need = max(viodev->cmo.allocated, viodev->cmo.entitled)- + max(viodev->cmo.allocated, level); + avail -= need; + + } + if (fulfilled == devcount) + break; + level += VIO_CMO_BALANCE_CHUNK; + } + + /* Calculate new reserve and excess pool sizes */ + cmo->reserve.size = cmo->min; + cmo->excess.free = 0; + cmo->excess.size = 0; + need = 0; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + /* Calculated reserve size above the minimum entitlement */ + if (viodev->cmo.entitled) + cmo->reserve.size += (viodev->cmo.entitled - + VIO_CMO_MIN_ENT); + /* Calculated used excess entitlement */ + if (viodev->cmo.allocated > viodev->cmo.entitled) + need += viodev->cmo.allocated - viodev->cmo.entitled; + } + cmo->excess.size = cmo->entitled - cmo->reserve.size; + cmo->excess.free = cmo->excess.size - need; + + cancel_delayed_work(container_of(work, struct delayed_work, work)); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + struct vio_dev *viodev = to_vio_dev(dev); + void *ret; + + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) { + atomic_inc(&viodev->cmo.allocs_failed); + return NULL; + } + + ret = dma_iommu_ops.alloc_coherent(dev, size, dma_handle, flag); + if (unlikely(ret == NULL)) { + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + atomic_inc(&viodev->cmo.allocs_failed); + } + + return ret; +} + +static void vio_dma_iommu_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + struct vio_dev *viodev = to_vio_dev(dev); + + dma_iommu_ops.free_coherent(dev, size, vaddr, dma_handle); + + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); +} + +static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr, + size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + dma_addr_t ret = DMA_ERROR_CODE; + + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) { + atomic_inc(&viodev->cmo.allocs_failed); + return ret; + } + + ret = dma_iommu_ops.map_single(dev, vaddr, size, direction, attrs); + if (unlikely(dma_mapping_error(ret))) { + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + atomic_inc(&viodev->cmo.allocs_failed); + } + + return ret; +} + +static void vio_dma_iommu_unmap_single(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + + dma_iommu_ops.unmap_single(dev, dma_handle, size, direction, attrs); + + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); +} + +static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct scatterlist *sgl; + int ret, count = 0; + size_t alloc_size = 0; + + for (sgl = sglist; count < nelems; count++, sgl++) + alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE); + + if (vio_cmo_alloc(viodev, alloc_size)) { + atomic_inc(&viodev->cmo.allocs_failed); + return 0; + } + + ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs); + + if (unlikely(!ret)) { + vio_cmo_dealloc(viodev, alloc_size); + atomic_inc(&viodev->cmo.allocs_failed); + } + + for (sgl = sglist, count = 0; count < ret; count++, sgl++) + alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + if (alloc_size) + vio_cmo_dealloc(viodev, alloc_size); + + return ret; +} + +static void vio_dma_iommu_unmap_sg(struct device *dev, + struct scatterlist *sglist, int nelems, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct scatterlist *sgl; + size_t alloc_size = 0; + int count = 0; + + for (sgl = sglist; count < nelems; count++, sgl++) + alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + + dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); + + vio_cmo_dealloc(viodev, alloc_size); +} + +struct dma_mapping_ops vio_dma_mapping_ops = { + .alloc_coherent = vio_dma_iommu_alloc_coherent, + .free_coherent = vio_dma_iommu_free_coherent, + .map_single = vio_dma_iommu_map_single, + .unmap_single = vio_dma_iommu_unmap_single, + .map_sg = vio_dma_iommu_map_sg, + .unmap_sg = vio_dma_iommu_unmap_sg, +}; + +/** + * vio_cmo_set_dev_desired - Set desired entitlement for a device + * + * @viodev: struct vio_dev for device to alter + * @new_desired: new desired entitlement level in bytes + * + * For use by devices to request a change to their entitlement at runtime or + * through sysfs. The desired entitlement level is changed and a balancing + * of system resources is scheduled to run in the future. + */ +void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) +{ + unsigned long flags; + struct vio_cmo_dev_entry *dev_ent; + int found = 0; + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return; + + spin_lock_irqsave(&vio_cmo.lock, flags); + if (desired < VIO_CMO_MIN_ENT) + desired = VIO_CMO_MIN_ENT; + + /* + * Changes will not be made for devices not in the device list. + * If it is not in the device list, then no driver is loaded + * for the device and it can not receive entitlement. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) + if (viodev == dev_ent->viodev) { + found = 1; + break; + } + if (!found) + return; + + /* Increase/decrease in desired device entitlement */ + if (desired >= viodev->cmo.desired) { + /* Just bump the bus and device values prior to a balance*/ + vio_cmo.desired += desired - viodev->cmo.desired; + viodev->cmo.desired = desired; + } else { + /* Decrease bus and device values for desired entitlement */ + vio_cmo.desired -= viodev->cmo.desired - desired; + viodev->cmo.desired = desired; + /* + * If less entitlement is desired than current entitlement, move + * any reserve memory in the change region to the excess pool. + */ + if (viodev->cmo.entitled > desired) { + vio_cmo.reserve.size -= viodev->cmo.entitled - desired; + vio_cmo.excess.size += viodev->cmo.entitled - desired; + /* + * If entitlement moving from the reserve pool to the + * excess pool is currently unused, add to the excess + * free counter. + */ + if (viodev->cmo.allocated < viodev->cmo.entitled) + vio_cmo.excess.free += viodev->cmo.entitled - + max(viodev->cmo.allocated, desired); + viodev->cmo.entitled = desired; + } + } + schedule_delayed_work(&vio_cmo.balance_q, 0); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +/** + * vio_cmo_bus_probe - Handle CMO specific bus probe activities + * + * @viodev - Pointer to struct vio_dev for device + * + * Determine the devices IO memory entitlement needs, attempting + * to satisfy the system minimum entitlement at first and scheduling + * a balance operation to take care of the rest at a later time. + * + * Returns: 0 on success, -EINVAL when device doesn't support CMO, and + * -ENOMEM when entitlement is not available for device or + * device entry. + * + */ +static int vio_cmo_bus_probe(struct vio_dev *viodev) +{ + struct vio_cmo_dev_entry *dev_ent; + struct device *dev = &viodev->dev; + struct vio_driver *viodrv = to_vio_driver(dev->driver); + unsigned long flags; + size_t size; + + /* + * Check to see that device has a DMA window and configure + * entitlement for the device. + */ + if (of_get_property(viodev->dev.archdata.of_node, + "ibm,my-dma-window", NULL)) { + /* Check that the driver is CMO enabled and get desired DMA */ + if (!viodrv->get_desired_dma) { + dev_err(dev, "%s: device driver does not support CMO\n", + __func__); + return -EINVAL; + } + + viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev)); + if (viodev->cmo.desired < VIO_CMO_MIN_ENT) + viodev->cmo.desired = VIO_CMO_MIN_ENT; + size = VIO_CMO_MIN_ENT; + + dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry), + GFP_KERNEL); + if (!dev_ent) + return -ENOMEM; + + dev_ent->viodev = viodev; + spin_lock_irqsave(&vio_cmo.lock, flags); + list_add(&dev_ent->list, &vio_cmo.device_list); + } else { + viodev->cmo.desired = 0; + size = 0; + spin_lock_irqsave(&vio_cmo.lock, flags); + } + + /* + * If the needs for vio_cmo.min have not changed since they + * were last set, the number of devices in the OF tree has + * been constant and the IO memory for this is already in + * the reserve pool. + */ + if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) * + VIO_CMO_MIN_ENT)) { + /* Updated desired entitlement if device requires it */ + if (size) + vio_cmo.desired += (viodev->cmo.desired - + VIO_CMO_MIN_ENT); + } else { + size_t tmp; + + tmp = vio_cmo.spare + vio_cmo.excess.free; + if (tmp < size) { + dev_err(dev, "%s: insufficient free " + "entitlement to add device. " + "Need %lu, have %lu\n", __func__, + size, (vio_cmo.spare + tmp)); + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return -ENOMEM; + } + + /* Use excess pool first to fulfill request */ + tmp = min(size, vio_cmo.excess.free); + vio_cmo.excess.free -= tmp; + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + + /* Use spare if excess pool was insufficient */ + vio_cmo.spare -= size - tmp; + + /* Update bus accounting */ + vio_cmo.min += size; + vio_cmo.desired += viodev->cmo.desired; + } + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return 0; +} + +/** + * vio_cmo_bus_remove - Handle CMO specific bus removal activities + * + * @viodev - Pointer to struct vio_dev for device + * + * Remove the device from the cmo device list. The minimum entitlement + * will be reserved for the device as long as it is in the system. The + * rest of the entitlement the device had been allocated will be returned + * to the system. + */ +static void vio_cmo_bus_remove(struct vio_dev *viodev) +{ + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t tmp; + + spin_lock_irqsave(&vio_cmo.lock, flags); + if (viodev->cmo.allocated) { + dev_err(&viodev->dev, "%s: device had %lu bytes of IO " + "allocated after remove operation.\n", + __func__, viodev->cmo.allocated); + BUG(); + } + + /* + * Remove the device from the device list being maintained for + * CMO enabled devices. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) + if (viodev == dev_ent->viodev) { + list_del(&dev_ent->list); + kfree(dev_ent); + break; + } + + /* + * Devices may not require any entitlement and they do not need + * to be processed. Otherwise, return the device's entitlement + * back to the pools. + */ + if (viodev->cmo.entitled) { + /* + * This device has not yet left the OF tree, it's + * minimum entitlement remains in vio_cmo.min and + * vio_cmo.desired + */ + vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT); + + /* + * Save min allocation for device in reserve as long + * as it exists in OF tree as determined by later + * balance operation + */ + viodev->cmo.entitled -= VIO_CMO_MIN_ENT; + + /* Replenish spare from freed reserve pool */ + if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) { + tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT - + vio_cmo.spare)); + vio_cmo.spare += tmp; + viodev->cmo.entitled -= tmp; + } + + /* Remaining reserve goes to excess pool */ + vio_cmo.excess.size += viodev->cmo.entitled; + vio_cmo.excess.free += viodev->cmo.entitled; + vio_cmo.reserve.size -= viodev->cmo.entitled; + + /* + * Until the device is removed it will keep a + * minimum entitlement; this will guarantee that + * a module unload/load will result in a success. + */ + viodev->cmo.entitled = VIO_CMO_MIN_ENT; + viodev->cmo.desired = VIO_CMO_MIN_ENT; + atomic_set(&viodev->cmo.allocs_failed, 0); + } + + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +static void vio_cmo_set_dma_ops(struct vio_dev *viodev) +{ + vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported; + viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops; +} + +/** + * vio_cmo_bus_init - CMO entitlement initialization at bus init time + * + * Set up the reserve and excess entitlement pools based on available + * system entitlement and the number of devices in the OF tree that + * require entitlement in the reserve pool. + */ +static void vio_cmo_bus_init(void) +{ + struct hvcall_mpp_data mpp_data; + int err; + + memset(&vio_cmo, 0, sizeof(struct vio_cmo)); + spin_lock_init(&vio_cmo.lock); + INIT_LIST_HEAD(&vio_cmo.device_list); + INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance); + + /* Get current system entitlement */ + err = h_get_mpp(&mpp_data); + + /* + * On failure, continue with entitlement set to 0, will panic() + * later when spare is reserved. + */ + if (err != H_SUCCESS) { + printk(KERN_ERR "%s: unable to determine system IO "\ + "entitlement. (%d)\n", __func__, err); + vio_cmo.entitled = 0; + } else { + vio_cmo.entitled = mpp_data.entitled_mem; + } + + /* Set reservation and check against entitlement */ + vio_cmo.spare = VIO_CMO_MIN_ENT; + vio_cmo.reserve.size = vio_cmo.spare; + vio_cmo.reserve.size += (vio_cmo_num_OF_devs() * + VIO_CMO_MIN_ENT); + if (vio_cmo.reserve.size > vio_cmo.entitled) { + printk(KERN_ERR "%s: insufficient system entitlement\n", + __func__); + panic("%s: Insufficient system entitlement", __func__); + } + + /* Set the remaining accounting variables */ + vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size; + vio_cmo.excess.free = vio_cmo.excess.size; + vio_cmo.min = vio_cmo.reserve.size; + vio_cmo.desired = vio_cmo.reserve.size; +} + +/* sysfs device functions and data structures for CMO */ + +#define viodev_cmo_rd_attr(name) \ +static ssize_t viodev_cmo_##name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \ +} + +static ssize_t viodev_cmo_allocs_failed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct vio_dev *viodev = to_vio_dev(dev); + return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed)); +} + +static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct vio_dev *viodev = to_vio_dev(dev); + atomic_set(&viodev->cmo.allocs_failed, 0); + return count; +} + +static ssize_t viodev_cmo_desired_set(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct vio_dev *viodev = to_vio_dev(dev); + size_t new_desired; + int ret; + + ret = strict_strtoul(buf, 10, &new_desired); + if (ret) + return ret; + + vio_cmo_set_dev_desired(viodev, new_desired); + return count; +} + +viodev_cmo_rd_attr(desired); +viodev_cmo_rd_attr(entitled); +viodev_cmo_rd_attr(allocated); + +static ssize_t name_show(struct device *, struct device_attribute *, char *); +static ssize_t devspec_show(struct device *, struct device_attribute *, char *); +static struct device_attribute vio_cmo_dev_attrs[] = { + __ATTR_RO(name), + __ATTR_RO(devspec), + __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, + viodev_cmo_desired_show, viodev_cmo_desired_set), + __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL), + __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL), + __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, + viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset), + __ATTR_NULL +}; + +/* sysfs bus functions and data structures for CMO */ + +#define viobus_cmo_rd_attr(name) \ +static ssize_t \ +viobus_cmo_##name##_show(struct bus_type *bt, char *buf) \ +{ \ + return sprintf(buf, "%lu\n", vio_cmo.name); \ +} + +#define viobus_cmo_pool_rd_attr(name, var) \ +static ssize_t \ +viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf) \ +{ \ + return sprintf(buf, "%lu\n", vio_cmo.name.var); \ +} + +static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf, + size_t count) +{ + unsigned long flags; + + spin_lock_irqsave(&vio_cmo.lock, flags); + vio_cmo.high = vio_cmo.curr; + spin_unlock_irqrestore(&vio_cmo.lock, flags); + + return count; +} + +viobus_cmo_rd_attr(entitled); +viobus_cmo_pool_rd_attr(reserve, size); +viobus_cmo_pool_rd_attr(excess, size); +viobus_cmo_pool_rd_attr(excess, free); +viobus_cmo_rd_attr(spare); +viobus_cmo_rd_attr(min); +viobus_cmo_rd_attr(desired); +viobus_cmo_rd_attr(curr); +viobus_cmo_rd_attr(high); + +static struct bus_attribute vio_cmo_bus_attrs[] = { + __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL), + __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL), + __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL), + __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL), + __ATTR(cmo_spare, S_IRUGO, viobus_cmo_spare_show, NULL), + __ATTR(cmo_min, S_IRUGO, viobus_cmo_min_show, NULL), + __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL), + __ATTR(cmo_curr, S_IRUGO, viobus_cmo_curr_show, NULL), + __ATTR(cmo_high, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, + viobus_cmo_high_show, viobus_cmo_high_reset), + __ATTR_NULL +}; + +static void vio_cmo_sysfs_init(void) +{ + vio_bus_type.dev_attrs = vio_cmo_dev_attrs; + vio_bus_type.bus_attrs = vio_cmo_bus_attrs; +} +#else /* CONFIG_PPC_SMLPAR */ +/* Dummy functions for iSeries platform */ +int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; } +void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {} +static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; } +static void vio_cmo_bus_remove(struct vio_dev *viodev) {} +static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {} +static void vio_cmo_bus_init() {} +static void vio_cmo_sysfs_init() { } +#endif /* CONFIG_PPC_SMLPAR */ +EXPORT_SYMBOL(vio_cmo_entitlement_update); +EXPORT_SYMBOL(vio_cmo_set_dev_desired); + static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) { const unsigned char *dma_window; @@ -114,8 +1105,17 @@ static int vio_bus_probe(struct device *dev) return error; id = vio_match_device(viodrv->id_table, viodev); - if (id) + if (id) { + memset(&viodev->cmo, 0, sizeof(viodev->cmo)); + if (firmware_has_feature(FW_FEATURE_CMO)) { + error = vio_cmo_bus_probe(viodev); + if (error) + return error; + } error = viodrv->probe(viodev, id); + if (error) + vio_cmo_bus_remove(viodev); + } return error; } @@ -125,12 +1125,23 @@ static int vio_bus_remove(struct device *dev) { struct vio_dev *viodev = to_vio_dev(dev); struct vio_driver *viodrv = to_vio_driver(dev->driver); + struct device *devptr; + int ret = 1; + + /* + * Hold a reference to the device after the remove function is called + * to allow for CMO accounting cleanup for the device. + */ + devptr = get_device(dev); if (viodrv->remove) - return viodrv->remove(viodev); + ret = viodrv->remove(viodev); + + if (!ret && firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_bus_remove(viodev); - /* driver can't remove */ - return 1; + put_device(devptr); + return ret; } /** @@ -215,7 +1226,11 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) viodev->unit_address = *unit_address; } viodev->dev.archdata.of_node = of_node_get(of_node); - viodev->dev.archdata.dma_ops = &dma_iommu_ops; + + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_set_dma_ops(viodev); + else + viodev->dev.archdata.dma_ops = &dma_iommu_ops; viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev); viodev->dev.archdata.numa_node = of_node_to_nid(of_node); @@ -245,6 +1260,9 @@ static int __init vio_bus_init(void) int err; struct device_node *node_vroot; + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_sysfs_init(); + err = bus_register(&vio_bus_type); if (err) { printk(KERN_ERR "failed to register VIO bus\n"); @@ -262,6 +1280,9 @@ static int __init vio_bus_init(void) return err; } + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_bus_init(); + node_vroot = of_find_node_by_name(NULL, "vdevice"); if (node_vroot) { struct device_node *of_node; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index a914411bce..4a8ce62fe1 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -85,7 +85,7 @@ SECTIONS /* The dummy segment contents for the bug workaround mentioned above near PHDRS. */ - .dummy : { + .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) { LONG(0xf177) } :kernel :dummy diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 1707d00331..565b7a237c 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -100,31 +100,6 @@ static int store_updates_sp(struct pt_regs *regs) return 0; } -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) -static void do_dabr(struct pt_regs *regs, unsigned long address, - unsigned long error_code) -{ - siginfo_t info; - - if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, - 11, SIGSEGV) == NOTIFY_STOP) - return; - - if (debugger_dabr_match(regs)) - return; - - /* Clear the DABR */ - set_dabr(0); - - /* Deliver the signal to userspace */ - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_HWBKPT; - info.si_addr = (void __user *)address; - force_sig_info(SIGTRAP, &info, current); -} -#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ - /* * For 600- and 800-family processors, the error_code parameter is DSISR * for a data fault, SRR1 for an instruction fault. For 400-family processors diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig index ccbd495841..696a5ee496 100644 --- a/arch/powerpc/platforms/52xx/Kconfig +++ b/arch/powerpc/platforms/52xx/Kconfig @@ -1,7 +1,6 @@ config PPC_MPC52xx bool "52xx-based boards" depends on PPC_MULTIPLATFORM && PPC32 - select FSL_SOC select PPC_CLOCK select PPC_PCI_CHOICE @@ -49,5 +48,6 @@ config PPC_MPC5200_GPIO bool "MPC5200 GPIO support" depends on PPC_MPC52xx select ARCH_REQUIRE_GPIOLIB + select GENERIC_GPIO help Enable gpiolib support for mpc5200 based boards diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 208005ca26..e06420af5f 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -172,7 +172,7 @@ static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte, } } -static void tce_build_cell(struct iommu_table *tbl, long index, long npages, +static int tce_build_cell(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) { @@ -213,6 +213,7 @@ static void tce_build_cell(struct iommu_table *tbl, long index, long npages, pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n", index, npages, direction, base_pte); + return 0; } static void tce_free_cell(struct iommu_table *tbl, long index, long npages) @@ -1150,12 +1151,23 @@ static int iommu_fixed_disabled; static int __init setup_iommu_fixed(char *str) { + struct device_node *pciep; + if (strcmp(str, "off") == 0) iommu_fixed_disabled = 1; - else if (strcmp(str, "weak") == 0) + /* If we can find a pcie-endpoint in the device tree assume that + * we're on a triblade or a CAB so by default the fixed mapping + * should be set to be weakly ordered; but only if the boot + * option WASN'T set for strong ordering + */ + pciep = of_find_node_by_type(NULL, "pcie-endpoint"); + + if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0)) iommu_fixed_is_weak = 1; + of_node_put(pciep); + return 1; } __setup("iommu_fixed=", setup_iommu_fixed); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 3465474336..2deeeba7ec 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -312,10 +312,27 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, */ node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { + int available_spus; + node = (node < MAX_NUMNODES) ? node : 0; if (!node_allowed(ctx, node)) continue; + + available_spus = 0; mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->ctx && spu->ctx->gang + && spu->ctx->aff_offset == 0) + available_spus -= + (spu->ctx->gang->contexts - 1); + else + available_spus++; + } + if (available_spus < ctx->gang->contexts) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + continue; + } + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { if ((!mem_aff || spu->has_mem_affinity) && sched_spu(spu)) { @@ -389,6 +406,9 @@ static int has_affinity(struct spu_context *ctx) if (list_empty(&ctx->aff_list)) return 0; + if (atomic_read(&ctx->gang->aff_sched_count) == 0) + ctx->gang->aff_ref_spu = NULL; + if (!gang->aff_ref_spu) { if (!(gang->aff_flags & AFF_MERGED)) aff_merge_remaining_ctxs(gang); @@ -416,14 +436,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) if (spu->ctx->flags & SPU_CREATE_NOSCHED) atomic_dec(&cbe_spu_info[spu->node].reserved_spus); - if (ctx->gang){ - mutex_lock(&ctx->gang->aff_mutex); - if (has_affinity(ctx)) { - if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) - ctx->gang->aff_ref_spu = NULL; - } - mutex_unlock(&ctx->gang->aff_mutex); - } + if (ctx->gang) + atomic_dec_if_positive(&ctx->gang->aff_sched_count); spu_switch_notify(spu, NULL); spu_unmap_mappings(ctx); @@ -562,10 +576,7 @@ static struct spu *spu_get_idle(struct spu_context *ctx) goto found; mutex_unlock(&cbe_spu_info[node].list_mutex); - mutex_lock(&ctx->gang->aff_mutex); - if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) - ctx->gang->aff_ref_spu = NULL; - mutex_unlock(&ctx->gang->aff_mutex); + atomic_dec(&ctx->gang->aff_sched_count); goto not_found; } mutex_unlock(&ctx->gang->aff_mutex); diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c index 8c0e95766a..92d20e993e 100644 --- a/arch/powerpc/platforms/cell/spufs/sputrace.c +++ b/arch/powerpc/platforms/cell/spufs/sputrace.c @@ -196,8 +196,7 @@ static int __init sputrace_init(void) struct proc_dir_entry *entry; int i, error = -ENOMEM; - sputrace_log = kcalloc(sizeof(struct sputrace), - bufsize, GFP_KERNEL); + sputrace_log = kcalloc(bufsize, sizeof(struct sputrace), GFP_KERNEL); if (!sputrace_log) goto out; diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c index bc818e4e20..bb464d1211 100644 --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -41,7 +41,7 @@ #include #include -static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, +static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) { @@ -71,6 +71,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, index++; uaddr += TCE_PAGE_SIZE; } + return 0; } static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 70541b7a50..a0ff03a3d8 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -83,7 +83,7 @@ static u32 *iob_l2_base; static struct iommu_table iommu_table_iobmap; static int iommu_table_iobmap_inited; -static void iobmap_build(struct iommu_table *tbl, long index, +static int iobmap_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) @@ -108,6 +108,7 @@ static void iobmap_build(struct iommu_table *tbl, long index, uaddr += IOBMAP_PAGE_SIZE; bus_addr += IOBMAP_PAGE_SIZE; } + return 0; } diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 757c0296e0..97619fd51e 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -40,3 +40,26 @@ config PPC_PSERIES_DEBUG depends on PPC_PSERIES && PPC_EARLY_DEBUG bool "Enable extra debug logging in platforms/pseries" default y + +config PPC_SMLPAR + bool "Support for shared-memory logical partitions" + depends on PPC_PSERIES + select LPARCFG + default n + help + Select this option to enable shared memory partition support. + With this option a system running in an LPAR can be given more + memory than physically available and will allow firmware to + balance memory across many LPARs. + +config CMM + tristate "Collaborative memory management" + depends on PPC_SMLPAR + default y + help + Select this option, if you want to enable the kernel interface + to reduce the memory size of the system. This is accomplished + by allocating pages of memory and put them "on hold". This only + makes sense for a system running in an LPAR where the unused pages + will be reused for other LPARs. The interface allows firmware to + balance memory across many LPARs. diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 554c6e42ef..dfe574af2d 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -24,3 +24,4 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_HVCS) += hvcserver.o obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o +obj-$(CONFIG_CMM) += cmm.o diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c new file mode 100644 index 0000000000..c6b3be0316 --- /dev/null +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -0,0 +1,468 @@ +/* + * Collaborative memory management interface. + * + * Copyright (C) 2008 IBM Corporation + * Author(s): Brian King (brking@linux.vnet.ibm.com), + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "plpar_wrappers.h" + +#define CMM_DRIVER_VERSION "1.0.0" +#define CMM_DEFAULT_DELAY 1 +#define CMM_DEBUG 0 +#define CMM_DISABLE 0 +#define CMM_OOM_KB 1024 +#define CMM_MIN_MEM_MB 256 +#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) +#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) + +static unsigned int delay = CMM_DEFAULT_DELAY; +static unsigned int oom_kb = CMM_OOM_KB; +static unsigned int cmm_debug = CMM_DEBUG; +static unsigned int cmm_disabled = CMM_DISABLE; +static unsigned long min_mem_mb = CMM_MIN_MEM_MB; +static struct sys_device cmm_sysdev; + +MODULE_AUTHOR("Brian King "); +MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(CMM_DRIVER_VERSION); + +module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " + "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); +module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " + "[Default=" __stringify(CMM_OOM_KB) "]"); +module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. " + "[Default=" __stringify(CMM_MIN_MEM_MB) "]"); +module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. " + "[Default=" __stringify(CMM_DEBUG) "]"); + +#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long)) + +#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); } + +struct cmm_page_array { + struct cmm_page_array *next; + unsigned long index; + unsigned long page[CMM_NR_PAGES]; +}; + +static unsigned long loaned_pages; +static unsigned long loaned_pages_target; +static unsigned long oom_freed_pages; + +static struct cmm_page_array *cmm_page_list; +static DEFINE_SPINLOCK(cmm_lock); + +static struct task_struct *cmm_thread_ptr; + +/** + * cmm_alloc_pages - Allocate pages and mark them as loaned + * @nr: number of pages to allocate + * + * Return value: + * number of pages requested to be allocated which were not + **/ +static long cmm_alloc_pages(long nr) +{ + struct cmm_page_array *pa, *npa; + unsigned long addr; + long rc; + + cmm_dbg("Begin request for %ld pages\n", nr); + + while (nr) { + addr = __get_free_page(GFP_NOIO | __GFP_NOWARN | + __GFP_NORETRY | __GFP_NOMEMALLOC); + if (!addr) + break; + spin_lock(&cmm_lock); + pa = cmm_page_list; + if (!pa || pa->index >= CMM_NR_PAGES) { + /* Need a new page for the page list. */ + spin_unlock(&cmm_lock); + npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN | + __GFP_NORETRY | __GFP_NOMEMALLOC); + if (!npa) { + pr_info("%s: Can not allocate new page list\n", __FUNCTION__); + free_page(addr); + break; + } + spin_lock(&cmm_lock); + pa = cmm_page_list; + + if (!pa || pa->index >= CMM_NR_PAGES) { + npa->next = pa; + npa->index = 0; + pa = npa; + cmm_page_list = pa; + } else + free_page((unsigned long) npa); + } + + if ((rc = plpar_page_set_loaned(__pa(addr)))) { + pr_err("%s: Can not set page to loaned. rc=%ld\n", __FUNCTION__, rc); + spin_unlock(&cmm_lock); + free_page(addr); + break; + } + + pa->page[pa->index++] = addr; + loaned_pages++; + totalram_pages--; + spin_unlock(&cmm_lock); + nr--; + } + + cmm_dbg("End request with %ld pages unfulfilled\n", nr); + return nr; +} + +/** + * cmm_free_pages - Free pages and mark them as active + * @nr: number of pages to free + * + * Return value: + * number of pages requested to be freed which were not + **/ +static long cmm_free_pages(long nr) +{ + struct cmm_page_array *pa; + unsigned long addr; + + cmm_dbg("Begin free of %ld pages.\n", nr); + spin_lock(&cmm_lock); + pa = cmm_page_list; + while (nr) { + if (!pa || pa->index <= 0) + break; + addr = pa->page[--pa->index]; + + if (pa->index == 0) { + pa = pa->next; + free_page((unsigned long) cmm_page_list); + cmm_page_list = pa; + } + + plpar_page_set_active(__pa(addr)); + free_page(addr); + loaned_pages--; + nr--; + totalram_pages++; + } + spin_unlock(&cmm_lock); + cmm_dbg("End request with %ld pages unfulfilled\n", nr); + return nr; +} + +/** + * cmm_oom_notify - OOM notifier + * @self: notifier block struct + * @dummy: not used + * @parm: returned - number of pages freed + * + * Return value: + * NOTIFY_OK + **/ +static int cmm_oom_notify(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + unsigned long *freed = parm; + long nr = KB2PAGES(oom_kb); + + cmm_dbg("OOM processing started\n"); + nr = cmm_free_pages(nr); + loaned_pages_target = loaned_pages; + *freed += KB2PAGES(oom_kb) - nr; + oom_freed_pages += KB2PAGES(oom_kb) - nr; + cmm_dbg("OOM processing complete\n"); + return NOTIFY_OK; +} + +/** + * cmm_get_mpp - Read memory performance parameters + * + * Makes hcall to query the current page loan request from the hypervisor. + * + * Return value: + * nothing + **/ +static void cmm_get_mpp(void) +{ + int rc; + struct hvcall_mpp_data mpp_data; + unsigned long active_pages_target; + signed long page_loan_request; + + rc = h_get_mpp(&mpp_data); + + if (rc != H_SUCCESS) + return; + + page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE); + loaned_pages_target = page_loan_request + loaned_pages; + if (loaned_pages_target > oom_freed_pages) + loaned_pages_target -= oom_freed_pages; + else + loaned_pages_target = 0; + + active_pages_target = totalram_pages + loaned_pages - loaned_pages_target; + + if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE)) + loaned_pages_target = totalram_pages + loaned_pages - + ((min_mem_mb * 1024 * 1024) / PAGE_SIZE); + + cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n", + page_loan_request, loaned_pages, loaned_pages_target, + oom_freed_pages, totalram_pages); +} + +static struct notifier_block cmm_oom_nb = { + .notifier_call = cmm_oom_notify +}; + +/** + * cmm_thread - CMM task thread + * @dummy: not used + * + * Return value: + * 0 + **/ +static int cmm_thread(void *dummy) +{ + unsigned long timeleft; + + while (1) { + timeleft = msleep_interruptible(delay * 1000); + + if (kthread_should_stop() || timeleft) { + loaned_pages_target = loaned_pages; + break; + } + + cmm_get_mpp(); + + if (loaned_pages_target > loaned_pages) { + if (cmm_alloc_pages(loaned_pages_target - loaned_pages)) + loaned_pages_target = loaned_pages; + } else if (loaned_pages_target < loaned_pages) + cmm_free_pages(loaned_pages - loaned_pages_target); + } + return 0; +} + +#define CMM_SHOW(name, format, args...) \ + static ssize_t show_##name(struct sys_device *dev, char *buf) \ + { \ + return sprintf(buf, format, ##args); \ + } \ + static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) + +CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages)); +CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target)); + +static ssize_t show_oom_pages(struct sys_device *dev, char *buf) +{ + return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages)); +} + +static ssize_t store_oom_pages(struct sys_device *dev, + const char *buf, size_t count) +{ + unsigned long val = simple_strtoul (buf, NULL, 10); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (val != 0) + return -EBADMSG; + + oom_freed_pages = 0; + return count; +} + +static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO, + show_oom_pages, store_oom_pages); + +static struct sysdev_attribute *cmm_attrs[] = { + &attr_loaned_kb, + &attr_loaned_target_kb, + &attr_oom_freed_kb, +}; + +static struct sysdev_class cmm_sysdev_class = { + .name = "cmm", +}; + +/** + * cmm_sysfs_register - Register with sysfs + * + * Return value: + * 0 on success / other on failure + **/ +static int cmm_sysfs_register(struct sys_device *sysdev) +{ + int i, rc; + + if ((rc = sysdev_class_register(&cmm_sysdev_class))) + return rc; + + sysdev->id = 0; + sysdev->cls = &cmm_sysdev_class; + + if ((rc = sysdev_register(sysdev))) + goto class_unregister; + + for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) { + if ((rc = sysdev_create_file(sysdev, cmm_attrs[i]))) + goto fail; + } + + return 0; + +fail: + while (--i >= 0) + sysdev_remove_file(sysdev, cmm_attrs[i]); + sysdev_unregister(sysdev); +class_unregister: + sysdev_class_unregister(&cmm_sysdev_class); + return rc; +} + +/** + * cmm_unregister_sysfs - Unregister from sysfs + * + **/ +static void cmm_unregister_sysfs(struct sys_device *sysdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) + sysdev_remove_file(sysdev, cmm_attrs[i]); + sysdev_unregister(sysdev); + sysdev_class_unregister(&cmm_sysdev_class); +} + +/** + * cmm_init - Module initialization + * + * Return value: + * 0 on success / other on failure + **/ +static int cmm_init(void) +{ + int rc = -ENOMEM; + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return -EOPNOTSUPP; + + if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0) + return rc; + + if ((rc = cmm_sysfs_register(&cmm_sysdev))) + goto out_oom_notifier; + + if (cmm_disabled) + return rc; + + cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); + if (IS_ERR(cmm_thread_ptr)) { + rc = PTR_ERR(cmm_thread_ptr); + goto out_unregister_sysfs; + } + + return rc; + +out_unregister_sysfs: + cmm_unregister_sysfs(&cmm_sysdev); +out_oom_notifier: + unregister_oom_notifier(&cmm_oom_nb); + return rc; +} + +/** + * cmm_exit - Module exit + * + * Return value: + * nothing + **/ +static void cmm_exit(void) +{ + if (cmm_thread_ptr) + kthread_stop(cmm_thread_ptr); + unregister_oom_notifier(&cmm_oom_nb); + cmm_free_pages(loaned_pages); + cmm_unregister_sysfs(&cmm_sysdev); +} + +/** + * cmm_set_disable - Disable/Enable CMM + * + * Return value: + * 0 on success / other on failure + **/ +static int cmm_set_disable(const char *val, struct kernel_param *kp) +{ + int disable = simple_strtoul(val, NULL, 10); + + if (disable != 0 && disable != 1) + return -EINVAL; + + if (disable && !cmm_disabled) { + if (cmm_thread_ptr) + kthread_stop(cmm_thread_ptr); + cmm_thread_ptr = NULL; + cmm_free_pages(loaned_pages); + } else if (!disable && cmm_disabled) { + cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); + if (IS_ERR(cmm_thread_ptr)) + return PTR_ERR(cmm_thread_ptr); + } + + cmm_disabled = disable; + return 0; +} + +module_param_call(disable, cmm_set_disable, param_get_uint, + &cmm_disabled, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. " + "[Default=" __stringify(CMM_DISABLE) "]"); + +module_init(cmm_init); +module_exit(cmm_exit); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 5377dd4b84..a8c446697f 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -48,7 +48,7 @@ #include "plpar_wrappers.h" -static void tce_build_pSeries(struct iommu_table *tbl, long index, +static int tce_build_pSeries(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) @@ -72,6 +72,7 @@ static void tce_build_pSeries(struct iommu_table *tbl, long index, uaddr += TCE_PAGE_SIZE; tcep++; } + return 0; } @@ -94,14 +95,19 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) return *tcep; } -static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, +static void tce_free_pSeriesLP(struct iommu_table*, long, long); +static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); + +static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) { - u64 rc; + u64 rc = 0; u64 proto_tce, tce; u64 rpn; + int ret = 0; + long tcenum_start = tcenum, npages_start = npages; rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; proto_tce = TCE_PCI_READ; @@ -112,6 +118,13 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; + tce_free_pSeriesLP(tbl, tcenum_start, + (npages_start - (npages + 1))); + break; + } + if (rc && printk_ratelimit()) { printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); printk("\tindex = 0x%lx\n", (u64)tbl->it_index); @@ -123,25 +136,27 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, tcenum++; rpn++; } + return ret; } static DEFINE_PER_CPU(u64 *, tce_page) = NULL; -static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, +static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) { - u64 rc; + u64 rc = 0; u64 proto_tce; u64 *tcep; u64 rpn; long l, limit; + long tcenum_start = tcenum, npages_start = npages; + int ret = 0; if (npages == 1) { - tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, - direction, attrs); - return; + return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + direction, attrs); } tcep = __get_cpu_var(tce_page); @@ -153,9 +168,8 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcep = (u64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { - tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); - return; } __get_cpu_var(tce_page) = tcep; } @@ -187,6 +201,13 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcenum += limit; } while (npages > 0 && !rc); + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; + tce_freemulti_pSeriesLP(tbl, tcenum_start, + (npages_start - (npages + limit))); + return ret; + } + if (rc && printk_ratelimit()) { printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); printk("\tindex = 0x%lx\n", (u64)tbl->it_index); @@ -194,6 +215,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, printk("\ttce[0] val = 0x%lx\n", tcep[0]); show_stack(current, (unsigned long *)__get_SP()); } + return ret; } static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h index d8680b589d..a437267c6b 100644 --- a/arch/powerpc/platforms/pseries/plpar_wrappers.h +++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h @@ -42,6 +42,16 @@ static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa) return vpa_call(0x3, cpu, vpa); } +static inline long plpar_page_set_loaned(unsigned long vpa) +{ + return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa, 0); +} + +static inline long plpar_page_set_active(unsigned long vpa) +{ + return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa, 0); +} + extern void vpa_init(int cpu); static inline long plpar_pte_enter(unsigned long flags, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 90beb444e1..063a0d2fba 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -314,6 +314,76 @@ static int pseries_set_xdabr(unsigned long dabr) H_DABRX_KERNEL | H_DABRX_USER); } +#define CMO_CHARACTERISTICS_TOKEN 44 +#define CMO_MAXLENGTH 1026 + +/** + * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, + * handle that here. (Stolen from parse_system_parameter_string) + */ +void pSeries_cmo_feature_init(void) +{ + char *ptr, *key, *value, *end; + int call_status; + int PrPSP = -1; + int SecPSP = -1; + + pr_debug(" -> fw_cmo_feature_init()\n"); + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE); + call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, + CMO_CHARACTERISTICS_TOKEN, + __pa(rtas_data_buf), + RTAS_DATA_BUF_SIZE); + + if (call_status != 0) { + spin_unlock(&rtas_data_buf_lock); + pr_debug("CMO not available\n"); + pr_debug(" <- fw_cmo_feature_init()\n"); + return; + } + + end = rtas_data_buf + CMO_MAXLENGTH - 2; + ptr = rtas_data_buf + 2; /* step over strlen value */ + key = value = ptr; + + while (*ptr && (ptr <= end)) { + /* Separate the key and value by replacing '=' with '\0' and + * point the value at the string after the '=' + */ + if (ptr[0] == '=') { + ptr[0] = '\0'; + value = ptr + 1; + } else if (ptr[0] == '\0' || ptr[0] == ',') { + /* Terminate the string containing the key/value pair */ + ptr[0] = '\0'; + + if (key == value) { + pr_debug("Malformed key/value pair\n"); + /* Never found a '=', end processing */ + break; + } + + if (0 == strcmp(key, "PrPSP")) + PrPSP = simple_strtol(value, NULL, 10); + else if (0 == strcmp(key, "SecPSP")) + SecPSP = simple_strtol(value, NULL, 10); + value = key = ptr + 1; + } + ptr++; + } + + if (PrPSP != -1 || SecPSP != -1) { + pr_info("CMO enabled\n"); + pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP); + powerpc_firmware_features |= FW_FEATURE_CMO; + } else + pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP); + spin_unlock(&rtas_data_buf_lock); + pr_debug(" <- fw_cmo_feature_init()\n"); +} + /* * Early initialization. Relocation is on but do not reference unbolted pages */ @@ -329,6 +399,7 @@ static void __init pSeries_init_early(void) else if (firmware_has_feature(FW_FEATURE_XDABR)) ppc_md.set_dabr = pseries_set_xdabr; + pSeries_cmo_feature_init(); iommu_init_early_pSeries(); pr_debug(" <- pSeries_init_early()\n"); diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index de8c8b542c..89639ecbf3 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -147,7 +147,7 @@ static void dart_flush(struct iommu_table *tbl) } } -static void dart_build(struct iommu_table *tbl, long index, +static int dart_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) @@ -184,6 +184,7 @@ static void dart_build(struct iommu_table *tbl, long index, } else { dart_dirty = 1; } + return 0; } diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 00527805e4..e5a6e2e845 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -33,6 +33,7 @@ */ #include +#include #include #include #include @@ -52,7 +53,9 @@ #include #include #include +#include #include +#include #include #include "ibmveth.h" @@ -94,8 +97,10 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter); static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter); static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter); +static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev); static struct kobj_type ktype_veth_pool; + #ifdef CONFIG_PROC_FS #define IBMVETH_PROC_DIR "ibmveth" static struct proc_dir_entry *ibmveth_proc_dir; @@ -226,16 +231,16 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc u32 i; u32 count = pool->size - atomic_read(&pool->available); u32 buffers_added = 0; + struct sk_buff *skb; + unsigned int free_index, index; + u64 correlator; + unsigned long lpar_rc; + dma_addr_t dma_addr; mb(); for(i = 0; i < count; ++i) { - struct sk_buff *skb; - unsigned int free_index, index; - u64 correlator; union ibmveth_buf_desc desc; - unsigned long lpar_rc; - dma_addr_t dma_addr; skb = alloc_skb(pool->buff_size, GFP_ATOMIC); @@ -255,6 +260,9 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, pool->buff_size, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_addr)) + goto failure; + pool->free_map[free_index] = IBM_VETH_INVALID_MAP; pool->dma_addr[index] = dma_addr; pool->skbuff[index] = skb; @@ -267,25 +275,32 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc); - if(lpar_rc != H_SUCCESS) { - pool->free_map[free_index] = index; - pool->skbuff[index] = NULL; - if (pool->consumer_index == 0) - pool->consumer_index = pool->size - 1; - else - pool->consumer_index--; - dma_unmap_single(&adapter->vdev->dev, - pool->dma_addr[index], pool->buff_size, - DMA_FROM_DEVICE); - dev_kfree_skb_any(skb); - adapter->replenish_add_buff_failure++; - break; - } else { + if (lpar_rc != H_SUCCESS) + goto failure; + else { buffers_added++; adapter->replenish_add_buff_success++; } } + mb(); + atomic_add(buffers_added, &(pool->available)); + return; + +failure: + pool->free_map[free_index] = index; + pool->skbuff[index] = NULL; + if (pool->consumer_index == 0) + pool->consumer_index = pool->size - 1; + else + pool->consumer_index--; + if (!dma_mapping_error(dma_addr)) + dma_unmap_single(&adapter->vdev->dev, + pool->dma_addr[index], pool->buff_size, + DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); + adapter->replenish_add_buff_failure++; + mb(); atomic_add(buffers_added, &(pool->available)); } @@ -297,7 +312,7 @@ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter) adapter->replenish_task_cycles++; - for(i = 0; i < IbmVethNumBufferPools; i++) + for (i = (IbmVethNumBufferPools - 1); i >= 0; i--) if(adapter->rx_buff_pool[i].active) ibmveth_replenish_buffer_pool(adapter, &adapter->rx_buff_pool[i]); @@ -472,6 +487,18 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter) if (adapter->rx_buff_pool[i].active) ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[i]); + + if (adapter->bounce_buffer != NULL) { + if (!dma_mapping_error(adapter->bounce_buffer_dma)) { + dma_unmap_single(&adapter->vdev->dev, + adapter->bounce_buffer_dma, + adapter->netdev->mtu + IBMVETH_BUFF_OH, + DMA_BIDIRECTIONAL); + adapter->bounce_buffer_dma = DMA_ERROR_CODE; + } + kfree(adapter->bounce_buffer); + adapter->bounce_buffer = NULL; + } } static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter, @@ -607,6 +634,24 @@ static int ibmveth_open(struct net_device *netdev) return rc; } + adapter->bounce_buffer = + kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL); + if (!adapter->bounce_buffer) { + ibmveth_error_printk("unable to allocate bounce buffer\n"); + ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); + return -ENOMEM; + } + adapter->bounce_buffer_dma = + dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer, + netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL); + if (dma_mapping_error(adapter->bounce_buffer_dma)) { + ibmveth_error_printk("unable to map bounce buffer\n"); + ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); + return -ENOMEM; + } + ibmveth_debug_printk("initial replenish cycle\n"); ibmveth_interrupt(netdev->irq, netdev); @@ -853,10 +898,12 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned int tx_packets = 0; unsigned int tx_send_failed = 0; unsigned int tx_map_failed = 0; + int used_bounce = 0; + unsigned long data_dma_addr; desc.fields.flags_len = IBMVETH_BUF_VALID | skb->len; - desc.fields.address = dma_map_single(&adapter->vdev->dev, skb->data, - skb->len, DMA_TO_DEVICE); + data_dma_addr = dma_map_single(&adapter->vdev->dev, skb->data, + skb->len, DMA_TO_DEVICE); if (skb->ip_summed == CHECKSUM_PARTIAL && ip_hdr(skb)->protocol != IPPROTO_TCP && skb_checksum_help(skb)) { @@ -875,12 +922,16 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) buf[1] = 0; } - if (dma_mapping_error(desc.fields.address)) { - ibmveth_error_printk("tx: unable to map xmit buffer\n"); + if (dma_mapping_error(data_dma_addr)) { + if (!firmware_has_feature(FW_FEATURE_CMO)) + ibmveth_error_printk("tx: unable to map xmit buffer\n"); + skb_copy_from_linear_data(skb, adapter->bounce_buffer, + skb->len); + desc.fields.address = adapter->bounce_buffer_dma; tx_map_failed++; - tx_dropped++; - goto out; - } + used_bounce = 1; + } else + desc.fields.address = data_dma_addr; /* send the frame. Arbitrarily set retrycount to 1024 */ correlator = 0; @@ -904,8 +955,9 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) netdev->trans_start = jiffies; } - dma_unmap_single(&adapter->vdev->dev, desc.fields.address, - skb->len, DMA_TO_DEVICE); + if (!used_bounce) + dma_unmap_single(&adapter->vdev->dev, data_dma_addr, + skb->len, DMA_TO_DEVICE); out: spin_lock_irqsave(&adapter->stats_lock, flags); netdev->stats.tx_dropped += tx_dropped; @@ -1053,9 +1105,9 @@ static void ibmveth_set_multicast_list(struct net_device *netdev) static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) { struct ibmveth_adapter *adapter = dev->priv; + struct vio_dev *viodev = adapter->vdev; int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH; - int reinit = 0; - int i, rc; + int i; if (new_mtu < IBMVETH_MAX_MTU) return -EINVAL; @@ -1067,23 +1119,34 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu) if (i == IbmVethNumBufferPools) return -EINVAL; + /* Deactivate all the buffer pools so that the next loop can activate + only the buffer pools necessary to hold the new MTU */ + for (i = 0; i < IbmVethNumBufferPools; i++) + if (adapter->rx_buff_pool[i].active) { + ibmveth_free_buffer_pool(adapter, + &adapter->rx_buff_pool[i]); + adapter->rx_buff_pool[i].active = 0; + } + /* Look for an active buffer pool that can hold the new MTU */ for(i = 0; irx_buff_pool[i].active) { - adapter->rx_buff_pool[i].active = 1; - reinit = 1; - } + adapter->rx_buff_pool[i].active = 1; if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) { - if (reinit && netif_running(adapter->netdev)) { + if (netif_running(adapter->netdev)) { adapter->pool_config = 1; ibmveth_close(adapter->netdev); adapter->pool_config = 0; dev->mtu = new_mtu; - if ((rc = ibmveth_open(adapter->netdev))) - return rc; - } else - dev->mtu = new_mtu; + vio_cmo_set_dev_desired(viodev, + ibmveth_get_desired_dma + (viodev)); + return ibmveth_open(adapter->netdev); + } + dev->mtu = new_mtu; + vio_cmo_set_dev_desired(viodev, + ibmveth_get_desired_dma + (viodev)); return 0; } } @@ -1098,6 +1161,46 @@ static void ibmveth_poll_controller(struct net_device *dev) } #endif +/** + * ibmveth_get_desired_dma - Calculate IO memory desired by the driver + * + * @vdev: struct vio_dev for the device whose desired IO mem is to be returned + * + * Return value: + * Number of bytes of IO data the driver will need to perform well. + */ +static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) +{ + struct net_device *netdev = dev_get_drvdata(&vdev->dev); + struct ibmveth_adapter *adapter; + unsigned long ret; + int i; + int rxqentries = 1; + + /* netdev inits at probe time along with the structures we need below*/ + if (netdev == NULL) + return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT); + + adapter = netdev_priv(netdev); + + ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE; + ret += IOMMU_PAGE_ALIGN(netdev->mtu); + + for (i = 0; i < IbmVethNumBufferPools; i++) { + /* add the size of the active receive buffers */ + if (adapter->rx_buff_pool[i].active) + ret += + adapter->rx_buff_pool[i].size * + IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i]. + buff_size); + rxqentries += adapter->rx_buff_pool[i].size; + } + /* add the size of the receive queue entries */ + ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry)); + + return ret; +} + static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) { int rc, i; @@ -1242,6 +1345,8 @@ static int __devexit ibmveth_remove(struct vio_dev *dev) ibmveth_proc_unregister_adapter(adapter); free_netdev(netdev); + dev_set_drvdata(&dev->dev, NULL); + return 0; } @@ -1402,14 +1507,15 @@ const char * buf, size_t count) return -EPERM; } - pool->active = 0; if (netif_running(netdev)) { adapter->pool_config = 1; ibmveth_close(netdev); + pool->active = 0; adapter->pool_config = 0; if ((rc = ibmveth_open(netdev))) return rc; } + pool->active = 0; } } else if (attr == &veth_num_attr) { if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) @@ -1485,6 +1591,7 @@ static struct vio_driver ibmveth_driver = { .id_table = ibmveth_device_table, .probe = ibmveth_probe, .remove = ibmveth_remove, + .get_desired_dma = ibmveth_get_desired_dma, .driver = { .name = ibmveth_driver_name, .owner = THIS_MODULE, diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h index 41f61cd188..d281869487 100644 --- a/drivers/net/ibmveth.h +++ b/drivers/net/ibmveth.h @@ -93,9 +93,12 @@ static inline long h_illan_attributes(unsigned long unit_address, plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac) #define IbmVethNumBufferPools 5 +#define IBMVETH_IO_ENTITLEMENT_DEFAULT 4243456 /* MTU of 1500 needs 4.2Mb */ #define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */ #define IBMVETH_MAX_MTU 68 #define IBMVETH_MAX_POOL_COUNT 4096 +#define IBMVETH_BUFF_LIST_SIZE 4096 +#define IBMVETH_FILT_LIST_SIZE 4096 #define IBMVETH_MAX_BUF_SIZE (1024 * 128) static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 }; @@ -143,6 +146,8 @@ struct ibmveth_adapter { struct ibmveth_rx_q rx_queue; int pool_config; int rx_csum; + void *bounce_buffer; + dma_addr_t bounce_buffer_dma; /* adapter specific stats */ u64 replenish_task_cycles; diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c index 5c015d310d..344e1b03dd 100644 --- a/drivers/of/of_i2c.c +++ b/drivers/of/of_i2c.c @@ -91,8 +91,6 @@ void of_register_i2c_devices(struct i2c_adapter *adap, } info.irq = irq_of_parse_and_map(node, 0); - if (info.irq == NO_IRQ) - info.irq = -1; if (of_find_i2c_driver(node, &info) < 0) { irq_dispose_mapping(info.irq); diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index eb702b96d5..c4a7c06793 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3819,6 +3819,20 @@ static int ibmvfc_remove(struct vio_dev *vdev) return 0; } +/** + * ibmvfc_get_desired_dma - Calculate DMA resources needed by the driver + * @vdev: vio device struct + * + * Return value: + * Number of bytes the driver will need to DMA map at the same time in + * order to perform well. + */ +static unsigned long ibmvfc_get_desired_dma(struct vio_dev *vdev) +{ + unsigned long pool_dma = max_requests * sizeof(union ibmvfc_iu); + return pool_dma + ((512 * 1024) * driver_template.cmd_per_lun); +} + static struct vio_device_id ibmvfc_device_table[] __devinitdata = { {"fcp", "IBM,vfc-client"}, { "", "" } @@ -3829,6 +3843,7 @@ static struct vio_driver ibmvfc_driver = { .id_table = ibmvfc_device_table, .probe = ibmvfc_probe, .remove = ibmvfc_remove, + .get_desired_dma = ibmvfc_get_desired_dma, .driver = { .name = IBMVFC_NAME, .owner = THIS_MODULE, diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 5d23368a1b..20000ec79b 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include #include @@ -426,8 +427,10 @@ static int map_sg_data(struct scsi_cmnd *cmd, SG_ALL * sizeof(struct srp_direct_buf), &evt_struct->ext_list_token, 0); if (!evt_struct->ext_list) { - sdev_printk(KERN_ERR, cmd->device, - "Can't allocate memory for indirect table\n"); + if (!firmware_has_feature(FW_FEATURE_CMO)) + sdev_printk(KERN_ERR, cmd->device, + "Can't allocate memory " + "for indirect table\n"); return 0; } } @@ -743,7 +746,9 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd, srp_cmd->lun = ((u64) lun) << 48; if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) { - sdev_printk(KERN_ERR, cmnd->device, "couldn't convert cmd to srp_cmd\n"); + if (!firmware_has_feature(FW_FEATURE_CMO)) + sdev_printk(KERN_ERR, cmnd->device, + "couldn't convert cmd to srp_cmd\n"); free_event_struct(&hostdata->pool, evt_struct); return SCSI_MLQUEUE_HOST_BUSY; } @@ -855,7 +860,10 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata) DMA_BIDIRECTIONAL); if (dma_mapping_error(req->buffer)) { - dev_err(hostdata->dev, "Unable to map request_buffer for adapter_info!\n"); + if (!firmware_has_feature(FW_FEATURE_CMO)) + dev_err(hostdata->dev, + "Unable to map request_buffer for " + "adapter_info!\n"); free_event_struct(&hostdata->pool, evt_struct); return; } @@ -1400,7 +1408,9 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata, DMA_BIDIRECTIONAL); if (dma_mapping_error(host_config->buffer)) { - dev_err(hostdata->dev, "dma_mapping error getting host config\n"); + if (!firmware_has_feature(FW_FEATURE_CMO)) + dev_err(hostdata->dev, + "dma_mapping error getting host config\n"); free_event_struct(&hostdata->pool, evt_struct); return -1; } @@ -1604,7 +1614,7 @@ static struct scsi_host_template driver_template = { .eh_host_reset_handler = ibmvscsi_eh_host_reset_handler, .slave_configure = ibmvscsi_slave_configure, .change_queue_depth = ibmvscsi_change_queue_depth, - .cmd_per_lun = 16, + .cmd_per_lun = IBMVSCSI_CMDS_PER_LUN_DEFAULT, .can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT, .this_id = -1, .sg_tablesize = SG_ALL, @@ -1612,6 +1622,26 @@ static struct scsi_host_template driver_template = { .shost_attrs = ibmvscsi_attrs, }; +/** + * ibmvscsi_get_desired_dma - Calculate IO memory desired by the driver + * + * @vdev: struct vio_dev for the device whose desired IO mem is to be returned + * + * Return value: + * Number of bytes of IO data the driver will need to perform well. + */ +static unsigned long ibmvscsi_get_desired_dma(struct vio_dev *vdev) +{ + /* iu_storage data allocated in initialize_event_pool */ + unsigned long desired_io = max_requests * sizeof(union viosrp_iu); + + /* add io space for sg data */ + desired_io += (IBMVSCSI_MAX_SECTORS_DEFAULT * + IBMVSCSI_CMDS_PER_LUN_DEFAULT); + + return desired_io; +} + /** * Called by bus code for each adapter */ @@ -1641,7 +1671,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) hostdata->host = host; hostdata->dev = dev; atomic_set(&hostdata->request_limit, -1); - hostdata->host->max_sectors = 32 * 8; /* default max I/O 32 pages */ + hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT; rc = ibmvscsi_ops->init_crq_queue(&hostdata->queue, hostdata, max_requests); if (rc != 0 && rc != H_RESOURCE) { @@ -1735,6 +1765,7 @@ static struct vio_driver ibmvscsi_driver = { .id_table = ibmvscsi_device_table, .probe = ibmvscsi_probe, .remove = ibmvscsi_remove, + .get_desired_dma = ibmvscsi_get_desired_dma, .driver = { .name = "ibmvscsi", .owner = THIS_MODULE, diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.h b/drivers/scsi/ibmvscsi/ibmvscsi.h index 46e850e302..2d4339d5e1 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.h +++ b/drivers/scsi/ibmvscsi/ibmvscsi.h @@ -45,6 +45,8 @@ struct Scsi_Host; #define MAX_INDIRECT_BUFS 10 #define IBMVSCSI_MAX_REQUESTS_DEFAULT 100 +#define IBMVSCSI_CMDS_PER_LUN_DEFAULT 16 +#define IBMVSCSI_MAX_SECTORS_DEFAULT 256 /* 32 * 8 = default max I/O 32 pages */ #define IBMVSCSI_MAX_CMDS_PER_LUN 64 /* ------------------------------------------------------------ diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 88d180306c..3b6ff854d9 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss) #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) #endif +#ifndef ELF_BASE_PLATFORM +/* + * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture. + * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value + * will be copied to the user stack in the same manner as AT_PLATFORM. + */ +#define ELF_BASE_PLATFORM NULL +#endif + static int create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, unsigned long load_addr, unsigned long interp_load_addr) @@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, elf_addr_t __user *envp; elf_addr_t __user *sp; elf_addr_t __user *u_platform; + elf_addr_t __user *u_base_platform; const char *k_platform = ELF_PLATFORM; + const char *k_base_platform = ELF_BASE_PLATFORM; int items; elf_addr_t *elf_info; int ei_index = 0; @@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, return -EFAULT; } + /* + * If this architecture has a "base" platform capability + * string, copy it to userspace. + */ + u_base_platform = NULL; + if (k_base_platform) { + size_t len = strlen(k_base_platform) + 1; + + u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len); + if (__copy_to_user(u_base_platform, k_base_platform, len)) + return -EFAULT; + } + /* Create the ELF interpreter info */ elf_info = (elf_addr_t *)current->mm->saved_auxv; /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */ @@ -209,6 +233,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t)(unsigned long)u_platform); } + if (k_base_platform) { + NEW_AUX_ENT(AT_BASE_PLATFORM, + (elf_addr_t)(unsigned long)u_base_platform); + } if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); } diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 2a3e9075a5..ef8a248dfd 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -127,6 +127,8 @@ extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr); extern void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end); +extern const char *powerpc_base_platform; + #endif /* __ASSEMBLY__ */ /* CPU kernel features */ diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h index 89664675b4..80d1f399ee 100644 --- a/include/asm-powerpc/elf.h +++ b/include/asm-powerpc/elf.h @@ -217,6 +217,14 @@ typedef elf_vrregset_t elf_fpxregset_t; #define ELF_PLATFORM (cur_cpu_spec->platform) +/* While ELF_PLATFORM indicates the ISA supported by the platform, it + * may not accurately reflect the underlying behavior of the hardware + * (as in the case of running in Power5+ compatibility mode on a + * Power6 machine). ELF_BASE_PLATFORM allows ld.so to load libraries + * that are tuned for the real hardware. + */ +#define ELF_BASE_PLATFORM (powerpc_base_platform) + #ifdef __powerpc64__ # define ELF_PLAT_INIT(_r, load_addr) do { \ _r->gpr[2] = load_addr; \ diff --git a/include/asm-powerpc/firmware.h b/include/asm-powerpc/firmware.h index ef328995ba..3a17982752 100644 --- a/include/asm-powerpc/firmware.h +++ b/include/asm-powerpc/firmware.h @@ -46,6 +46,7 @@ #define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000) #define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000) #define FW_FEATURE_BULK_REMOVE ASM_CONST(0x0000000002000000) +#define FW_FEATURE_CMO ASM_CONST(0x0000000004000000) #ifndef __ASSEMBLY__ @@ -58,7 +59,7 @@ enum { FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ | FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | - FW_FEATURE_SPLPAR | FW_FEATURE_LPAR, + FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h index bf6cd7cb99..fbe2932fa9 100644 --- a/include/asm-powerpc/hvcall.h +++ b/include/asm-powerpc/hvcall.h @@ -92,6 +92,11 @@ #define H_EXACT (1UL<<(63-24)) /* Use exact PTE or return H_PTEG_FULL */ #define H_R_XLATE (1UL<<(63-25)) /* include a valid logical page num in the pte if the valid bit is set */ #define H_READ_4 (1UL<<(63-26)) /* Return 4 PTEs */ +#define H_PAGE_STATE_CHANGE (1UL<<(63-28)) +#define H_PAGE_UNUSED ((1UL<<(63-29)) | (1UL<<(63-30))) +#define H_PAGE_SET_UNUSED (H_PAGE_STATE_CHANGE | H_PAGE_UNUSED) +#define H_PAGE_SET_LOANED (H_PAGE_SET_UNUSED | (1UL<<(63-31))) +#define H_PAGE_SET_ACTIVE H_PAGE_STATE_CHANGE #define H_AVPN (1UL<<(63-32)) /* An avpn is provided as a sanity test */ #define H_ANDCOND (1UL<<(63-33)) #define H_ICACHE_INVALIDATE (1UL<<(63-40)) /* icbi, etc. (ignored for IO pages) */ @@ -210,7 +215,9 @@ #define H_JOIN 0x298 #define H_VASI_STATE 0x2A4 #define H_ENABLE_CRQ 0x2B0 -#define MAX_HCALL_OPCODE H_ENABLE_CRQ +#define H_SET_MPP 0x2D0 +#define H_GET_MPP 0x2D4 +#define MAX_HCALL_OPCODE H_GET_MPP #ifndef __ASSEMBLY__ @@ -270,6 +277,20 @@ struct hcall_stats { }; #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) +struct hvcall_mpp_data { + unsigned long entitled_mem; + unsigned long mapped_mem; + unsigned short group_num; + unsigned short pool_num; + unsigned char mem_weight; + unsigned char unallocated_mem_weight; + unsigned long unallocated_entitlement; /* value in bytes */ + unsigned long pool_size; + signed long loan_request; + unsigned long backing_mem; +}; + +int h_get_mpp(struct hvcall_mpp_data *); #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/include/asm-powerpc/lppaca.h b/include/asm-powerpc/lppaca.h index 567ed92cd9..2fe268b103 100644 --- a/include/asm-powerpc/lppaca.h +++ b/include/asm-powerpc/lppaca.h @@ -125,7 +125,10 @@ struct lppaca { // NOTE: This value will ALWAYS be zero for dedicated processors and // will NEVER be zero for shared processors (ie, initialized to a 1). volatile u32 yield_count; // PLIC increments each dispatchx00-x03 - u8 reserved6[124]; // Reserved x04-x7F + u32 reserved6; + volatile u64 cmo_faults; // CMO page fault count x08-x0F + volatile u64 cmo_fault_time; // CMO page fault time x10-x17 + u8 reserved7[104]; // Reserved x18-x7F //============================================================================= // CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h index 1233d735fd..893aafd87f 100644 --- a/include/asm-powerpc/machdep.h +++ b/include/asm-powerpc/machdep.h @@ -76,7 +76,7 @@ struct machdep_calls { * destroyed as well */ void (*hpte_clear_all)(void); - void (*tce_build)(struct iommu_table * tbl, + int (*tce_build)(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, diff --git a/include/asm-powerpc/mpc52xx_psc.h b/include/asm-powerpc/mpc52xx_psc.h index 710c5d36ef..8917ed6305 100644 --- a/include/asm-powerpc/mpc52xx_psc.h +++ b/include/asm-powerpc/mpc52xx_psc.h @@ -60,10 +60,12 @@ #define MPC52xx_PSC_RXTX_FIFO_ALARM 0x0002 #define MPC52xx_PSC_RXTX_FIFO_EMPTY 0x0001 -/* PSC interrupt mask bits */ +/* PSC interrupt status/mask bits */ #define MPC52xx_PSC_IMR_TXRDY 0x0100 #define MPC52xx_PSC_IMR_RXRDY 0x0200 #define MPC52xx_PSC_IMR_DB 0x0400 +#define MPC52xx_PSC_IMR_TXEMP 0x0800 +#define MPC52xx_PSC_IMR_ORERR 0x1000 #define MPC52xx_PSC_IMR_IPC 0x8000 /* PSC input port change bit */ @@ -92,6 +94,34 @@ #define MPC52xx_PSC_RFNUM_MASK 0x01ff +#define MPC52xx_PSC_SICR_DTS1 (1 << 29) +#define MPC52xx_PSC_SICR_SHDR (1 << 28) +#define MPC52xx_PSC_SICR_SIM_MASK (0xf << 24) +#define MPC52xx_PSC_SICR_SIM_UART (0x0 << 24) +#define MPC52xx_PSC_SICR_SIM_UART_DCD (0x8 << 24) +#define MPC52xx_PSC_SICR_SIM_CODEC_8 (0x1 << 24) +#define MPC52xx_PSC_SICR_SIM_CODEC_16 (0x2 << 24) +#define MPC52xx_PSC_SICR_SIM_AC97 (0x3 << 24) +#define MPC52xx_PSC_SICR_SIM_SIR (0x8 << 24) +#define MPC52xx_PSC_SICR_SIM_SIR_DCD (0xc << 24) +#define MPC52xx_PSC_SICR_SIM_MIR (0x5 << 24) +#define MPC52xx_PSC_SICR_SIM_FIR (0x6 << 24) +#define MPC52xx_PSC_SICR_SIM_CODEC_24 (0x7 << 24) +#define MPC52xx_PSC_SICR_SIM_CODEC_32 (0xf << 24) +#define MPC52xx_PSC_SICR_GENCLK (1 << 23) +#define MPC52xx_PSC_SICR_I2S (1 << 22) +#define MPC52xx_PSC_SICR_CLKPOL (1 << 21) +#define MPC52xx_PSC_SICR_SYNCPOL (1 << 20) +#define MPC52xx_PSC_SICR_CELLSLAVE (1 << 19) +#define MPC52xx_PSC_SICR_CELL2XCLK (1 << 18) +#define MPC52xx_PSC_SICR_ESAI (1 << 17) +#define MPC52xx_PSC_SICR_ENAC97 (1 << 16) +#define MPC52xx_PSC_SICR_SPI (1 << 15) +#define MPC52xx_PSC_SICR_MSTR (1 << 14) +#define MPC52xx_PSC_SICR_CPOL (1 << 13) +#define MPC52xx_PSC_SICR_CPHA (1 << 12) +#define MPC52xx_PSC_SICR_USEEOF (1 << 11) +#define MPC52xx_PSC_SICR_DISABLEEOF (1 << 10) /* Structure of the hardware registers */ struct mpc52xx_psc { @@ -132,8 +162,12 @@ struct mpc52xx_psc { u8 reserved5[3]; u8 ctlr; /* PSC + 0x1c */ u8 reserved6[3]; - u16 ccr; /* PSC + 0x20 */ - u8 reserved7[14]; + /* BitClkDiv field of CCR is byte swapped in + * the hardware for mpc5200/b compatibility */ + u32 ccr; /* PSC + 0x20 */ + u32 ac97_slots; /* PSC + 0x24 */ + u32 ac97_cmd; /* PSC + 0x28 */ + u32 ac97_data; /* PSC + 0x2c */ u8 ivr; /* PSC + 0x30 */ u8 reserved8[3]; u8 ip; /* PSC + 0x34 */ diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h index d18ffe7bc7..dbb8ca172e 100644 --- a/include/asm-powerpc/pgtable.h +++ b/include/asm-powerpc/pgtable.h @@ -38,6 +38,19 @@ extern void paging_init(void); remap_pfn_range(vma, vaddr, pfn, size, prot) #include + + +/* + * This gets called at the end of handling a page fault, when + * the kernel has put a new PTE into the page table for the process. + * We use it to ensure coherency between the i-cache and d-cache + * for the page which has just been mapped in. + * On machines which use an MMU hash table, we use this to put a + * corresponding HPTE into the hash table ahead of time, instead of + * waiting for the inevitable extra hash-table miss exception. + */ +extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/include/asm-powerpc/syscalls.h b/include/asm-powerpc/syscalls.h index 2b8a458f99..eb8eb400c6 100644 --- a/include/asm-powerpc/syscalls.h +++ b/include/asm-powerpc/syscalls.h @@ -31,6 +31,7 @@ asmlinkage int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3, unsigned long p4, unsigned long p5, unsigned long p6, struct pt_regs *regs); asmlinkage long sys_pipe(int __user *fildes); +asmlinkage long sys_pipe2(int __user *fildes, int flags); asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act, struct sigaction __user *oact, size_t sigsetsize); diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h index ae7085c656..e084272ed1 100644 --- a/include/asm-powerpc/systbl.h +++ b/include/asm-powerpc/systbl.h @@ -316,3 +316,9 @@ COMPAT_SYS(fallocate) SYSCALL(subpage_prot) COMPAT_SYS_SPU(timerfd_settime) COMPAT_SYS_SPU(timerfd_gettime) +COMPAT_SYS_SPU(signalfd4) +SYSCALL_SPU(eventfd2) +SYSCALL_SPU(epoll_create1) +SYSCALL_SPU(dup3) +SYSCALL_SPU(pipe2) +SYSCALL(inotify_init1) diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h index e6e25e2364..d6648c1433 100644 --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -110,6 +110,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } #endif extern int set_dabr(unsigned long dabr); +extern void do_dabr(struct pt_regs *regs, unsigned long address, + unsigned long error_code); extern void print_backtrace(unsigned long *); extern void show_regs(struct pt_regs * regs); extern void flush_instruction_cache(void); diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h index 5c91081476..361cd5c7a3 100644 --- a/include/asm-powerpc/tlbflush.h +++ b/include/asm-powerpc/tlbflush.h @@ -162,16 +162,5 @@ extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, #endif -/* - * This gets called at the end of handling a page fault, when - * the kernel has put a new PTE into the page table for the process. - * We use it to ensure coherency between the i-cache and d-cache - * for the page which has just been mapped in. - * On machines which use an MMU hash table, we use this to put a - * corresponding HPTE into the hash table ahead of time, instead of - * waiting for the inevitable extra hash-table miss exception. - */ -extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); - #endif /*__KERNEL__ */ #endif /* _ASM_POWERPC_TLBFLUSH_H */ diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index ce91bb6620..e07d0c76ed 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -335,10 +335,16 @@ #define __NR_subpage_prot 310 #define __NR_timerfd_settime 311 #define __NR_timerfd_gettime 312 +#define __NR_signalfd4 313 +#define __NR_eventfd2 314 +#define __NR_epoll_create1 315 +#define __NR_dup3 316 +#define __NR_pipe2 317 +#define __NR_inotify_init1 318 #ifdef __KERNEL__ -#define __NR_syscalls 313 +#define __NR_syscalls 319 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/include/asm-powerpc/vio.h b/include/asm-powerpc/vio.h index 56512a968d..0a290a1959 100644 --- a/include/asm-powerpc/vio.h +++ b/include/asm-powerpc/vio.h @@ -39,16 +39,32 @@ #define VIO_IRQ_DISABLE 0UL #define VIO_IRQ_ENABLE 1UL +/* + * VIO CMO minimum entitlement for all devices and spare entitlement + */ +#define VIO_CMO_MIN_ENT 1562624 + struct iommu_table; -/* - * The vio_dev structure is used to describe virtual I/O devices. +/** + * vio_dev - This structure is used to describe virtual I/O devices. + * + * @desired: set from return of driver's get_desired_dma() function + * @entitled: bytes of IO data that has been reserved for this device. + * @allocated: bytes of IO data currently in use by the device. + * @allocs_failed: number of DMA failures due to insufficient entitlement. */ struct vio_dev { const char *name; const char *type; uint32_t unit_address; unsigned int irq; + struct { + size_t desired; + size_t entitled; + size_t allocated; + atomic_t allocs_failed; + } cmo; struct device dev; }; @@ -56,12 +72,19 @@ struct vio_driver { const struct vio_device_id *id_table; int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); int (*remove)(struct vio_dev *dev); + /* A driver must have a get_desired_dma() function to + * be loaded in a CMO environment if it uses DMA. + */ + unsigned long (*get_desired_dma)(struct vio_dev *dev); struct device_driver driver; }; extern int vio_register_driver(struct vio_driver *drv); extern void vio_unregister_driver(struct vio_driver *drv); +extern int vio_cmo_entitlement_update(size_t); +extern void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired); + extern void __devinit vio_unregister_device(struct vio_dev *dev); struct device_node; diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h index 0da17d14fd..d7afa9dd66 100644 --- a/include/linux/auxvec.h +++ b/include/linux/auxvec.h @@ -26,9 +26,13 @@ #define AT_SECURE 23 /* secure mode boolean */ +#define AT_BASE_PLATFORM 24 /* string identifying real platform, may + * differ from AT_PLATFORM. */ + #define AT_EXECFN 31 /* filename of program */ + #ifdef __KERNEL__ -#define AT_VECTOR_SIZE_BASE 17 /* NEW_AUX_ENT entries in auxiliary table */ +#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */ /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */ #endif