X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=arch%2Fx86%2Fkernel%2Ftlb_uv.c;h=d0fbb7712ab05d5ae0359c0b10db4287db742d9f;hb=a14ad05f47b55ea84136eb4da43ea96fa469326a;hp=28e7c68d9d78a8407f907d20afd3bf5f3c69909b;hpb=1812924bb1823950c1dc95c478b71b037057356e;p=linux-2.6 diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 28e7c68d9d..d0fbb7712a 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -10,28 +10,26 @@ #include #include -#include #include -#include -#include -#include #include +#include #include +#include +#include +#include -struct bau_control **uv_bau_table_bases; -static int uv_bau_retry_limit; -static int uv_nshift; /* position of pnode (which is nasid>>1) */ -static unsigned long uv_mmask; +#include -char *status_table[] = { - "IDLE", - "ACTIVE", - "DESTINATION TIMEOUT", - "SOURCE TIMEOUT" -}; +static struct bau_control **uv_bau_table_bases __read_mostly; +static int uv_bau_retry_limit __read_mostly; + +/* position of pnode (which is nasid>>1): */ +static int uv_nshift __read_mostly; + +static unsigned long uv_mmask __read_mostly; -DEFINE_PER_CPU(struct ptc_stats, ptcstats); -DEFINE_PER_CPU(struct bau_control, bau_control); +static DEFINE_PER_CPU(struct ptc_stats, ptcstats); +static DEFINE_PER_CPU(struct bau_control, bau_control); /* * Free a software acknowledge hardware resource by clearing its Pending @@ -41,39 +39,36 @@ DEFINE_PER_CPU(struct bau_control, bau_control); * clear of the Timeout bit (as well) will free the resource. No reply will * be sent (the hardware will only do one reply per message). */ -static void -uv_reply_to_message(int resource, - struct bau_payload_queue_entry *msg, - struct bau_msg_status *msp) +static void uv_reply_to_message(int resource, + struct bau_payload_queue_entry *msg, + struct bau_msg_status *msp) { - int fw; + unsigned long dw; - fw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource); + dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource); msg->replied_to = 1; msg->sw_ack_vector = 0; if (msp) msp->seen_by.bits = 0; - uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, fw); - return; + uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); } /* * Do all the things a cpu should do for a TLB shootdown message. * Other cpu's may come here at the same time for this message. */ -static void -uv_bau_process_message(struct bau_payload_queue_entry *msg, - int msg_slot, int sw_ack_slot) +static void uv_bau_process_message(struct bau_payload_queue_entry *msg, + int msg_slot, int sw_ack_slot) { - int cpu; unsigned long this_cpu_mask; struct bau_msg_status *msp; + int cpu; msp = __get_cpu_var(bau_control).msg_statuses + msg_slot; cpu = uv_blade_processor_id(); msg->number_of_cpus = uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id())); - this_cpu_mask = (unsigned long)1 << cpu; + this_cpu_mask = 1UL << cpu; if (msp->seen_by.bits & this_cpu_mask) return; atomic_or_long(&msp->seen_by.bits, this_cpu_mask); @@ -94,159 +89,100 @@ uv_bau_process_message(struct bau_payload_queue_entry *msg, atomic_inc_short(&msg->acknowledge_count); if (msg->number_of_cpus == msg->acknowledge_count) uv_reply_to_message(sw_ack_slot, msg, msp); - return; } /* - * Examine the payload queue on all the distribution nodes to see + * Examine the payload queue on one distribution node to see * which messages have not been seen, and which cpu(s) have not seen them. * * Returns the number of cpu's that have not responded. */ -static int -uv_examine_destinations(struct bau_target_nodemask *distribution) +static int uv_examine_destination(struct bau_control *bau_tablesp, int sender) { - int sender; - int i; - int j; - int k; - int count = 0; - struct bau_control *bau_tablesp; struct bau_payload_queue_entry *msg; struct bau_msg_status *msp; + int count = 0; + int i; + int j; - sender = smp_processor_id(); - for (i = 0; i < (sizeof(struct bau_target_nodemask) * BITSPERBYTE); - i++) { - if (bau_node_isset(i, distribution)) { - bau_tablesp = uv_bau_table_bases[i]; - for (msg = bau_tablesp->va_queue_first, j = 0; - j < DESTINATION_PAYLOAD_QUEUE_SIZE; msg++, j++) { - if ((msg->sending_cpu == sender) && - (!msg->replied_to)) { - msp = bau_tablesp->msg_statuses + j; - printk(KERN_DEBUG - "blade %d: address:%#lx %d of %d, not cpu(s): ", - i, msg->address, - msg->acknowledge_count, - msg->number_of_cpus); - for (k = 0; k < msg->number_of_cpus; - k++) { - if (!((long)1 << k & msp-> - seen_by.bits)) { - count++; - printk("%d ", k); - } - } - printk("\n"); + for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE; + msg++, i++) { + if ((msg->sending_cpu == sender) && (!msg->replied_to)) { + msp = bau_tablesp->msg_statuses + i; + printk(KERN_DEBUG + "blade %d: address:%#lx %d of %d, not cpu(s): ", + i, msg->address, msg->acknowledge_count, + msg->number_of_cpus); + for (j = 0; j < msg->number_of_cpus; j++) { + if (!((1L << j) & msp->seen_by.bits)) { + count++; + printk("%d ", j); } } + printk("\n"); } } return count; } -/** - * uv_flush_tlb_others - globally purge translation cache of a virtual - * address or all TLB's - * @cpumaskp: mask of all cpu's in which the address is to be removed - * @mm: mm_struct containing virtual address range - * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) - * - * This is the entry point for initiating any UV global TLB shootdown. - * - * Purges the translation caches of all specified processors of the given - * virtual address, or purges all TLB's on specified processors. - * - * The caller has derived the cpumaskp from the mm_struct and has subtracted - * the local cpu from the mask. This function is called only if there - * are bits set in the mask. (e.g. flush_tlb_page()) +/* + * Examine the payload queue on all the distribution nodes to see + * which messages have not been seen, and which cpu(s) have not seen them. * - * The cpumaskp is converted into a nodemask of the nodes containing - * the cpus. + * Returns the number of cpu's that have not responded. */ -int -uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, unsigned long va) +static int uv_examine_destinations(struct bau_target_nodemask *distribution) { + int sender; int i; - int blade; - int cpu; - int bit; - int right_shift; - int this_blade; - int exams = 0; - int tries = 0; - long source_timeouts = 0; - long destination_timeouts = 0; - unsigned long index; - unsigned long mmr_offset; - unsigned long descriptor_status; - struct bau_activation_descriptor *bau_desc; - ktime_t time1, time2; - - cpu = uv_blade_processor_id(); - this_blade = uv_numa_blade_id(); - bau_desc = __get_cpu_var(bau_control).descriptor_base; - bau_desc += (UV_ITEMS_PER_DESCRIPTOR * cpu); - - bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); + int count = 0; - i = 0; - for_each_cpu_mask(bit, *cpumaskp) { - blade = uv_cpu_to_blade_id(bit); - if (blade > (UV_DISTRIBUTION_SIZE - 1)) - BUG(); - if (blade == this_blade) + sender = smp_processor_id(); + for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) { + if (!bau_node_isset(i, distribution)) continue; - bau_node_set(blade, &bau_desc->distribution); - /* leave the bits for the remote cpu's in the mask until - success; on failure we fall back to the IPI method */ - i++; - } - if (i == 0) - goto none_to_flush; - __get_cpu_var(ptcstats).requestor++; - __get_cpu_var(ptcstats).ntargeted += i; - - bau_desc->payload.address = va; - bau_desc->payload.sending_cpu = smp_processor_id(); - - if (cpu < UV_CPUS_PER_ACT_STATUS) { - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; - right_shift = cpu * UV_ACT_STATUS_SIZE; - } else { - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; - right_shift = - ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); + count += uv_examine_destination(uv_bau_table_bases[i], sender); } - time1 = ktime_get(); + return count; +} -retry: - tries++; - index = ((unsigned long) - 1 << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | cpu; - uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); +/* + * wait for completion of a broadcast message + * + * return COMPLETE, RETRY or GIVEUP + */ +static int uv_wait_completion(struct bau_desc *bau_desc, + unsigned long mmr_offset, int right_shift) +{ + int exams = 0; + long destination_timeouts = 0; + long source_timeouts = 0; + unsigned long descriptor_status; while ((descriptor_status = (((unsigned long) - uv_read_local_mmr(mmr_offset) >> - right_shift) & UV_ACT_STATUS_MASK)) != - DESC_STATUS_IDLE) { + uv_read_local_mmr(mmr_offset) >> + right_shift) & UV_ACT_STATUS_MASK)) != + DESC_STATUS_IDLE) { if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { source_timeouts++; if (source_timeouts > SOURCE_TIMEOUT_LIMIT) source_timeouts = 0; __get_cpu_var(ptcstats).s_retry++; - goto retry; + return FLUSH_RETRY; } - /* spin here looking for progress at the destinations */ + /* + * spin here looking for progress at the destinations + */ if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) { destination_timeouts++; if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) { - /* returns # of cpus not responding */ + /* + * returns number of cpus not responding + */ if (uv_examine_destinations (&bau_desc->distribution) == 0) { __get_cpu_var(ptcstats).d_retry++; - goto retry; + return FLUSH_RETRY; } exams++; if (exams >= uv_bau_retry_limit) { @@ -254,38 +190,156 @@ retry: "uv_flush_tlb_others"); printk("giving up on cpu %d\n", smp_processor_id()); - goto unsuccessful; + return FLUSH_GIVEUP; } - /* delays can hang up the simulator + /* + * delays can hang the simulator udelay(1000); */ destination_timeouts = 0; } } } + return FLUSH_COMPLETE; +} + +/** + * uv_flush_send_and_wait + * + * Send a broadcast and wait for a broadcast message to complete. + * + * The cpumaskp mask contains the cpus the broadcast was sent to. + * + * Returns 1 if all remote flushing was done. The mask is zeroed. + * Returns 0 if some remote flushing remains to be done. The mask is left + * unchanged. + */ +int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, + cpumask_t *cpumaskp) +{ + int completion_status = 0; + int right_shift; + int tries = 0; + int blade; + int bit; + unsigned long mmr_offset; + unsigned long index; + cycles_t time1; + cycles_t time2; + + if (cpu < UV_CPUS_PER_ACT_STATUS) { + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; + right_shift = cpu * UV_ACT_STATUS_SIZE; + } else { + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; + right_shift = + ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); + } + time1 = get_cycles(); + do { + tries++; + index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | + cpu; + uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); + completion_status = uv_wait_completion(bau_desc, mmr_offset, + right_shift); + } while (completion_status == FLUSH_RETRY); + time2 = get_cycles(); + __get_cpu_var(ptcstats).sflush += (time2 - time1); if (tries > 1) __get_cpu_var(ptcstats).retriesok++; - /* on success, clear the remote cpu's from the mask so we don't - use the IPI method of shootdown on them */ + + if (completion_status == FLUSH_GIVEUP) { + /* + * Cause the caller to do an IPI-style TLB shootdown on + * the cpu's, all of which are still in the mask. + */ + __get_cpu_var(ptcstats).ptc_i++; + return 0; + } + + /* + * Success, so clear the remote cpu's from the mask so we don't + * use the IPI method of shootdown on them. + */ for_each_cpu_mask(bit, *cpumaskp) { blade = uv_cpu_to_blade_id(bit); if (blade == this_blade) continue; cpu_clear(bit, *cpumaskp); } + if (!cpus_empty(*cpumaskp)) + return 0; + return 1; +} -unsuccessful: - time2 = ktime_get(); - __get_cpu_var(ptcstats).sflush_ns += (time2.tv64 - time1.tv64); +/** + * uv_flush_tlb_others - globally purge translation cache of a virtual + * address or all TLB's + * @cpumaskp: mask of all cpu's in which the address is to be removed + * @mm: mm_struct containing virtual address range + * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) + * + * This is the entry point for initiating any UV global TLB shootdown. + * + * Purges the translation caches of all specified processors of the given + * virtual address, or purges all TLB's on specified processors. + * + * The caller has derived the cpumaskp from the mm_struct and has subtracted + * the local cpu from the mask. This function is called only if there + * are bits set in the mask. (e.g. flush_tlb_page()) + * + * The cpumaskp is converted into a nodemask of the nodes containing + * the cpus. + * + * Returns 1 if all remote flushing was done. + * Returns 0 if some remote flushing remains to be done. + */ +int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, + unsigned long va) +{ + int i; + int bit; + int blade; + int cpu; + int this_blade; + int locals = 0; + struct bau_desc *bau_desc; + + cpu = uv_blade_processor_id(); + this_blade = uv_numa_blade_id(); + bau_desc = __get_cpu_var(bau_control).descriptor_base; + bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; -none_to_flush: - if (cpus_empty(*cpumaskp)) - return 1; + bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); - /* Cause the caller to do an IPI-style TLB shootdown on - the cpu's still in the mask */ - __get_cpu_var(ptcstats).ptc_i++; - return 0; + i = 0; + for_each_cpu_mask(bit, *cpumaskp) { + blade = uv_cpu_to_blade_id(bit); + BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); + if (blade == this_blade) { + locals++; + continue; + } + bau_node_set(blade, &bau_desc->distribution); + i++; + } + if (i == 0) { + /* + * no off_node flushing; return status for local node + */ + if (locals) + return 0; + else + return 1; + } + __get_cpu_var(ptcstats).requestor++; + __get_cpu_var(ptcstats).ntargeted += i; + + bau_desc->payload.address = va; + bau_desc->payload.sending_cpu = smp_processor_id(); + + return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); } /* @@ -302,13 +356,14 @@ none_to_flush: * (the resource will not be freed until noninterruptable cpus see this * interrupt; hardware will timeout the s/w ack and reply ERROR) */ -void -uv_bau_message_interrupt(struct pt_regs *regs) +void uv_bau_message_interrupt(struct pt_regs *regs) { - struct bau_payload_queue_entry *pqp; + struct bau_payload_queue_entry *va_queue_first; + struct bau_payload_queue_entry *va_queue_last; struct bau_payload_queue_entry *msg; struct pt_regs *old_regs = set_irq_regs(regs); - ktime_t time1, time2; + cycles_t time1; + cycles_t time2; int msg_slot; int sw_ack_slot; int fw; @@ -319,23 +374,25 @@ uv_bau_message_interrupt(struct pt_regs *regs) exit_idle(); irq_enter(); - time1 = ktime_get(); + time1 = get_cycles(); local_pnode = uv_blade_to_pnode(uv_numa_blade_id()); - pqp = __get_cpu_var(bau_control).va_queue_first; + va_queue_first = __get_cpu_var(bau_control).va_queue_first; + va_queue_last = __get_cpu_var(bau_control).va_queue_last; + msg = __get_cpu_var(bau_control).bau_msg_head; while (msg->sw_ack_vector) { count++; fw = msg->sw_ack_vector; - msg_slot = msg - pqp; + msg_slot = msg - va_queue_first; sw_ack_slot = ffs(fw) - 1; uv_bau_process_message(msg, msg_slot, sw_ack_slot); msg++; - if (msg > __get_cpu_var(bau_control).va_queue_last) - msg = __get_cpu_var(bau_control).va_queue_first; + if (msg > va_queue_last) + msg = va_queue_first; __get_cpu_var(bau_control).bau_msg_head = msg; } if (!count) @@ -343,16 +400,14 @@ uv_bau_message_interrupt(struct pt_regs *regs) else if (count > 1) __get_cpu_var(ptcstats).multmsg++; - time2 = ktime_get(); - __get_cpu_var(ptcstats).dflush_ns += (time2.tv64 - time1.tv64); + time2 = get_cycles(); + __get_cpu_var(ptcstats).dflush += (time2 - time1); irq_exit(); set_irq_regs(old_regs); - return; } -static void -uv_enable_timeouts(void) +static void uv_enable_timeouts(void) { int i; int blade; @@ -361,7 +416,6 @@ uv_enable_timeouts(void) int cur_cpu = 0; unsigned long apicid; - /* better if we had each_online_blade */ last_blade = -1; for_each_online_node(i) { blade = uv_node_to_blade_id(i); @@ -372,19 +426,16 @@ uv_enable_timeouts(void) pnode = uv_blade_to_pnode(blade); cur_cpu += uv_blade_nr_possible_cpus(i); } - return; } -static void * -uv_ptc_seq_start(struct seq_file *file, loff_t *offset) +static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) { if (*offset < num_possible_cpus()) return offset; return NULL; } -static void * -uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) +static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) { (*offset)++; if (*offset < num_possible_cpus()) @@ -392,8 +443,7 @@ uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) return NULL; } -static void -uv_ptc_seq_stop(struct seq_file *file, void *data) +static void uv_ptc_seq_stop(struct seq_file *file, void *data) { } @@ -401,8 +451,7 @@ uv_ptc_seq_stop(struct seq_file *file, void *data) * Display the statistics thru /proc * data points to the cpu number */ -static int -uv_ptc_seq_show(struct seq_file *file, void *data) +static int uv_ptc_seq_show(struct seq_file *file, void *data) { struct ptc_stats *stat; int cpu; @@ -413,7 +462,7 @@ uv_ptc_seq_show(struct seq_file *file, void *data) seq_printf(file, "# cpu requestor requestee one all sretry dretry ptc_i "); seq_printf(file, - "sw_ack sflush_us dflush_us sok dnomsg dmult starget\n"); + "sw_ack sflush dflush sok dnomsg dmult starget\n"); } if (cpu < num_possible_cpus() && cpu_online(cpu)) { stat = &per_cpu(ptcstats, cpu); @@ -425,7 +474,7 @@ uv_ptc_seq_show(struct seq_file *file, void *data) uv_read_global_mmr64(uv_blade_to_pnode (uv_cpu_to_blade_id(cpu)), UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), - stat->sflush_ns / 1000, stat->dflush_ns / 1000, + stat->sflush, stat->dflush, stat->retriesok, stat->nomsg, stat->multmsg, stat->ntargeted); } @@ -437,13 +486,14 @@ uv_ptc_seq_show(struct seq_file *file, void *data) * 0: display meaning of the statistics * >0: retry limit */ -static ssize_t -uv_ptc_proc_write(struct file *file, const char __user *user, - size_t count, loff_t *data) +static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, + size_t count, loff_t *data) { long newmode; char optstr[64]; + if (count == 0 || count > sizeof(optstr)) + return -EINVAL; if (copy_from_user(optstr, user, count)) return -EFAULT; optstr[count - 1] = '\0'; @@ -471,9 +521,9 @@ uv_ptc_proc_write(struct file *file, const char __user *user, printk(KERN_DEBUG "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); printk(KERN_DEBUG - "sflush_us: microseconds spent in uv_flush_tlb_others()\n"); + "sflush_us: cycles spent in uv_flush_tlb_others()\n"); printk(KERN_DEBUG - "dflush_us: microseconds spent in handling flush requests\n"); + "dflush_us: cycles spent in handling flush requests\n"); printk(KERN_DEBUG "sok: successes on retry\n"); printk(KERN_DEBUG "dnomsg: interrupts with no message\n"); printk(KERN_DEBUG @@ -489,248 +539,254 @@ uv_ptc_proc_write(struct file *file, const char __user *user, } static const struct seq_operations uv_ptc_seq_ops = { - .start = uv_ptc_seq_start, - .next = uv_ptc_seq_next, - .stop = uv_ptc_seq_stop, - .show = uv_ptc_seq_show + .start = uv_ptc_seq_start, + .next = uv_ptc_seq_next, + .stop = uv_ptc_seq_stop, + .show = uv_ptc_seq_show }; -static int -uv_ptc_proc_open(struct inode *inode, struct file *file) +static int uv_ptc_proc_open(struct inode *inode, struct file *file) { return seq_open(file, &uv_ptc_seq_ops); } static const struct file_operations proc_uv_ptc_operations = { - .open = uv_ptc_proc_open, - .read = seq_read, - .write = uv_ptc_proc_write, - .llseek = seq_lseek, - .release = seq_release, + .open = uv_ptc_proc_open, + .read = seq_read, + .write = uv_ptc_proc_write, + .llseek = seq_lseek, + .release = seq_release, }; -static struct proc_dir_entry *proc_uv_ptc; - -static int __init -uv_ptc_init(void) +static int __init uv_ptc_init(void) { - static struct proc_dir_entry *sgi_proc_dir; - - sgi_proc_dir = NULL; + struct proc_dir_entry *proc_uv_ptc; if (!is_uv_system()) return 0; - sgi_proc_dir = proc_mkdir("sgi_uv", NULL); - if (!sgi_proc_dir) + if (!proc_mkdir("sgi_uv", NULL)) return -EINVAL; proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); if (!proc_uv_ptc) { printk(KERN_ERR "unable to create %s proc entry\n", UV_PTC_BASENAME); + remove_proc_entry("sgi_uv", NULL); return -EINVAL; } proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; return 0; } -static void __exit -uv_ptc_exit(void) +/* + * begin the initialization of the per-blade control structures + */ +static struct bau_control * __init uv_table_bases_init(int blade, int node) { - remove_proc_entry(UV_PTC_BASENAME, NULL); + int i; + int *ip; + struct bau_msg_status *msp; + struct bau_control *bau_tabp; + + bau_tabp = + kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node); + BUG_ON(!bau_tabp); + + bau_tabp->msg_statuses = + kmalloc_node(sizeof(struct bau_msg_status) * + DEST_Q_SIZE, GFP_KERNEL, node); + BUG_ON(!bau_tabp->msg_statuses); + + for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++) + bau_cpubits_clear(&msp->seen_by, (int) + uv_blade_nr_possible_cpus(blade)); + + bau_tabp->watching = + kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node); + BUG_ON(!bau_tabp->watching); + + for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++) + *ip = 0; + + uv_bau_table_bases[blade] = bau_tabp; + + return bau_tabp; } -module_init(uv_ptc_init); -module_exit(uv_ptc_exit); +/* + * finish the initialization of the per-blade control structures + */ +static void __init +uv_table_bases_finish(int blade, int node, int cur_cpu, + struct bau_control *bau_tablesp, + struct bau_desc *adp) +{ + struct bau_control *bcp; + int i; + + for (i = cur_cpu; i < cur_cpu + uv_blade_nr_possible_cpus(blade); i++) { + bcp = (struct bau_control *)&per_cpu(bau_control, i); + + bcp->bau_msg_head = bau_tablesp->va_queue_first; + bcp->va_queue_first = bau_tablesp->va_queue_first; + bcp->va_queue_last = bau_tablesp->va_queue_last; + bcp->watching = bau_tablesp->watching; + bcp->msg_statuses = bau_tablesp->msg_statuses; + bcp->descriptor_base = adp; + } +} /* - * Initialization of BAU-related structures + * initialize the sending side's sending buffers */ -int __init -uv_bau_init(void) +static struct bau_desc * __init +uv_activation_descriptor_init(int node, int pnode) { int i; - int j; - int blade; - int nblades; - int *ip; - int pnode; - int last_blade; - int cur_cpu = 0; unsigned long pa; - unsigned long n; unsigned long m; + unsigned long n; unsigned long mmr_image; - unsigned long apicid; + struct bau_desc *adp; + struct bau_desc *ad2; + + adp = (struct bau_desc *) + kmalloc_node(16384, GFP_KERNEL, node); + BUG_ON(!adp); + + pa = __pa((unsigned long)adp); + n = pa >> uv_nshift; + m = pa & uv_mmask; + + mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE); + if (mmr_image) { + uv_write_global_mmr64(pnode, (unsigned long) + UVH_LB_BAU_SB_DESCRIPTOR_BASE, + (n << UV_DESC_BASE_PNODE_SHIFT | m)); + } + + for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) { + memset(ad2, 0, sizeof(struct bau_desc)); + ad2->header.sw_ack_flag = 1; + ad2->header.base_dest_nodeid = + uv_blade_to_pnode(uv_cpu_to_blade_id(0)); + ad2->header.command = UV_NET_ENDPOINT_INTD; + ad2->header.int_both = 1; + /* + * all others need to be set to zero: + * fairness chaining multilevel count replied_to + */ + } + return adp; +} + +/* + * initialize the destination side's receiving buffers + */ +static struct bau_payload_queue_entry * __init +uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) +{ + struct bau_payload_queue_entry *pqp; char *cp; - struct bau_control *bau_tablesp; - struct bau_activation_descriptor *adp, *ad2; + + pqp = (struct bau_payload_queue_entry *) kmalloc_node( + (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), + GFP_KERNEL, node); + BUG_ON(!pqp); + + cp = (char *)pqp + 31; + pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); + bau_tablesp->va_queue_first = pqp; + uv_write_global_mmr64(pnode, + UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, + ((unsigned long)pnode << + UV_PAYLOADQ_PNODE_SHIFT) | + uv_physnodeaddr(pqp)); + uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, + uv_physnodeaddr(pqp)); + bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1); + uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, + (unsigned long) + uv_physnodeaddr(bau_tablesp->va_queue_last)); + memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); + + return pqp; +} + +/* + * Initialization of each UV blade's structures + */ +static int __init uv_init_blade(int blade, int node, int cur_cpu) +{ + int pnode; + unsigned long pa; + unsigned long apicid; + struct bau_desc *adp; struct bau_payload_queue_entry *pqp; - struct bau_msg_status *msp; - struct bau_control *bcp; + struct bau_control *bau_tablesp; + + bau_tablesp = uv_table_bases_init(blade, node); + pnode = uv_blade_to_pnode(blade); + adp = uv_activation_descriptor_init(node, pnode); + pqp = uv_payload_queue_init(node, pnode, bau_tablesp); + uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp); + /* + * the below initialization can't be in firmware because the + * messaging IRQ will be determined by the OS + */ + apicid = per_cpu(x86_cpu_to_apicid, cur_cpu); + pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); + if ((pa & 0xff) != UV_BAU_MESSAGE) { + uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, + ((apicid << 32) | UV_BAU_MESSAGE)); + } + return 0; +} + +/* + * Initialization of BAU-related structures + */ +static int __init uv_bau_init(void) +{ + int blade; + int node; + int nblades; + int last_blade; + int cur_cpu = 0; if (!is_uv_system()) return 0; uv_bau_retry_limit = 1; - - if ((sizeof(struct bau_local_cpumask) * BITSPERBYTE) < - MAX_CPUS_PER_NODE) { - printk(KERN_ERR - "uv_bau_init: bau_local_cpumask.bits too small\n"); - BUG(); - } - uv_nshift = uv_hub_info->n_val; - uv_mmask = ((unsigned long)1 << uv_hub_info->n_val) - 1; + uv_mmask = (1UL << uv_hub_info->n_val) - 1; nblades = 0; last_blade = -1; - for_each_online_node(i) { - blade = uv_node_to_blade_id(i); + for_each_online_node(node) { + blade = uv_node_to_blade_id(node); if (blade == last_blade) continue; last_blade = blade; nblades++; } - uv_bau_table_bases = (struct bau_control **) kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL); - if (!uv_bau_table_bases) - BUG(); + BUG_ON(!uv_bau_table_bases); - /* better if we had each_online_blade */ last_blade = -1; - for_each_online_node(i) { - blade = uv_node_to_blade_id(i); + for_each_online_node(node) { + blade = uv_node_to_blade_id(node); if (blade == last_blade) continue; last_blade = blade; - - bau_tablesp = - kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, i); - if (!bau_tablesp) - BUG(); - - bau_tablesp->msg_statuses = - kmalloc_node(sizeof(struct bau_msg_status) * - DESTINATION_PAYLOAD_QUEUE_SIZE, GFP_KERNEL, i); - if (!bau_tablesp->msg_statuses) - BUG(); - for (j = 0, msp = bau_tablesp->msg_statuses; - j < DESTINATION_PAYLOAD_QUEUE_SIZE; j++, msp++) { - bau_cpubits_clear(&msp->seen_by, (int) - uv_blade_nr_possible_cpus(blade)); - } - - bau_tablesp->watching = - kmalloc_node(sizeof(int) * DESTINATION_NUM_RESOURCES, - GFP_KERNEL, i); - if (!bau_tablesp->watching) - BUG(); - for (j = 0, ip = bau_tablesp->watching; - j < DESTINATION_PAYLOAD_QUEUE_SIZE; j++, ip++) { - *ip = 0; - } - - uv_bau_table_bases[i] = bau_tablesp; - - pnode = uv_blade_to_pnode(blade); - - if (sizeof(struct bau_activation_descriptor) != 64) - BUG(); - - adp = (struct bau_activation_descriptor *) - kmalloc_node(16384, GFP_KERNEL, i); - if (!adp) - BUG(); - if ((unsigned long)adp & 0xfff) - BUG(); - pa = __pa((unsigned long)adp); - n = pa >> uv_nshift; - m = pa & uv_mmask; - - mmr_image = uv_read_global_mmr64(pnode, - UVH_LB_BAU_SB_DESCRIPTOR_BASE); - if (mmr_image) - uv_write_global_mmr64(pnode, (unsigned long) - UVH_LB_BAU_SB_DESCRIPTOR_BASE, - (n << UV_DESC_BASE_PNODE_SHIFT | - m)); - for (j = 0, ad2 = adp; j < UV_ACTIVATION_DESCRIPTOR_SIZE; - j++, ad2++) { - memset(ad2, 0, - sizeof(struct bau_activation_descriptor)); - ad2->header.sw_ack_flag = 1; - ad2->header.base_dest_nodeid = - uv_blade_to_pnode(uv_cpu_to_blade_id(0)); - ad2->header.command = UV_NET_ENDPOINT_INTD; - ad2->header.int_both = 1; - /* all others need to be set to zero: - fairness chaining multilevel count replied_to */ - } - - pqp = (struct bau_payload_queue_entry *) - kmalloc_node((DESTINATION_PAYLOAD_QUEUE_SIZE + 1) * - sizeof(struct bau_payload_queue_entry), - GFP_KERNEL, i); - if (!pqp) - BUG(); - if (sizeof(struct bau_payload_queue_entry) != 32) - BUG(); - if ((unsigned long)(&((struct bau_payload_queue_entry *)0)-> - sw_ack_vector) != 15) - BUG(); - - cp = (char *)pqp + 31; - pqp = (struct bau_payload_queue_entry *) - (((unsigned long)cp >> 5) << 5); - bau_tablesp->va_queue_first = pqp; - uv_write_global_mmr64(pnode, - UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, - ((unsigned long)pnode << - UV_PAYLOADQ_PNODE_SHIFT) | - uv_physnodeaddr(pqp)); - uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, - uv_physnodeaddr(pqp)); - bau_tablesp->va_queue_last = - pqp + (DESTINATION_PAYLOAD_QUEUE_SIZE - 1); - uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, - (unsigned long) - uv_physnodeaddr(bau_tablesp-> - va_queue_last)); - memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * - DESTINATION_PAYLOAD_QUEUE_SIZE); - - /* this initialization can't be in firmware because the - messaging IRQ will be determined by the OS */ - apicid = per_cpu(x86_cpu_to_apicid, cur_cpu); - pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); - if ((pa & 0xff) != UV_BAU_MESSAGE) { - uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, - ((apicid << 32) | - UV_BAU_MESSAGE)); - } - - for (j = cur_cpu; j < (cur_cpu + uv_blade_nr_possible_cpus(i)); - j++) { - bcp = (struct bau_control *)&per_cpu(bau_control, j); - bcp->bau_msg_head = bau_tablesp->va_queue_first; - bcp->va_queue_first = bau_tablesp->va_queue_first; - - bcp->va_queue_last = bau_tablesp->va_queue_last; - bcp->watching = bau_tablesp->watching; - bcp->msg_statuses = bau_tablesp->msg_statuses; - bcp->descriptor_base = adp; - } - cur_cpu += uv_blade_nr_possible_cpus(i); + uv_init_blade(blade, node, cur_cpu); + cur_cpu += uv_blade_nr_possible_cpus(blade); } - set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); - uv_enable_timeouts(); return 0; } - __initcall(uv_bau_init); +__initcall(uv_ptc_init);