2 * edac_mc kernel module
3 * (C) 2005 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 * http://www.anime.net/~goemon/linux-ecc/
11 * Modified by Dave Peterson and Doug Thompson
16 #include <linux/config.h>
17 #include <linux/version.h>
18 #include <linux/module.h>
19 #include <linux/proc_fs.h>
20 #include <linux/kernel.h>
21 #include <linux/types.h>
22 #include <linux/smp.h>
23 #include <linux/init.h>
24 #include <linux/sysctl.h>
25 #include <linux/highmem.h>
26 #include <linux/timer.h>
27 #include <linux/slab.h>
28 #include <linux/jiffies.h>
29 #include <linux/spinlock.h>
30 #include <linux/list.h>
31 #include <linux/sysdev.h>
32 #include <linux/ctype.h>
34 #include <asm/uaccess.h>
40 #define EDAC_MC_VERSION "edac_mc Ver: 2.0.0 " __DATE__
42 #ifdef CONFIG_EDAC_DEBUG
43 /* Values of 0 to 4 will generate output */
44 int edac_debug_level = 1;
45 EXPORT_SYMBOL(edac_debug_level);
48 /* EDAC Controls, setable by module parameter, and sysfs */
49 static int log_ue = 1;
50 static int log_ce = 1;
51 static int panic_on_ue = 1;
52 static int poll_msec = 1000;
static int check_pci_parity = 0;	/* default NO: starts at 0, PCI parity is not checked until this is set */
55 static int panic_on_pci_parity; /* default no panic on PCI Parity */
56 static atomic_t pci_parity_count = ATOMIC_INIT(0);
58 /* lock to memory controller's control array */
59 static DECLARE_MUTEX(mem_ctls_mutex);
60 static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices);
/* Structure of the whitelist and blacklist arrays:
 * one vendor-id:device-id pair per element.
 */
struct edac_pci_device_list {
	unsigned int vendor;	/* Vendor ID */
	unsigned int device;	/* Device ID */
};
69 #define MAX_LISTED_PCI_DEVICES 32
71 /* List of PCI devices (vendor-id:device-id) that should be skipped */
72 static struct edac_pci_device_list pci_blacklist[MAX_LISTED_PCI_DEVICES];
73 static int pci_blacklist_count;
75 /* List of PCI devices (vendor-id:device-id) that should be scanned */
76 static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES];
77 static int pci_whitelist_count ;
79 /* START sysfs data and methods */
81 static const char *mem_types[] = {
82 [MEM_EMPTY] = "Empty",
83 [MEM_RESERVED] = "Reserved",
84 [MEM_UNKNOWN] = "Unknown",
88 [MEM_SDR] = "Unbuffered-SDR",
89 [MEM_RDR] = "Registered-SDR",
90 [MEM_DDR] = "Unbuffered-DDR",
91 [MEM_RDDR] = "Registered-DDR",
95 static const char *dev_types[] = {
96 [DEV_UNKNOWN] = "Unknown",
106 static const char *edac_caps[] = {
107 [EDAC_UNKNOWN] = "Unknown",
108 [EDAC_NONE] = "None",
109 [EDAC_RESERVED] = "Reserved",
110 [EDAC_PARITY] = "PARITY",
112 [EDAC_SECDED] = "SECDED",
113 [EDAC_S2ECD2ED] = "S2ECD2ED",
114 [EDAC_S4ECD4ED] = "S4ECD4ED",
115 [EDAC_S8ECD8ED] = "S8ECD8ED",
116 [EDAC_S16ECD16ED] = "S16ECD16ED"
120 /* sysfs object: /sys/devices/system/edac */
121 static struct sysdev_class edac_class = {
122 set_kset_name("edac"),
126 * /sys/devices/system/edac/mc
127 * /sys/devices/system/edac/pci
129 static struct kobject edac_memctrl_kobj;
130 static struct kobject edac_pci_kobj;
133 * /sys/devices/system/edac/mc;
134 * data structures and methods
/*
 * Show helper for string-valued memory controller attributes.
 *
 * @ptr:	NUL-terminated string to print
 * @buffer:	output buffer supplied by the caller
 *
 * Writes the string followed by a newline and returns the number of
 * characters written (sprintf's return value).
 */
static ssize_t memctrl_string_show(void *ptr, char *buffer)
{
	char *value = (char*) ptr;

	return sprintf(buffer, "%s\n", value);
}
/*
 * Show helper for int-valued memory controller attributes.
 *
 * @ptr:	address of the int to print
 * @buffer:	output buffer supplied by the caller
 *
 * Writes the value in decimal followed by a newline and returns the number
 * of characters written (sprintf's return value).
 */
static ssize_t memctrl_int_show(void *ptr, char *buffer)
{
	int *value = (int*) ptr;

	return sprintf(buffer, "%d\n", *value);
}
148 static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
150 int *value = (int*) ptr;
152 if (isdigit(*buffer))
153 *value = simple_strtoul(buffer, NULL, 0);
158 struct memctrl_dev_attribute {
159 struct attribute attr;
161 ssize_t (*show)(void *,char *);
162 ssize_t (*store)(void *, const char *, size_t);
165 /* Set of show/store abstract level functions for memory control object */
167 memctrl_dev_show(struct kobject *kobj, struct attribute *attr, char *buffer)
169 struct memctrl_dev_attribute *memctrl_dev;
170 memctrl_dev = (struct memctrl_dev_attribute*)attr;
172 if (memctrl_dev->show)
173 return memctrl_dev->show(memctrl_dev->value, buffer);
178 memctrl_dev_store(struct kobject *kobj, struct attribute *attr,
179 const char *buffer, size_t count)
181 struct memctrl_dev_attribute *memctrl_dev;
182 memctrl_dev = (struct memctrl_dev_attribute*)attr;
184 if (memctrl_dev->store)
185 return memctrl_dev->store(memctrl_dev->value, buffer, count);
189 static struct sysfs_ops memctrlfs_ops = {
190 .show = memctrl_dev_show,
191 .store = memctrl_dev_store
194 #define MEMCTRL_ATTR(_name,_mode,_show,_store) \
195 struct memctrl_dev_attribute attr_##_name = { \
196 .attr = {.name = __stringify(_name), .mode = _mode }, \
202 #define MEMCTRL_STRING_ATTR(_name,_data,_mode,_show,_store) \
203 struct memctrl_dev_attribute attr_##_name = { \
204 .attr = {.name = __stringify(_name), .mode = _mode }, \
/* memory controller (.../edac/mc) attribute file */
211 MEMCTRL_STRING_ATTR(mc_version,EDAC_MC_VERSION,S_IRUGO,memctrl_string_show,NULL);
/* memory controller (.../edac/mc) control files */
214 MEMCTRL_ATTR(panic_on_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
215 MEMCTRL_ATTR(log_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
216 MEMCTRL_ATTR(log_ce,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
217 MEMCTRL_ATTR(poll_msec,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
220 /* Base Attributes of the memory ECC object */
221 static struct memctrl_dev_attribute *memctrl_attr[] = {
230 /* Main MC kobject release() function */
231 static void edac_memctrl_master_release(struct kobject *kobj)
233 debugf1("EDAC MC: " __FILE__ ": %s()\n", __func__);
236 static struct kobj_type ktype_memctrl = {
237 .release = edac_memctrl_master_release,
238 .sysfs_ops = &memctrlfs_ops,
239 .default_attrs = (struct attribute **) memctrl_attr,
243 /* Initialize the main sysfs entries for edac:
244 * /sys/devices/system/edac
251 static int edac_sysfs_memctrl_setup(void)
255 debugf1("MC: " __FILE__ ": %s()\n", __func__);
257 /* create the /sys/devices/system/edac directory */
258 err = sysdev_class_register(&edac_class);
260 /* Init the MC's kobject */
261 memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj));
262 kobject_init(&edac_memctrl_kobj);
264 edac_memctrl_kobj.parent = &edac_class.kset.kobj;
265 edac_memctrl_kobj.ktype = &ktype_memctrl;
267 /* generate sysfs "..../edac/mc" */
268 err = kobject_set_name(&edac_memctrl_kobj,"mc");
270 /* FIXME: maybe new sysdev_create_subdir() */
271 err = kobject_register(&edac_memctrl_kobj);
273 debugf1("Failed to register '.../edac/mc'\n");
275 debugf1("Registered '.../edac/mc' kobject\n");
279 debugf1(KERN_WARNING "__FILE__ %s() error=%d\n", __func__,err);
287 * the '..../edac/mc' kobject followed by '..../edac' itself
289 static void edac_sysfs_memctrl_teardown(void)
291 debugf0("MC: " __FILE__ ": %s()\n", __func__);
293 /* Unregister the MC's kobject */
294 kobject_unregister(&edac_memctrl_kobj);
296 /* release the master edac mc kobject */
297 kobject_put(&edac_memctrl_kobj);
299 /* Unregister the 'edac' object */
300 sysdev_class_unregister(&edac_class);
304 * /sys/devices/system/edac/pci;
305 * data structures and methods
308 struct list_control {
309 struct edac_pci_device_list *list;
/* Output the list as: vendor_id:device_id<,vendor_id:device_id> */
314 static ssize_t edac_pci_list_string_show(void *ptr, char *buffer)
316 struct list_control *listctl;
317 struct edac_pci_device_list *list;
323 list = listctl->list;
325 for (i = 0; i < *(listctl->count); i++, list++ ) {
327 len += snprintf(p + len, (PAGE_SIZE-len), ",");
329 len += snprintf(p + len,
332 list->vendor,list->device);
335 len += snprintf(p + len,(PAGE_SIZE-len), "\n");
337 return (ssize_t) len;
342 * Scan string from **s to **e looking for one 'vendor:device' tuple
343 * where each field is a hex value
345 * return 0 if an entry is NOT found
346 * return 1 if an entry is found
347 * fill in *vendor_id and *device_id with values found
349 * In both cases, make sure *s has been moved forward toward *e
351 static int parse_one_device(const char **s,const char **e,
352 unsigned int *vendor_id, unsigned int *device_id)
354 const char *runner, *p;
356 /* if null byte, we are done */
358 (*s)++; /* keep *s moving */
362 /* skip over newlines & whitespace */
363 if ((**s == '\n') || isspace(**s)) {
368 if (!isxdigit(**s)) {
373 /* parse vendor_id */
375 while (runner < *e) {
376 /* scan for vendor:device delimiter */
377 if (*runner == ':') {
378 *vendor_id = simple_strtol((char*) *s, (char**) &p, 16);
385 if (!isxdigit(*runner)) {
390 /* parse device_id */
392 *device_id = simple_strtol((char*)runner, (char**)&p, 16);
401 static ssize_t edac_pci_list_string_store(void *ptr, const char *buffer,
404 struct list_control *listctl;
405 struct edac_pci_device_list *list;
406 unsigned int vendor_id, device_id;
414 list = listctl->list;
415 index = listctl->count;
418 while (*index < MAX_LISTED_PCI_DEVICES) {
420 if (parse_one_device(&s,&e,&vendor_id,&device_id)) {
421 list[ *index ].vendor = vendor_id;
422 list[ *index ].device = device_id;
426 /* check for all data consume */
/*
 * Show helper for int-valued PCI parity attributes.
 *
 * @ptr:	address of the int to print
 * @buffer:	output buffer supplied by the caller
 *
 * Writes the value in decimal followed by a newline and returns the number
 * of characters written (sprintf's return value).
 */
static ssize_t edac_pci_int_show(void *ptr, char *buffer)
{
	/* NOTE(review): the declaration of 'value' was not visible in the
	 * reviewed listing; it is restored as an int pointer because the
	 * visible sprintf prints '*value' with "%d" -- confirm.
	 */
	int *value = ptr;

	return sprintf(buffer,"%d\n",*value);
}
440 static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count)
444 if (isdigit(*buffer))
445 *value = simple_strtoul(buffer,NULL,0);
450 struct edac_pci_dev_attribute {
451 struct attribute attr;
453 ssize_t (*show)(void *,char *);
454 ssize_t (*store)(void *, const char *,size_t);
457 /* Set of show/store abstract level functions for PCI Parity object */
458 static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr,
461 struct edac_pci_dev_attribute *edac_pci_dev;
462 edac_pci_dev= (struct edac_pci_dev_attribute*)attr;
464 if (edac_pci_dev->show)
465 return edac_pci_dev->show(edac_pci_dev->value, buffer);
469 static ssize_t edac_pci_dev_store(struct kobject *kobj, struct attribute *attr,
470 const char *buffer, size_t count)
472 struct edac_pci_dev_attribute *edac_pci_dev;
473 edac_pci_dev= (struct edac_pci_dev_attribute*)attr;
475 if (edac_pci_dev->show)
476 return edac_pci_dev->store(edac_pci_dev->value, buffer, count);
480 static struct sysfs_ops edac_pci_sysfs_ops = {
481 .show = edac_pci_dev_show,
482 .store = edac_pci_dev_store
486 #define EDAC_PCI_ATTR(_name,_mode,_show,_store) \
487 struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
488 .attr = {.name = __stringify(_name), .mode = _mode }, \
494 #define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \
495 struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
496 .attr = {.name = __stringify(_name), .mode = _mode }, \
502 static struct list_control pci_whitelist_control = {
503 .list = pci_whitelist,
504 .count = &pci_whitelist_count
507 static struct list_control pci_blacklist_control = {
508 .list = pci_blacklist,
509 .count = &pci_blacklist_count
512 /* whitelist attribute */
513 EDAC_PCI_STRING_ATTR(pci_parity_whitelist,
514 &pci_whitelist_control,
516 edac_pci_list_string_show,
517 edac_pci_list_string_store);
519 EDAC_PCI_STRING_ATTR(pci_parity_blacklist,
520 &pci_blacklist_control,
522 edac_pci_list_string_show,
523 edac_pci_list_string_store);
525 /* PCI Parity control files */
526 EDAC_PCI_ATTR(check_pci_parity,S_IRUGO|S_IWUSR,edac_pci_int_show,edac_pci_int_store);
527 EDAC_PCI_ATTR(panic_on_pci_parity,S_IRUGO|S_IWUSR,edac_pci_int_show,edac_pci_int_store);
528 EDAC_PCI_ATTR(pci_parity_count,S_IRUGO,edac_pci_int_show,NULL);
/* Base Attributes of the EDAC PCI parity object */
531 static struct edac_pci_dev_attribute *edac_pci_attr[] = {
532 &edac_pci_attr_check_pci_parity,
533 &edac_pci_attr_panic_on_pci_parity,
534 &edac_pci_attr_pci_parity_count,
535 &edac_pci_attr_pci_parity_whitelist,
536 &edac_pci_attr_pci_parity_blacklist,
540 /* No memory to release */
541 static void edac_pci_release(struct kobject *kobj)
543 debugf1("EDAC PCI: " __FILE__ ": %s()\n", __func__);
546 static struct kobj_type ktype_edac_pci = {
547 .release = edac_pci_release,
548 .sysfs_ops = &edac_pci_sysfs_ops,
549 .default_attrs = (struct attribute **) edac_pci_attr,
553 * edac_sysfs_pci_setup()
556 static int edac_sysfs_pci_setup(void)
560 debugf1("MC: " __FILE__ ": %s()\n", __func__);
562 memset(&edac_pci_kobj, 0, sizeof(edac_pci_kobj));
564 kobject_init(&edac_pci_kobj);
565 edac_pci_kobj.parent = &edac_class.kset.kobj;
566 edac_pci_kobj.ktype = &ktype_edac_pci;
568 err = kobject_set_name(&edac_pci_kobj, "pci");
570 /* Instanstiate the csrow object */
571 /* FIXME: maybe new sysdev_create_subdir() */
572 err = kobject_register(&edac_pci_kobj);
574 debugf1("Failed to register '.../edac/pci'\n");
576 debugf1("Registered '.../edac/pci' kobject\n");
582 static void edac_sysfs_pci_teardown(void)
584 debugf0("MC: " __FILE__ ": %s()\n", __func__);
586 kobject_unregister(&edac_pci_kobj);
587 kobject_put(&edac_pci_kobj);
590 /* EDAC sysfs CSROW data structures and methods */
592 /* Set of more detailed csrow<id> attribute show/store functions */
593 static ssize_t csrow_ch0_dimm_label_show(struct csrow_info *csrow, char *data)
597 if (csrow->nr_channels > 0) {
598 size = snprintf(data, EDAC_MC_LABEL_LEN,"%s\n",
599 csrow->channels[0].label);
604 static ssize_t csrow_ch1_dimm_label_show(struct csrow_info *csrow, char *data)
608 if (csrow->nr_channels > 0) {
609 size = snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
610 csrow->channels[1].label);
615 static ssize_t csrow_ch0_dimm_label_store(struct csrow_info *csrow,
616 const char *data, size_t size)
618 ssize_t max_size = 0;
620 if (csrow->nr_channels > 0) {
621 max_size = min((ssize_t)size,(ssize_t)EDAC_MC_LABEL_LEN-1);
622 strncpy(csrow->channels[0].label, data, max_size);
623 csrow->channels[0].label[max_size] = '\0';
628 static ssize_t csrow_ch1_dimm_label_store(struct csrow_info *csrow,
629 const char *data, size_t size)
631 ssize_t max_size = 0;
633 if (csrow->nr_channels > 1) {
634 max_size = min((ssize_t)size,(ssize_t)EDAC_MC_LABEL_LEN-1);
635 strncpy(csrow->channels[1].label, data, max_size);
636 csrow->channels[1].label[max_size] = '\0';
641 static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data)
643 return sprintf(data,"%u\n", csrow->ue_count);
646 static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data)
648 return sprintf(data,"%u\n", csrow->ce_count);
651 static ssize_t csrow_ch0_ce_count_show(struct csrow_info *csrow, char *data)
655 if (csrow->nr_channels > 0) {
656 size = sprintf(data,"%u\n", csrow->channels[0].ce_count);
661 static ssize_t csrow_ch1_ce_count_show(struct csrow_info *csrow, char *data)
665 if (csrow->nr_channels > 1) {
666 size = sprintf(data,"%u\n", csrow->channels[1].ce_count);
671 static ssize_t csrow_size_show(struct csrow_info *csrow, char *data)
673 return sprintf(data,"%u\n", PAGES_TO_MiB(csrow->nr_pages));
676 static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data)
678 return sprintf(data,"%s\n", mem_types[csrow->mtype]);
681 static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data)
683 return sprintf(data,"%s\n", dev_types[csrow->dtype]);
686 static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data)
688 return sprintf(data,"%s\n", edac_caps[csrow->edac_mode]);
691 struct csrowdev_attribute {
692 struct attribute attr;
693 ssize_t (*show)(struct csrow_info *,char *);
694 ssize_t (*store)(struct csrow_info *, const char *,size_t);
697 #define to_csrow(k) container_of(k, struct csrow_info, kobj)
698 #define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr)
700 /* Set of show/store higher level functions for csrow objects */
701 static ssize_t csrowdev_show(struct kobject *kobj, struct attribute *attr,
704 struct csrow_info *csrow = to_csrow(kobj);
705 struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr);
707 if (csrowdev_attr->show)
708 return csrowdev_attr->show(csrow, buffer);
712 static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr,
713 const char *buffer, size_t count)
715 struct csrow_info *csrow = to_csrow(kobj);
716 struct csrowdev_attribute * csrowdev_attr = to_csrowdev_attr(attr);
718 if (csrowdev_attr->store)
719 return csrowdev_attr->store(csrow, buffer, count);
723 static struct sysfs_ops csrowfs_ops = {
724 .show = csrowdev_show,
725 .store = csrowdev_store
728 #define CSROWDEV_ATTR(_name,_mode,_show,_store) \
729 struct csrowdev_attribute attr_##_name = { \
730 .attr = {.name = __stringify(_name), .mode = _mode }, \
/* csrow<id>/attribute files */
736 CSROWDEV_ATTR(size_mb,S_IRUGO,csrow_size_show,NULL);
737 CSROWDEV_ATTR(dev_type,S_IRUGO,csrow_dev_type_show,NULL);
738 CSROWDEV_ATTR(mem_type,S_IRUGO,csrow_mem_type_show,NULL);
739 CSROWDEV_ATTR(edac_mode,S_IRUGO,csrow_edac_mode_show,NULL);
740 CSROWDEV_ATTR(ue_count,S_IRUGO,csrow_ue_count_show,NULL);
741 CSROWDEV_ATTR(ce_count,S_IRUGO,csrow_ce_count_show,NULL);
742 CSROWDEV_ATTR(ch0_ce_count,S_IRUGO,csrow_ch0_ce_count_show,NULL);
743 CSROWDEV_ATTR(ch1_ce_count,S_IRUGO,csrow_ch1_ce_count_show,NULL);
745 /* control/attribute files */
746 CSROWDEV_ATTR(ch0_dimm_label,S_IRUGO|S_IWUSR,
747 csrow_ch0_dimm_label_show,
748 csrow_ch0_dimm_label_store);
749 CSROWDEV_ATTR(ch1_dimm_label,S_IRUGO|S_IWUSR,
750 csrow_ch1_dimm_label_show,
751 csrow_ch1_dimm_label_store);
754 /* Attributes of the CSROW<id> object */
755 static struct csrowdev_attribute *csrow_attr[] = {
764 &attr_ch0_dimm_label,
765 &attr_ch1_dimm_label,
770 /* No memory to release */
771 static void edac_csrow_instance_release(struct kobject *kobj)
773 debugf1("EDAC MC: " __FILE__ ": %s()\n", __func__);
776 static struct kobj_type ktype_csrow = {
777 .release = edac_csrow_instance_release,
778 .sysfs_ops = &csrowfs_ops,
779 .default_attrs = (struct attribute **) csrow_attr,
782 /* Create a CSROW object under specifed edac_mc_device */
783 static int edac_create_csrow_object(struct kobject *edac_mci_kobj,
784 struct csrow_info *csrow, int index )
788 debugf0("MC: " __FILE__ ": %s()\n", __func__);
790 memset(&csrow->kobj, 0, sizeof(csrow->kobj));
792 /* generate ..../edac/mc/mc<id>/csrow<index> */
794 kobject_init(&csrow->kobj);
795 csrow->kobj.parent = edac_mci_kobj;
796 csrow->kobj.ktype = &ktype_csrow;
798 /* name this instance of csrow<id> */
799 err = kobject_set_name(&csrow->kobj,"csrow%d",index);
801 /* Instanstiate the csrow object */
802 err = kobject_register(&csrow->kobj);
804 debugf0("Failed to register CSROW%d\n",index);
806 debugf0("Registered CSROW%d\n",index);
812 /* sysfs data structures and methods for the MCI kobjects */
814 static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci,
815 const char *data, size_t count )
819 mci->ue_noinfo_count = 0;
820 mci->ce_noinfo_count = 0;
823 for (row = 0; row < mci->nr_csrows; row++) {
824 struct csrow_info *ri = &mci->csrows[row];
828 for (chan = 0; chan < ri->nr_channels; chan++)
829 ri->channels[chan].ce_count = 0;
831 mci->start_time = jiffies;
836 static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data)
838 return sprintf(data,"%d\n", mci->ue_count);
841 static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data)
843 return sprintf(data,"%d\n", mci->ce_count);
846 static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data)
848 return sprintf(data,"%d\n", mci->ce_noinfo_count);
851 static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data)
853 return sprintf(data,"%d\n", mci->ue_noinfo_count);
856 static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data)
858 return sprintf(data,"%ld\n", (jiffies - mci->start_time) / HZ);
861 static ssize_t mci_mod_name_show(struct mem_ctl_info *mci, char *data)
863 return sprintf(data,"%s %s\n", mci->mod_name, mci->mod_ver);
866 static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data)
868 return sprintf(data,"%s\n", mci->ctl_name);
871 static int mci_output_edac_cap(char *buf, unsigned long edac_cap)
876 for (bit_idx = 0; bit_idx < 8 * sizeof(edac_cap); bit_idx++) {
877 if ((edac_cap >> bit_idx) & 0x1)
878 p += sprintf(p, "%s ", edac_caps[bit_idx]);
884 static ssize_t mci_edac_capability_show(struct mem_ctl_info *mci, char *data)
888 p += mci_output_edac_cap(p,mci->edac_ctl_cap);
889 p += sprintf(p, "\n");
894 static ssize_t mci_edac_current_capability_show(struct mem_ctl_info *mci,
899 p += mci_output_edac_cap(p,mci->edac_cap);
900 p += sprintf(p, "\n");
905 static int mci_output_mtype_cap(char *buf, unsigned long mtype_cap)
910 for (bit_idx = 0; bit_idx < 8 * sizeof(mtype_cap); bit_idx++) {
911 if ((mtype_cap >> bit_idx) & 0x1)
912 p += sprintf(p, "%s ", mem_types[bit_idx]);
918 static ssize_t mci_supported_mem_type_show(struct mem_ctl_info *mci, char *data)
922 p += mci_output_mtype_cap(p,mci->mtype_cap);
923 p += sprintf(p, "\n");
928 static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data)
930 int total_pages, csrow_idx;
932 for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows;
934 struct csrow_info *csrow = &mci->csrows[csrow_idx];
936 if (!csrow->nr_pages)
938 total_pages += csrow->nr_pages;
941 return sprintf(data,"%u\n", PAGES_TO_MiB(total_pages));
944 struct mcidev_attribute {
945 struct attribute attr;
946 ssize_t (*show)(struct mem_ctl_info *,char *);
947 ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
950 #define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj)
951 #define to_mcidev_attr(a) container_of(a, struct mcidev_attribute, attr)
953 static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr,
956 struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
957 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr);
959 if (mcidev_attr->show)
960 return mcidev_attr->show(mem_ctl_info, buffer);
964 static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr,
965 const char *buffer, size_t count)
967 struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
968 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr);
970 if (mcidev_attr->store)
971 return mcidev_attr->store(mem_ctl_info, buffer, count);
975 static struct sysfs_ops mci_ops = {
977 .store = mcidev_store
980 #define MCIDEV_ATTR(_name,_mode,_show,_store) \
981 struct mcidev_attribute mci_attr_##_name = { \
982 .attr = {.name = __stringify(_name), .mode = _mode }, \
988 MCIDEV_ATTR(reset_counters,S_IWUSR,NULL,mci_reset_counters_store);
990 /* Attribute files */
991 MCIDEV_ATTR(mc_name,S_IRUGO,mci_ctl_name_show,NULL);
992 MCIDEV_ATTR(module_name,S_IRUGO,mci_mod_name_show,NULL);
993 MCIDEV_ATTR(edac_capability,S_IRUGO,mci_edac_capability_show,NULL);
994 MCIDEV_ATTR(size_mb,S_IRUGO,mci_size_mb_show,NULL);
995 MCIDEV_ATTR(seconds_since_reset,S_IRUGO,mci_seconds_show,NULL);
996 MCIDEV_ATTR(ue_noinfo_count,S_IRUGO,mci_ue_noinfo_show,NULL);
997 MCIDEV_ATTR(ce_noinfo_count,S_IRUGO,mci_ce_noinfo_show,NULL);
998 MCIDEV_ATTR(ue_count,S_IRUGO,mci_ue_count_show,NULL);
999 MCIDEV_ATTR(ce_count,S_IRUGO,mci_ce_count_show,NULL);
1000 MCIDEV_ATTR(edac_current_capability,S_IRUGO,
1001 mci_edac_current_capability_show,NULL);
1002 MCIDEV_ATTR(supported_mem_type,S_IRUGO,
1003 mci_supported_mem_type_show,NULL);
1006 static struct mcidev_attribute *mci_attr[] = {
1007 &mci_attr_reset_counters,
1008 &mci_attr_module_name,
1010 &mci_attr_edac_capability,
1011 &mci_attr_edac_current_capability,
1012 &mci_attr_supported_mem_type,
1014 &mci_attr_seconds_since_reset,
1015 &mci_attr_ue_noinfo_count,
1016 &mci_attr_ce_noinfo_count,
1024 * Release of a MC controlling instance
1026 static void edac_mci_instance_release(struct kobject *kobj)
1028 struct mem_ctl_info *mci;
1029 mci = container_of(kobj,struct mem_ctl_info,edac_mci_kobj);
1031 debugf0("MC: " __FILE__ ": %s() idx=%d calling kfree\n",
1032 __func__, mci->mc_idx);
1037 static struct kobj_type ktype_mci = {
1038 .release = edac_mci_instance_release,
1039 .sysfs_ops = &mci_ops,
1040 .default_attrs = (struct attribute **) mci_attr,
1043 #define EDAC_DEVICE_SYMLINK "device"
1046 * Create a new Memory Controller kobject instance,
1047 * mc<id> under the 'mc' directory
1053 static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
1057 struct csrow_info *csrow;
1058 struct kobject *edac_mci_kobj=&mci->edac_mci_kobj;
1060 debugf0("MC: " __FILE__ ": %s() idx=%d\n", __func__, mci->mc_idx);
1062 memset(edac_mci_kobj, 0, sizeof(*edac_mci_kobj));
1063 kobject_init(edac_mci_kobj);
1065 /* set the name of the mc<id> object */
1066 err = kobject_set_name(edac_mci_kobj,"mc%d",mci->mc_idx);
1070 /* link to our parent the '..../edac/mc' object */
1071 edac_mci_kobj->parent = &edac_memctrl_kobj;
1072 edac_mci_kobj->ktype = &ktype_mci;
1074 /* register the mc<id> kobject */
1075 err = kobject_register(edac_mci_kobj);
1079 /* create a symlink for the device */
1080 err = sysfs_create_link(edac_mci_kobj, &mci->pdev->dev.kobj,
1081 EDAC_DEVICE_SYMLINK);
1083 kobject_unregister(edac_mci_kobj);
1087 /* Make directories for each CSROW object
1088 * under the mc<id> kobject
1090 for (i = 0; i < mci->nr_csrows; i++) {
1092 csrow = &mci->csrows[i];
1094 /* Only expose populated CSROWs */
1095 if (csrow->nr_pages > 0) {
1096 err = edac_create_csrow_object(edac_mci_kobj,csrow,i);
1102 /* Mark this MCI instance as having sysfs entries */
1103 mci->sysfs_active = MCI_SYSFS_ACTIVE;
1108 /* CSROW error: backout what has already been registered, */
1110 for ( i--; i >= 0; i--) {
1111 if (csrow->nr_pages > 0) {
1112 kobject_unregister(&mci->csrows[i].kobj);
1113 kobject_put(&mci->csrows[i].kobj);
1117 kobject_unregister(edac_mci_kobj);
1118 kobject_put(edac_mci_kobj);
1124 * remove a Memory Controller instance
1126 static void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
1130 debugf0("MC: " __FILE__ ": %s()\n", __func__);
1132 /* remove all csrow kobjects */
1133 for (i = 0; i < mci->nr_csrows; i++) {
1134 if (mci->csrows[i].nr_pages > 0) {
1135 kobject_unregister(&mci->csrows[i].kobj);
1136 kobject_put(&mci->csrows[i].kobj);
1140 sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK);
1142 kobject_unregister(&mci->edac_mci_kobj);
1143 kobject_put(&mci->edac_mci_kobj);
1146 /* END OF sysfs data and methods */
1148 #ifdef CONFIG_EDAC_DEBUG
1150 EXPORT_SYMBOL(edac_mc_dump_channel);
1152 void edac_mc_dump_channel(struct channel_info *chan)
1154 debugf4("\tchannel = %p\n", chan);
1155 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
1156 debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
1157 debugf4("\tchannel->label = '%s'\n", chan->label);
1158 debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
1162 EXPORT_SYMBOL(edac_mc_dump_csrow);
1164 void edac_mc_dump_csrow(struct csrow_info *csrow)
1166 debugf4("\tcsrow = %p\n", csrow);
1167 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
1168 debugf4("\tcsrow->first_page = 0x%lx\n",
1170 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
1171 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
1172 debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
1173 debugf4("\tcsrow->nr_channels = %d\n",
1174 csrow->nr_channels);
1175 debugf4("\tcsrow->channels = %p\n", csrow->channels);
1176 debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
1180 EXPORT_SYMBOL(edac_mc_dump_mci);
1182 void edac_mc_dump_mci(struct mem_ctl_info *mci)
1184 debugf3("\tmci = %p\n", mci);
1185 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
1186 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
1187 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
1188 debugf4("\tmci->edac_check = %p\n", mci->edac_check);
1189 debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
1190 mci->nr_csrows, mci->csrows);
1191 debugf3("\tpdev = %p\n", mci->pdev);
1192 debugf3("\tmod_name:ctl_name = %s:%s\n",
1193 mci->mod_name, mci->ctl_name);
1194 debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
1198 #endif /* CONFIG_EDAC_DEBUG */
/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * The alignment is chosen from 'size' alone (see the ladder below); the
 * adjustment is then computed from the address held in 'ptr', so the value
 * returned is 'ptr' rounded up to a multiple of that alignment.  Items no
 * larger than a char are returned unchanged.
 */
static inline char * align_ptr (void *ptr, unsigned size)
{
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *) ptr;

	/* NOTE(review): the statement that computes 'r' was not visible in
	 * the reviewed listing.  It is written as the misalignment of 'ptr',
	 * which is what the contract above requires; deriving it from 'size'
	 * instead would leave already-misaligned pointers misaligned --
	 * confirm against the tree.
	 */
	r = ((unsigned long) ptr) % align;

	if (r == 0)
		return (char *) ptr;

	return (char *) (((unsigned long) ptr) + align - r);
}
1235 EXPORT_SYMBOL(edac_mc_alloc);
1238 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @sz_pvt: size of private storage needed
 * @nr_csrows: Number of CSROWS needed for this MC
1241 * @nr_chans: Number of channels for the MC
1243 * Everything is kmalloc'ed as one big chunk - more efficient.
1244 * Only can be used if all structures have the same lifetime - otherwise
1245 * you have to allocate and initialize your own structures.
1247 * Use edac_mc_free() to free mc structures allocated by this function.
1250 * NULL allocation failed
1251 * struct mem_ctl_info pointer
1253 struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
1256 struct mem_ctl_info *mci;
1257 struct csrow_info *csi, *csrow;
1258 struct channel_info *chi, *chp, *chan;
1263 /* Figure out the offsets of the various items from the start of an mc
1264 * structure. We want the alignment of each item to be at least as
1265 * stringent as what the compiler would provide if we could simply
1266 * hardcode everything into a single struct.
1268 mci = (struct mem_ctl_info *) 0;
1269 csi = (struct csrow_info *)align_ptr(&mci[1], sizeof(*csi));
1270 chi = (struct channel_info *)
1271 align_ptr(&csi[nr_csrows], sizeof(*chi));
1272 pvt = align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
1273 size = ((unsigned long) pvt) + sz_pvt;
1275 if ((mci = kmalloc(size, GFP_KERNEL)) == NULL)
1278 /* Adjust pointers so they point within the memory we just allocated
1279 * rather than an imaginary chunk of memory located at address 0.
1281 csi = (struct csrow_info *) (((char *) mci) + ((unsigned long) csi));
1282 chi = (struct channel_info *) (((char *) mci) + ((unsigned long) chi));
1283 pvt = sz_pvt ? (((char *) mci) + ((unsigned long) pvt)) : NULL;
1285 memset(mci, 0, size); /* clear all fields */
1288 mci->pvt_info = pvt;
1289 mci->nr_csrows = nr_csrows;
1291 for (row = 0; row < nr_csrows; row++) {
1293 csrow->csrow_idx = row;
1295 csrow->nr_channels = nr_chans;
1296 chp = &chi[row * nr_chans];
1297 csrow->channels = chp;
1299 for (chn = 0; chn < nr_chans; chn++) {
1301 chan->chan_idx = chn;
1302 chan->csrow = csrow;
1310 EXPORT_SYMBOL(edac_mc_free);
1313 * edac_mc_free: Free a previously allocated 'mci' structure
1314 * @mci: pointer to a struct mem_ctl_info structure
1316 * Free up a previously allocated mci structure
1317 * A MCI structure can be in 2 states after being allocated
1318 * by edac_mc_alloc().
1319 * 1) Allocated in a MC driver's probe, but not yet committed
1320 * 2) Allocated and committed, by a call to edac_mc_add_mc()
1321 * edac_mc_add_mc() is the function that adds the sysfs entries
1322 * thus, this free function must determine which state the 'mci'
1323 * structure is in, then either free it directly or
1324 * perform kobject cleanup by calling edac_remove_sysfs_mci_device().
1328 void edac_mc_free(struct mem_ctl_info *mci)
1330 /* only if sysfs entries for this mci instance exist
1331 * do we remove them and defer the actual kfree via
1332 * the kobject 'release()' callback.
1334 * Otherwise, do a straight kfree now.
1336 if (mci->sysfs_active == MCI_SYSFS_ACTIVE)
1337 edac_remove_sysfs_mci_device(mci);
1344 EXPORT_SYMBOL(edac_mc_find_mci_by_pdev);
1346 struct mem_ctl_info *edac_mc_find_mci_by_pdev(struct pci_dev *pdev)
1348 struct mem_ctl_info *mci;
1349 struct list_head *item;
1351 debugf3("MC: " __FILE__ ": %s()\n", __func__);
1353 list_for_each(item, &mc_devices) {
1354 mci = list_entry(item, struct mem_ctl_info, link);
1356 if (mci->pdev == pdev)
1363 static int add_mc_to_global_list (struct mem_ctl_info *mci)
1365 struct list_head *item, *insert_before;
1366 struct mem_ctl_info *p;
1369 if (list_empty(&mc_devices)) {
1371 insert_before = &mc_devices;
1373 if (edac_mc_find_mci_by_pdev(mci->pdev)) {
1375 "EDAC MC: %s (%s) %s %s already assigned %d\n",
1376 mci->pdev->dev.bus_id, pci_name(mci->pdev),
1377 mci->mod_name, mci->ctl_name, mci->mc_idx);
1381 insert_before = NULL;
1384 list_for_each(item, &mc_devices) {
1385 p = list_entry(item, struct mem_ctl_info, link);
1387 if (p->mc_idx != i) {
1388 insert_before = item;
1397 if (insert_before == NULL)
1398 insert_before = &mc_devices;
1401 list_add_tail_rcu(&mci->link, insert_before);
1407 EXPORT_SYMBOL(edac_mc_add_mc);
1410 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list
1411 * @mci: pointer to the mci structure to be added to the list
1418 /* FIXME - should a warning be printed if no error detection? correction? */
1419 int edac_mc_add_mc(struct mem_ctl_info *mci)
1423 debugf0("MC: " __FILE__ ": %s()\n", __func__);
1424 #ifdef CONFIG_EDAC_DEBUG
1425 if (edac_debug_level >= 3)
1426 edac_mc_dump_mci(mci);
1427 if (edac_debug_level >= 4) {
1430 for (i = 0; i < mci->nr_csrows; i++) {
1432 edac_mc_dump_csrow(&mci->csrows[i]);
1433 for (j = 0; j < mci->csrows[i].nr_channels; j++)
1434 edac_mc_dump_channel(&mci->csrows[i].
1439 down(&mem_ctls_mutex);
1441 if (add_mc_to_global_list(mci))
1444 /* set load time so that error rate can be tracked */
1445 mci->start_time = jiffies;
1447 if (edac_create_sysfs_mci_device(mci)) {
1449 "EDAC MC%d: failed to create sysfs device\n",
1451 /* FIXME - should there be an error code and unwind? */
1455 /* Report action taken */
1457 "EDAC MC%d: Giving out device to %s %s: PCI %s\n",
1458 mci->mc_idx, mci->mod_name, mci->ctl_name,
1459 pci_name(mci->pdev));
1465 up(&mem_ctls_mutex);
1471 static void complete_mc_list_del (struct rcu_head *head)
1473 struct mem_ctl_info *mci;
1475 mci = container_of(head, struct mem_ctl_info, rcu);
1476 INIT_LIST_HEAD(&mci->link);
1477 complete(&mci->complete);
1480 static void del_mc_from_global_list (struct mem_ctl_info *mci)
1482 list_del_rcu(&mci->link);
1483 init_completion(&mci->complete);
1484 call_rcu(&mci->rcu, complete_mc_list_del);
1485 wait_for_completion(&mci->complete);
1488 EXPORT_SYMBOL(edac_mc_del_mc);
1491 * edac_mc_del_mc: Remove the specified mci structure from global list
1492 * @mci: Pointer to struct mem_ctl_info structure
1498 int edac_mc_del_mc(struct mem_ctl_info *mci)
1502 debugf0("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
1503 down(&mem_ctls_mutex);
1504 del_mc_from_global_list(mci);
1506 "EDAC MC%d: Removed device %d for %s %s: PCI %s\n",
1507 mci->mc_idx, mci->mc_idx, mci->mod_name, mci->ctl_name,
1508 pci_name(mci->pdev));
1510 up(&mem_ctls_mutex);
1516 EXPORT_SYMBOL(edac_mc_scrub_block);
1518 void edac_mc_scrub_block(unsigned long page, unsigned long offset,
1523 unsigned long flags = 0;
1525 debugf3("MC: " __FILE__ ": %s()\n", __func__);
1527 /* ECC error page was not in our memory. Ignore it. */
1528 if(!pfn_valid(page))
1531 /* Find the actual page structure then map it and fix */
1532 pg = pfn_to_page(page);
1534 if (PageHighMem(pg))
1535 local_irq_save(flags);
1537 virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
1539 /* Perform architecture specific atomic scrub operation */
1540 atomic_scrub(virt_addr + offset, size);
1542 /* Unmap and complete */
1543 kunmap_atomic(virt_addr, KM_BOUNCE_READ);
1545 if (PageHighMem(pg))
1546 local_irq_restore(flags);
1550 /* FIXME - should return -1 */
1551 EXPORT_SYMBOL(edac_mc_find_csrow_by_page);
1553 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
1556 struct csrow_info *csrows = mci->csrows;
1559 debugf1("MC%d: " __FILE__ ": %s(): 0x%lx\n", mci->mc_idx, __func__,
1563 for (i = 0; i < mci->nr_csrows; i++) {
1564 struct csrow_info *csrow = &csrows[i];
1566 if (csrow->nr_pages == 0)
1569 debugf3("MC%d: " __FILE__
1570 ": %s(): first(0x%lx) page(0x%lx)"
1571 " last(0x%lx) mask(0x%lx)\n", mci->mc_idx,
1572 __func__, csrow->first_page, page,
1573 csrow->last_page, csrow->page_mask);
1575 if ((page >= csrow->first_page) &&
1576 (page <= csrow->last_page) &&
1577 ((page & csrow->page_mask) ==
1578 (csrow->first_page & csrow->page_mask))) {
1586 "EDAC MC%d: could not look up page error address %lx\n",
1587 mci->mc_idx, (unsigned long) page);
1593 EXPORT_SYMBOL(edac_mc_handle_ce);
1595 /* FIXME - settable log (warning/emerg) levels */
1596 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
1597 void edac_mc_handle_ce(struct mem_ctl_info *mci,
1598 unsigned long page_frame_number,
1599 unsigned long offset_in_page,
1600 unsigned long syndrome, int row, int channel,
1603 unsigned long remapped_page;
1605 debugf3("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
1607 /* FIXME - maybe make panic on INTERNAL ERROR an option */
1608 if (row >= mci->nr_csrows || row < 0) {
1609 /* something is wrong */
1611 "EDAC MC%d: INTERNAL ERROR: row out of range (%d >= %d)\n",
1612 mci->mc_idx, row, mci->nr_csrows);
1613 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
1616 if (channel >= mci->csrows[row].nr_channels || channel < 0) {
1617 /* something is wrong */
1619 "EDAC MC%d: INTERNAL ERROR: channel out of range "
1621 mci->mc_idx, channel, mci->csrows[row].nr_channels);
1622 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
1627 /* FIXME - put in DIMM location */
1629 "EDAC MC%d: CE page 0x%lx, offset 0x%lx,"
1630 " grain %d, syndrome 0x%lx, row %d, channel %d,"
1631 " label \"%s\": %s\n", mci->mc_idx,
1632 page_frame_number, offset_in_page,
1633 mci->csrows[row].grain, syndrome, row, channel,
1634 mci->csrows[row].channels[channel].label, msg);
1637 mci->csrows[row].ce_count++;
1638 mci->csrows[row].channels[channel].ce_count++;
1640 if (mci->scrub_mode & SCRUB_SW_SRC) {
1642 * Some MC's can remap memory so that it is still available
1643 * at a different address when PCI devices map into memory.
1644 * MC's that can't do this lose the memory where PCI devices
1645 * are mapped. This mapping is MC dependent and so we call
1646 * back into the MC driver for it to map the MC page to
1647 * a physical (CPU) page which can then be mapped to a virtual
1648 * page - which can then be scrubbed.
1650 remapped_page = mci->ctl_page_to_phys ?
1651 mci->ctl_page_to_phys(mci, page_frame_number) :
1654 edac_mc_scrub_block(remapped_page, offset_in_page,
1655 mci->csrows[row].grain);
1660 EXPORT_SYMBOL(edac_mc_handle_ce_no_info);
1662 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
1667 "EDAC MC%d: CE - no information available: %s\n",
1669 mci->ce_noinfo_count++;
1674 EXPORT_SYMBOL(edac_mc_handle_ue);
1676 void edac_mc_handle_ue(struct mem_ctl_info *mci,
1677 unsigned long page_frame_number,
1678 unsigned long offset_in_page, int row,
1681 int len = EDAC_MC_LABEL_LEN * 4;
1682 char labels[len + 1];
1687 debugf3("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
1689 /* FIXME - maybe make panic on INTERNAL ERROR an option */
1690 if (row >= mci->nr_csrows || row < 0) {
1691 /* something is wrong */
1693 "EDAC MC%d: INTERNAL ERROR: row out of range (%d >= %d)\n",
1694 mci->mc_idx, row, mci->nr_csrows);
1695 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
1699 chars = snprintf(pos, len + 1, "%s",
1700 mci->csrows[row].channels[0].label);
1703 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
1705 chars = snprintf(pos, len + 1, ":%s",
1706 mci->csrows[row].channels[chan].label);
1713 "EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, row %d,"
1714 " labels \"%s\": %s\n", mci->mc_idx,
1715 page_frame_number, offset_in_page,
1716 mci->csrows[row].grain, row, labels, msg);
1720 ("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, row %d,"
1721 " labels \"%s\": %s\n", mci->mc_idx,
1722 page_frame_number, offset_in_page,
1723 mci->csrows[row].grain, row, labels, msg);
1726 mci->csrows[row].ue_count++;
1730 EXPORT_SYMBOL(edac_mc_handle_ue_no_info);
1732 void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
1736 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
1740 "EDAC MC%d: UE - no information available: %s\n",
1742 mci->ue_noinfo_count++;
1749 static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
1754 where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
1755 pci_read_config_word(dev, where, &status);
1757 /* If we get back 0xFFFF then we must suspect that the card has been pulled but
1758 the Linux PCI layer has not yet finished cleaning up. We don't want to report
1761 if (status == 0xFFFF) {
1763 pci_read_config_dword(dev, 0, &sanity);
1764 if (sanity == 0xFFFFFFFF)
1767 status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
1771 /* reset only the bits we are interested in */
1772 pci_write_config_word(dev, where, status);
1777 typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
1779 /* Clear any PCI parity errors logged by this device. */
1780 static void edac_pci_dev_parity_clear( struct pci_dev *dev )
1784 get_pci_parity_status(dev, 0);
1786 /* read the device TYPE, looking for bridges */
1787 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1789 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
1790 get_pci_parity_status(dev, 1);
1794 * PCI Parity polling
1797 static void edac_pci_dev_parity_test(struct pci_dev *dev)
1802 /* read the STATUS register on this device
1804 status = get_pci_parity_status(dev, 0);
1806 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id );
1808 /* check the status reg for errors */
1810 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1813 "Signaled System Error on %s\n",
1816 if (status & (PCI_STATUS_PARITY)) {
1819 "Master Data Parity Error on %s\n",
1822 atomic_inc(&pci_parity_count);
1825 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1828 "Detected Parity Error on %s\n",
1831 atomic_inc(&pci_parity_count);
1835 /* read the device TYPE, looking for bridges */
1836 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1838 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id );
1840 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
1841 /* On bridges, need to examine secondary status register */
1842 status = get_pci_parity_status(dev, 1);
1844 debugf2("PCI SEC_STATUS= 0x%04x %s\n",
1845 status, dev->dev.bus_id );
1847 /* check the secondary status reg for errors */
1849 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1852 "Signaled System Error on %s\n",
1855 if (status & (PCI_STATUS_PARITY)) {
1858 "Master Data Parity Error on %s\n",
1861 atomic_inc(&pci_parity_count);
1864 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1867 "Detected Parity Error on %s\n",
1870 atomic_inc(&pci_parity_count);
1877 * check_dev_on_list: Scan for a PCI device on a white/black list
1878 * @list: an EDAC &edac_pci_device_list white/black list pointer
1879 * @free_index: index of next free entry on the list
1880 * @pci_dev: PCI Device pointer
1882 * see if list contains the device.
1884 * Returns: 0 not found
1887 static int check_dev_on_list(struct edac_pci_device_list *list, int free_index,
1888 struct pci_dev *dev)
1891 int rc = 0; /* Assume not found */
1892 unsigned short vendor=dev->vendor;
1893 unsigned short device=dev->device;
1895 /* Scan the list, looking for a vendor/device match
1897 for (i = 0; i < free_index; i++, list++ ) {
1898 if ( (list->vendor == vendor ) &&
1899 (list->device == device )) {
1909 * pci_dev parity list iterator
1910 * Scan the PCI device list for one iteration, looking for SERRORs
1911 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
1913 static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
1915 struct pci_dev *dev=NULL;
1917 /* request for kernel access to the next PCI device, if any,
1918 * and while we are looking at it have its reference count
1919 * bumped until we are done with it
1921 while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1923 /* if whitelist exists then it has priority, so only scan those
1924 * devices on the whitelist
1926 if (pci_whitelist_count > 0 ) {
1927 if (check_dev_on_list(pci_whitelist,
1928 pci_whitelist_count, dev))
1932 * if no whitelist, then check if this device is
1935 if (!check_dev_on_list(pci_blacklist,
1936 pci_blacklist_count, dev))
1942 static void do_pci_parity_check(void)
1944 unsigned long flags;
1947 debugf3("MC: " __FILE__ ": %s()\n", __func__);
1949 if (!check_pci_parity)
1952 before_count = atomic_read(&pci_parity_count);
1954 /* scan all PCI devices looking for a Parity Error on devices and
1957 local_irq_save(flags);
1958 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
1959 local_irq_restore(flags);
1961 /* Only if operator has selected panic on PCI Error */
1962 if (panic_on_pci_parity) {
1963 /* If the count is different 'after' from 'before' */
1964 if (before_count != atomic_read(&pci_parity_count))
1965 panic("EDAC: PCI Parity Error");
1970 static inline void clear_pci_parity_errors(void)
1972 /* Clear any PCI bus parity errors that devices initially have logged
1973 * in their registers.
1975 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
1979 #else /* CONFIG_PCI */
1982 static inline void do_pci_parity_check(void)
1988 static inline void clear_pci_parity_errors(void)
1994 #endif /* CONFIG_PCI */
1997 * Iterate over all MC instances and check for ECC, et al, errors
1999 static inline void check_mc_devices (void)
2001 unsigned long flags;
2002 struct list_head *item;
2003 struct mem_ctl_info *mci;
2005 debugf3("MC: " __FILE__ ": %s()\n", __func__);
2007 /* during poll, have interrupts off */
2008 local_irq_save(flags);
2010 list_for_each(item, &mc_devices) {
2011 mci = list_entry(item, struct mem_ctl_info, link);
2013 if (mci->edac_check != NULL)
2014 mci->edac_check(mci);
2017 local_irq_restore(flags);
2022 * Check MC status every poll_msec.
2023 * Check PCI status every poll_msec as well.
2025 * This is where the work gets done for edac.
2027 * SMP safe, doesn't use NMI, and auto-rate-limits.
2029 static void do_edac_check(void)
2032 debugf3("MC: " __FILE__ ": %s()\n", __func__);
2036 do_pci_parity_check();
2041 * EDAC thread state information
2043 struct bs_thread_info
2045 struct task_struct *task;
2046 struct completion *event;
2051 static struct bs_thread_info bs_thread;
2054 * edac_kernel_thread
2055 * This is the kernel thread that processes edac operations
2056 * in a normal thread environment
2058 static int edac_kernel_thread(void *arg)
2060 struct bs_thread_info *thread = (struct bs_thread_info *) arg;
2063 daemonize(thread->name);
2065 current->exit_signal = SIGCHLD;
2066 allow_signal(SIGKILL);
2067 thread->task = current;
2069 /* indicate to starting task we have started */
2070 complete(thread->event);
2072 /* loop forever, until we are told to stop */
2073 while(thread->run != NULL) {
2076 /* call the function to check the memory controllers */
2081 if (signal_pending(current))
2082 flush_signals(current);
2084 /* ensure we are interruptible */
2085 set_current_state(TASK_INTERRUPTIBLE);
2087 /* go to sleep for the interval */
2088 schedule_timeout((HZ * poll_msec) / 1000);
2092 /* notify waiter that we are exiting */
2093 complete(thread->event);
2100 * module initialization entry point
2102 static int __init edac_mc_init(void)
2105 struct completion event;
2107 printk(KERN_INFO "MC: " __FILE__ " version " EDAC_MC_VERSION "\n");
2110 * Harvest and clear any boot/initialization PCI parity errors
2112 * FIXME: This only clears errors logged by devices present at time of
2113 * module initialization. We should also do an initial clear
2114 * of each newly hotplugged device.
2116 clear_pci_parity_errors();
2118 /* perform check for first time to harvest boot leftovers */
2121 /* Create the MC sysfs entries */
2122 if (edac_sysfs_memctrl_setup()) {
2123 printk(KERN_ERR "EDAC MC: Error initializing sysfs code\n");
2127 /* Create the PCI parity sysfs entries */
2128 if (edac_sysfs_pci_setup()) {
2129 edac_sysfs_memctrl_teardown();
2130 printk(KERN_ERR "EDAC PCI: Error initializing sysfs code\n");
2134 /* Create our kernel thread */
2135 init_completion(&event);
2136 bs_thread.event = &event;
2137 bs_thread.name = "kedac";
2138 bs_thread.run = do_edac_check;
2140 /* create our kernel thread */
2141 ret = kernel_thread(edac_kernel_thread, &bs_thread, CLONE_KERNEL);
2143 /* remove the sysfs entries */
2144 edac_sysfs_memctrl_teardown();
2145 edac_sysfs_pci_teardown();
2149 /* wait for our kernel thread to ack that it is up and running */
2150 wait_for_completion(&event);
2158 * module exit/termination function
2160 static void __exit edac_mc_exit(void)
2162 struct completion event;
2164 debugf0("MC: " __FILE__ ": %s()\n", __func__);
2166 init_completion(&event);
2167 bs_thread.event = &event;
2169 /* As soon as ->run is set to NULL, the task could disappear,
2170 * so we need to hold tasklist_lock until we have sent the signal
2172 read_lock(&tasklist_lock);
2173 bs_thread.run = NULL;
2174 send_sig(SIGKILL, bs_thread.task, 1);
2175 read_unlock(&tasklist_lock);
2176 wait_for_completion(&event);
2178 /* tear down the sysfs device */
2179 edac_sysfs_memctrl_teardown();
2180 edac_sysfs_pci_teardown();
2186 module_init(edac_mc_init);
2187 module_exit(edac_mc_exit);
2189 MODULE_LICENSE("GPL");
2190 MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n"
2191 "Based on.work by Dan Hollis et al");
2192 MODULE_DESCRIPTION("Core library routines for MC reporting");
2194 module_param(panic_on_ue, int, 0644);
2195 MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
2196 module_param(check_pci_parity, int, 0644);
2197 MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on");
2198 module_param(panic_on_pci_parity, int, 0644);
2199 MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on");
2200 module_param(log_ue, int, 0644);
2201 MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on");
2202 module_param(log_ce, int, 0644);
2203 MODULE_PARM_DESC(log_ce, "Log correctable error to console: 0=off 1=on");
2204 module_param(poll_msec, int, 0644);
2205 MODULE_PARM_DESC(poll_msec, "Polling period in milliseconds");
2206 #ifdef CONFIG_EDAC_DEBUG
2207 module_param(edac_debug_level, int, 0644);
2208 MODULE_PARM_DESC(edac_debug_level, "Debug level");