2 * edac_mc kernel module
3 * (C) 2005 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 * http://www.anime.net/~goemon/linux-ecc/
11 * Modified by Dave Peterson and Doug Thompson
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/proc_fs.h>
19 #include <linux/kernel.h>
20 #include <linux/types.h>
21 #include <linux/smp.h>
22 #include <linux/init.h>
23 #include <linux/sysctl.h>
24 #include <linux/highmem.h>
25 #include <linux/timer.h>
26 #include <linux/slab.h>
27 #include <linux/jiffies.h>
28 #include <linux/spinlock.h>
29 #include <linux/list.h>
30 #include <linux/sysdev.h>
31 #include <linux/ctype.h>
32 #include <linux/kthread.h>
34 #include <asm/uaccess.h>
40 #define EDAC_MC_VERSION "Ver: 2.0.0 " __DATE__
42 /* For now, disable the EDAC sysfs code. The sysfs interface that EDAC
43 * presents to user space needs more thought, and is likely to change
46 #define DISABLE_EDAC_SYSFS
48 #ifdef CONFIG_EDAC_DEBUG
49 /* Values of 0 to 4 will generate output */
50 int edac_debug_level = 1;
51 EXPORT_SYMBOL(edac_debug_level);
54 /* EDAC Controls, settable by module parameter, and sysfs */
55 static int log_ue = 1;
56 static int log_ce = 1;
57 static int panic_on_ue;
58 static int poll_msec = 1000;
60 static int check_pci_parity = 0; /* default NO: do not check PCI parity */
61 static int panic_on_pci_parity; /* default no panic on PCI Parity */
62 static atomic_t pci_parity_count = ATOMIC_INIT(0);
64 /* lock to memory controller's control array */
65 static DECLARE_MUTEX(mem_ctls_mutex);
66 static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices);
68 static struct task_struct *edac_thread;
70 /* Structure of the whitelist and blacklist arrays */
71 struct edac_pci_device_list {
72 unsigned int vendor; /* Vendor ID */
73 unsigned int device; /* Device ID */
77 #define MAX_LISTED_PCI_DEVICES 32
79 /* List of PCI devices (vendor-id:device-id) that should be skipped */
80 static struct edac_pci_device_list pci_blacklist[MAX_LISTED_PCI_DEVICES];
81 static int pci_blacklist_count;
83 /* List of PCI devices (vendor-id:device-id) that should be scanned */
84 static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES];
85 static int pci_whitelist_count ;
87 /* START sysfs data and methods */
89 #ifndef DISABLE_EDAC_SYSFS
91 static const char *mem_types[] = {
92 [MEM_EMPTY] = "Empty",
93 [MEM_RESERVED] = "Reserved",
94 [MEM_UNKNOWN] = "Unknown",
98 [MEM_SDR] = "Unbuffered-SDR",
99 [MEM_RDR] = "Registered-SDR",
100 [MEM_DDR] = "Unbuffered-DDR",
101 [MEM_RDDR] = "Registered-DDR",
105 static const char *dev_types[] = {
106 [DEV_UNKNOWN] = "Unknown",
116 static const char *edac_caps[] = {
117 [EDAC_UNKNOWN] = "Unknown",
118 [EDAC_NONE] = "None",
119 [EDAC_RESERVED] = "Reserved",
120 [EDAC_PARITY] = "PARITY",
122 [EDAC_SECDED] = "SECDED",
123 [EDAC_S2ECD2ED] = "S2ECD2ED",
124 [EDAC_S4ECD4ED] = "S4ECD4ED",
125 [EDAC_S8ECD8ED] = "S8ECD8ED",
126 [EDAC_S16ECD16ED] = "S16ECD16ED"
130 /* sysfs object: /sys/devices/system/edac */
131 static struct sysdev_class edac_class = {
132 set_kset_name("edac"),
136 * /sys/devices/system/edac/mc
137 * /sys/devices/system/edac/pci
139 static struct kobject edac_memctrl_kobj;
140 static struct kobject edac_pci_kobj;
142 /* We use these to wait for the reference counts on edac_memctrl_kobj and
143 * edac_pci_kobj to reach 0.
145 static struct completion edac_memctrl_kobj_complete;
146 static struct completion edac_pci_kobj_complete;
149 * /sys/devices/system/edac/mc;
150 * data structures and methods
153 static ssize_t memctrl_string_show(void *ptr, char *buffer)
155 char *value = (char*) ptr;
156 return sprintf(buffer, "%s\n", value);
160 static ssize_t memctrl_int_show(void *ptr, char *buffer)
162 int *value = (int*) ptr;
163 return sprintf(buffer, "%d\n", *value);
166 static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
168 int *value = (int*) ptr;
170 if (isdigit(*buffer))
171 *value = simple_strtoul(buffer, NULL, 0);
176 struct memctrl_dev_attribute {
177 struct attribute attr;
179 ssize_t (*show)(void *,char *);
180 ssize_t (*store)(void *, const char *, size_t);
183 /* Set of show/store abstract level functions for memory control object */
185 memctrl_dev_show(struct kobject *kobj, struct attribute *attr, char *buffer)
187 struct memctrl_dev_attribute *memctrl_dev;
188 memctrl_dev = (struct memctrl_dev_attribute*)attr;
190 if (memctrl_dev->show)
191 return memctrl_dev->show(memctrl_dev->value, buffer);
196 memctrl_dev_store(struct kobject *kobj, struct attribute *attr,
197 const char *buffer, size_t count)
199 struct memctrl_dev_attribute *memctrl_dev;
200 memctrl_dev = (struct memctrl_dev_attribute*)attr;
202 if (memctrl_dev->store)
203 return memctrl_dev->store(memctrl_dev->value, buffer, count);
207 static struct sysfs_ops memctrlfs_ops = {
208 .show = memctrl_dev_show,
209 .store = memctrl_dev_store
212 #define MEMCTRL_ATTR(_name,_mode,_show,_store) \
213 struct memctrl_dev_attribute attr_##_name = { \
214 .attr = {.name = __stringify(_name), .mode = _mode }, \
220 #define MEMCTRL_STRING_ATTR(_name,_data,_mode,_show,_store) \
221 struct memctrl_dev_attribute attr_##_name = { \
222 .attr = {.name = __stringify(_name), .mode = _mode }, \
228 /* memory controller attribute files */
230 MEMCTRL_STRING_ATTR(mc_version,EDAC_MC_VERSION,S_IRUGO,memctrl_string_show,NULL);
233 /* csrow<id> control files */
234 MEMCTRL_ATTR(panic_on_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
235 MEMCTRL_ATTR(log_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
236 MEMCTRL_ATTR(log_ce,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
237 MEMCTRL_ATTR(poll_msec,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
240 /* Base Attributes of the memory ECC object */
241 static struct memctrl_dev_attribute *memctrl_attr[] = {
249 /* Main MC kobject release() function */
250 static void edac_memctrl_master_release(struct kobject *kobj)
252 debugf1("%s()\n", __func__);
253 complete(&edac_memctrl_kobj_complete);
256 static struct kobj_type ktype_memctrl = {
257 .release = edac_memctrl_master_release,
258 .sysfs_ops = &memctrlfs_ops,
259 .default_attrs = (struct attribute **) memctrl_attr,
262 #endif /* DISABLE_EDAC_SYSFS */
264 /* Initialize the main sysfs entries for edac:
265 * /sys/devices/system/edac
272 static int edac_sysfs_memctrl_setup(void)
273 #ifdef DISABLE_EDAC_SYSFS
281 debugf1("%s()\n", __func__);
283 /* create the /sys/devices/system/edac directory */
284 err = sysdev_class_register(&edac_class);
286 /* Init the MC's kobject */
287 memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj));
288 edac_memctrl_kobj.parent = &edac_class.kset.kobj;
289 edac_memctrl_kobj.ktype = &ktype_memctrl;
291 /* generate sysfs "..../edac/mc" */
292 err = kobject_set_name(&edac_memctrl_kobj,"mc");
294 /* FIXME: maybe new sysdev_create_subdir() */
295 err = kobject_register(&edac_memctrl_kobj);
297 debugf1("Failed to register '.../edac/mc'\n");
299 debugf1("Registered '.../edac/mc' kobject\n");
303 debugf1("%s() error=%d\n", __func__, err);
308 #endif /* DISABLE_EDAC_SYSFS */
312 * the '..../edac/mc' kobject followed by '..../edac' itself
/*
 * Undo the main EDAC sysfs setup: unregister edac_memctrl_kobj, block until
 * edac_memctrl_kobj_complete has been signalled (which
 * edac_memctrl_master_release() does), and only then unregister the 'edac'
 * sysdev class.  The body is compiled out when DISABLE_EDAC_SYSFS is
 * defined.
 */
314 static void edac_sysfs_memctrl_teardown(void)
316 #ifndef DISABLE_EDAC_SYSFS
317 debugf0("MC: " __FILE__ ": %s()\n", __func__);
319 /* Unregister the MC's kobject and wait for reference count to reach
322 init_completion(&edac_memctrl_kobj_complete);
323 kobject_unregister(&edac_memctrl_kobj);
324 wait_for_completion(&edac_memctrl_kobj_complete);
326 /* Unregister the 'edac' object */
327 sysdev_class_unregister(&edac_class);
328 #endif /* DISABLE_EDAC_SYSFS */
331 #ifndef DISABLE_EDAC_SYSFS
334 * /sys/devices/system/edac/pci;
335 * data structures and methods
338 struct list_control {
339 struct edac_pci_device_list *list;
345 /* Output the list as: vendor_id:device_id<,vendor_id:device_id> */
346 static ssize_t edac_pci_list_string_show(void *ptr, char *buffer)
348 struct list_control *listctl;
349 struct edac_pci_device_list *list;
355 list = listctl->list;
357 for (i = 0; i < *(listctl->count); i++, list++ ) {
359 len += snprintf(p + len, (PAGE_SIZE-len), ",");
361 len += snprintf(p + len,
364 list->vendor,list->device);
367 len += snprintf(p + len,(PAGE_SIZE-len), "\n");
369 return (ssize_t) len;
374 * Scan string from **s to **e looking for one 'vendor:device' tuple
375 * where each field is a hex value
377 * return 0 if an entry is NOT found
378 * return 1 if an entry is found
379 * fill in *vendor_id and *device_id with values found
381 * In both cases, make sure *s has been moved forward toward *e
383 static int parse_one_device(const char **s,const char **e,
384 unsigned int *vendor_id, unsigned int *device_id)
386 const char *runner, *p;
388 /* if null byte, we are done */
390 (*s)++; /* keep *s moving */
394 /* skip over newlines & whitespace */
395 if ((**s == '\n') || isspace(**s)) {
400 if (!isxdigit(**s)) {
405 /* parse vendor_id */
407 while (runner < *e) {
408 /* scan for vendor:device delimiter */
409 if (*runner == ':') {
410 *vendor_id = simple_strtol((char*) *s, (char**) &p, 16);
417 if (!isxdigit(*runner)) {
422 /* parse device_id */
424 *device_id = simple_strtol((char*)runner, (char**)&p, 16);
433 static ssize_t edac_pci_list_string_store(void *ptr, const char *buffer,
436 struct list_control *listctl;
437 struct edac_pci_device_list *list;
438 unsigned int vendor_id, device_id;
446 list = listctl->list;
447 index = listctl->count;
450 while (*index < MAX_LISTED_PCI_DEVICES) {
452 if (parse_one_device(&s,&e,&vendor_id,&device_id)) {
453 list[ *index ].vendor = vendor_id;
454 list[ *index ].device = device_id;
458 /* check for all data consumed */
467 static ssize_t edac_pci_int_show(void *ptr, char *buffer)
470 return sprintf(buffer,"%d\n",*value);
473 static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count)
477 if (isdigit(*buffer))
478 *value = simple_strtoul(buffer,NULL,0);
483 struct edac_pci_dev_attribute {
484 struct attribute attr;
486 ssize_t (*show)(void *,char *);
487 ssize_t (*store)(void *, const char *,size_t);
490 /* Set of show/store abstract level functions for PCI Parity object */
491 static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr,
494 struct edac_pci_dev_attribute *edac_pci_dev;
495 edac_pci_dev= (struct edac_pci_dev_attribute*)attr;
497 if (edac_pci_dev->show)
498 return edac_pci_dev->show(edac_pci_dev->value, buffer);
502 static ssize_t edac_pci_dev_store(struct kobject *kobj, struct attribute *attr,
503 const char *buffer, size_t count)
505 struct edac_pci_dev_attribute *edac_pci_dev;
506 edac_pci_dev= (struct edac_pci_dev_attribute*)attr;
508 if (edac_pci_dev->show)
509 return edac_pci_dev->store(edac_pci_dev->value, buffer, count);
513 static struct sysfs_ops edac_pci_sysfs_ops = {
514 .show = edac_pci_dev_show,
515 .store = edac_pci_dev_store
519 #define EDAC_PCI_ATTR(_name,_mode,_show,_store) \
520 struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
521 .attr = {.name = __stringify(_name), .mode = _mode }, \
527 #define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \
528 struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
529 .attr = {.name = __stringify(_name), .mode = _mode }, \
536 static struct list_control pci_whitelist_control = {
537 .list = pci_whitelist,
538 .count = &pci_whitelist_count
541 static struct list_control pci_blacklist_control = {
542 .list = pci_blacklist,
543 .count = &pci_blacklist_count
546 /* whitelist attribute */
547 EDAC_PCI_STRING_ATTR(pci_parity_whitelist,
548 &pci_whitelist_control,
550 edac_pci_list_string_show,
551 edac_pci_list_string_store);
553 EDAC_PCI_STRING_ATTR(pci_parity_blacklist,
554 &pci_blacklist_control,
556 edac_pci_list_string_show,
557 edac_pci_list_string_store);
560 /* PCI Parity control files */
561 EDAC_PCI_ATTR(check_pci_parity,S_IRUGO|S_IWUSR,edac_pci_int_show,edac_pci_int_store);
562 EDAC_PCI_ATTR(panic_on_pci_parity,S_IRUGO|S_IWUSR,edac_pci_int_show,edac_pci_int_store);
563 EDAC_PCI_ATTR(pci_parity_count,S_IRUGO,edac_pci_int_show,NULL);
565 /* Base attributes of the PCI parity object */
566 static struct edac_pci_dev_attribute *edac_pci_attr[] = {
567 &edac_pci_attr_check_pci_parity,
568 &edac_pci_attr_panic_on_pci_parity,
569 &edac_pci_attr_pci_parity_count,
573 /* No memory to release */
574 static void edac_pci_release(struct kobject *kobj)
576 debugf1("%s()\n", __func__);
577 complete(&edac_pci_kobj_complete);
580 static struct kobj_type ktype_edac_pci = {
581 .release = edac_pci_release,
582 .sysfs_ops = &edac_pci_sysfs_ops,
583 .default_attrs = (struct attribute **) edac_pci_attr,
586 #endif /* DISABLE_EDAC_SYSFS */
589 * edac_sysfs_pci_setup()
592 static int edac_sysfs_pci_setup(void)
593 #ifdef DISABLE_EDAC_SYSFS
601 debugf1("%s()\n", __func__);
603 memset(&edac_pci_kobj, 0, sizeof(edac_pci_kobj));
604 edac_pci_kobj.parent = &edac_class.kset.kobj;
605 edac_pci_kobj.ktype = &ktype_edac_pci;
607 err = kobject_set_name(&edac_pci_kobj, "pci");
609 /* Instantiate the pci kobject */
610 /* FIXME: maybe new sysdev_create_subdir() */
611 err = kobject_register(&edac_pci_kobj);
613 debugf1("Failed to register '.../edac/pci'\n");
615 debugf1("Registered '.../edac/pci' kobject\n");
619 #endif /* DISABLE_EDAC_SYSFS */
621 static void edac_sysfs_pci_teardown(void)
623 #ifndef DISABLE_EDAC_SYSFS
624 debugf0("%s()\n", __func__);
625 init_completion(&edac_pci_kobj_complete);
626 kobject_unregister(&edac_pci_kobj);
627 wait_for_completion(&edac_pci_kobj_complete);
631 #ifndef DISABLE_EDAC_SYSFS
633 /* EDAC sysfs CSROW data structures and methods */
635 /* Set of more detailed csrow<id> attribute show/store functions */
636 static ssize_t csrow_ch0_dimm_label_show(struct csrow_info *csrow, char *data)
640 if (csrow->nr_channels > 0) {
641 size = snprintf(data, EDAC_MC_LABEL_LEN,"%s\n",
642 csrow->channels[0].label);
647 static ssize_t csrow_ch1_dimm_label_show(struct csrow_info *csrow, char *data)
651 if (csrow->nr_channels > 0) {
652 size = snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
653 csrow->channels[1].label);
658 static ssize_t csrow_ch0_dimm_label_store(struct csrow_info *csrow,
659 const char *data, size_t size)
661 ssize_t max_size = 0;
663 if (csrow->nr_channels > 0) {
664 max_size = min((ssize_t)size,(ssize_t)EDAC_MC_LABEL_LEN-1);
665 strncpy(csrow->channels[0].label, data, max_size);
666 csrow->channels[0].label[max_size] = '\0';
671 static ssize_t csrow_ch1_dimm_label_store(struct csrow_info *csrow,
672 const char *data, size_t size)
674 ssize_t max_size = 0;
676 if (csrow->nr_channels > 1) {
677 max_size = min((ssize_t)size,(ssize_t)EDAC_MC_LABEL_LEN-1);
678 strncpy(csrow->channels[1].label, data, max_size);
679 csrow->channels[1].label[max_size] = '\0';
684 static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data)
686 return sprintf(data,"%u\n", csrow->ue_count);
689 static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data)
691 return sprintf(data,"%u\n", csrow->ce_count);
694 static ssize_t csrow_ch0_ce_count_show(struct csrow_info *csrow, char *data)
698 if (csrow->nr_channels > 0) {
699 size = sprintf(data,"%u\n", csrow->channels[0].ce_count);
704 static ssize_t csrow_ch1_ce_count_show(struct csrow_info *csrow, char *data)
708 if (csrow->nr_channels > 1) {
709 size = sprintf(data,"%u\n", csrow->channels[1].ce_count);
714 static ssize_t csrow_size_show(struct csrow_info *csrow, char *data)
716 return sprintf(data,"%u\n", PAGES_TO_MiB(csrow->nr_pages));
719 static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data)
721 return sprintf(data,"%s\n", mem_types[csrow->mtype]);
724 static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data)
726 return sprintf(data,"%s\n", dev_types[csrow->dtype]);
729 static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data)
731 return sprintf(data,"%s\n", edac_caps[csrow->edac_mode]);
734 struct csrowdev_attribute {
735 struct attribute attr;
736 ssize_t (*show)(struct csrow_info *,char *);
737 ssize_t (*store)(struct csrow_info *, const char *,size_t);
740 #define to_csrow(k) container_of(k, struct csrow_info, kobj)
741 #define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr)
743 /* Set of show/store higher level functions for csrow objects */
744 static ssize_t csrowdev_show(struct kobject *kobj, struct attribute *attr,
747 struct csrow_info *csrow = to_csrow(kobj);
748 struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr);
750 if (csrowdev_attr->show)
751 return csrowdev_attr->show(csrow, buffer);
755 static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr,
756 const char *buffer, size_t count)
758 struct csrow_info *csrow = to_csrow(kobj);
759 struct csrowdev_attribute * csrowdev_attr = to_csrowdev_attr(attr);
761 if (csrowdev_attr->store)
762 return csrowdev_attr->store(csrow, buffer, count);
766 static struct sysfs_ops csrowfs_ops = {
767 .show = csrowdev_show,
768 .store = csrowdev_store
771 #define CSROWDEV_ATTR(_name,_mode,_show,_store) \
772 struct csrowdev_attribute attr_##_name = { \
773 .attr = {.name = __stringify(_name), .mode = _mode }, \
778 /* csrow<id>/attribute files */
779 CSROWDEV_ATTR(size_mb,S_IRUGO,csrow_size_show,NULL);
780 CSROWDEV_ATTR(dev_type,S_IRUGO,csrow_dev_type_show,NULL);
781 CSROWDEV_ATTR(mem_type,S_IRUGO,csrow_mem_type_show,NULL);
782 CSROWDEV_ATTR(edac_mode,S_IRUGO,csrow_edac_mode_show,NULL);
783 CSROWDEV_ATTR(ue_count,S_IRUGO,csrow_ue_count_show,NULL);
784 CSROWDEV_ATTR(ce_count,S_IRUGO,csrow_ce_count_show,NULL);
785 CSROWDEV_ATTR(ch0_ce_count,S_IRUGO,csrow_ch0_ce_count_show,NULL);
786 CSROWDEV_ATTR(ch1_ce_count,S_IRUGO,csrow_ch1_ce_count_show,NULL);
788 /* control/attribute files */
789 CSROWDEV_ATTR(ch0_dimm_label,S_IRUGO|S_IWUSR,
790 csrow_ch0_dimm_label_show,
791 csrow_ch0_dimm_label_store);
792 CSROWDEV_ATTR(ch1_dimm_label,S_IRUGO|S_IWUSR,
793 csrow_ch1_dimm_label_show,
794 csrow_ch1_dimm_label_store);
797 /* Attributes of the CSROW<id> object */
798 static struct csrowdev_attribute *csrow_attr[] = {
807 &attr_ch0_dimm_label,
808 &attr_ch1_dimm_label,
813 /* No memory to release */
814 static void edac_csrow_instance_release(struct kobject *kobj)
816 struct csrow_info *cs;
818 debugf1("%s()\n", __func__);
819 cs = container_of(kobj, struct csrow_info, kobj);
820 complete(&cs->kobj_complete);
823 static struct kobj_type ktype_csrow = {
824 .release = edac_csrow_instance_release,
825 .sysfs_ops = &csrowfs_ops,
826 .default_attrs = (struct attribute **) csrow_attr,
829 /* Create a CSROW object under specified edac_mc_device */
830 static int edac_create_csrow_object(struct kobject *edac_mci_kobj,
831 struct csrow_info *csrow, int index )
835 debugf0("%s()\n", __func__);
837 memset(&csrow->kobj, 0, sizeof(csrow->kobj));
839 /* generate ..../edac/mc/mc<id>/csrow<index> */
841 csrow->kobj.parent = edac_mci_kobj;
842 csrow->kobj.ktype = &ktype_csrow;
844 /* name this instance of csrow<id> */
845 err = kobject_set_name(&csrow->kobj,"csrow%d",index);
847 /* Instantiate the csrow object */
848 err = kobject_register(&csrow->kobj);
850 debugf0("Failed to register CSROW%d\n",index);
852 debugf0("Registered CSROW%d\n",index);
858 /* sysfs data structures and methods for the MCI kobjects */
860 static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci,
861 const char *data, size_t count )
865 mci->ue_noinfo_count = 0;
866 mci->ce_noinfo_count = 0;
869 for (row = 0; row < mci->nr_csrows; row++) {
870 struct csrow_info *ri = &mci->csrows[row];
874 for (chan = 0; chan < ri->nr_channels; chan++)
875 ri->channels[chan].ce_count = 0;
877 mci->start_time = jiffies;
882 static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data)
884 return sprintf(data,"%d\n", mci->ue_count);
887 static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data)
889 return sprintf(data,"%d\n", mci->ce_count);
892 static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data)
894 return sprintf(data,"%d\n", mci->ce_noinfo_count);
897 static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data)
899 return sprintf(data,"%d\n", mci->ue_noinfo_count);
902 static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data)
904 return sprintf(data,"%ld\n", (jiffies - mci->start_time) / HZ);
907 static ssize_t mci_mod_name_show(struct mem_ctl_info *mci, char *data)
909 return sprintf(data,"%s %s\n", mci->mod_name, mci->mod_ver);
912 static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data)
914 return sprintf(data,"%s\n", mci->ctl_name);
917 static int mci_output_edac_cap(char *buf, unsigned long edac_cap)
922 for (bit_idx = 0; bit_idx < 8 * sizeof(edac_cap); bit_idx++) {
923 if ((edac_cap >> bit_idx) & 0x1)
924 p += sprintf(p, "%s ", edac_caps[bit_idx]);
930 static ssize_t mci_edac_capability_show(struct mem_ctl_info *mci, char *data)
934 p += mci_output_edac_cap(p,mci->edac_ctl_cap);
935 p += sprintf(p, "\n");
940 static ssize_t mci_edac_current_capability_show(struct mem_ctl_info *mci,
945 p += mci_output_edac_cap(p,mci->edac_cap);
946 p += sprintf(p, "\n");
951 static int mci_output_mtype_cap(char *buf, unsigned long mtype_cap)
956 for (bit_idx = 0; bit_idx < 8 * sizeof(mtype_cap); bit_idx++) {
957 if ((mtype_cap >> bit_idx) & 0x1)
958 p += sprintf(p, "%s ", mem_types[bit_idx]);
964 static ssize_t mci_supported_mem_type_show(struct mem_ctl_info *mci, char *data)
968 p += mci_output_mtype_cap(p,mci->mtype_cap);
969 p += sprintf(p, "\n");
974 static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data)
976 int total_pages, csrow_idx;
978 for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows;
980 struct csrow_info *csrow = &mci->csrows[csrow_idx];
982 if (!csrow->nr_pages)
984 total_pages += csrow->nr_pages;
987 return sprintf(data,"%u\n", PAGES_TO_MiB(total_pages));
990 struct mcidev_attribute {
991 struct attribute attr;
992 ssize_t (*show)(struct mem_ctl_info *,char *);
993 ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
996 #define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj)
997 #define to_mcidev_attr(a) container_of(a, struct mcidev_attribute, attr)
999 static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr,
1002 struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
1003 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr);
1005 if (mcidev_attr->show)
1006 return mcidev_attr->show(mem_ctl_info, buffer);
1010 static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr,
1011 const char *buffer, size_t count)
1013 struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
1014 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr);
1016 if (mcidev_attr->store)
1017 return mcidev_attr->store(mem_ctl_info, buffer, count);
1021 static struct sysfs_ops mci_ops = {
1022 .show = mcidev_show,
1023 .store = mcidev_store
1026 #define MCIDEV_ATTR(_name,_mode,_show,_store) \
1027 struct mcidev_attribute mci_attr_##_name = { \
1028 .attr = {.name = __stringify(_name), .mode = _mode }, \
1034 MCIDEV_ATTR(reset_counters,S_IWUSR,NULL,mci_reset_counters_store);
1036 /* Attribute files */
1037 MCIDEV_ATTR(mc_name,S_IRUGO,mci_ctl_name_show,NULL);
1038 MCIDEV_ATTR(module_name,S_IRUGO,mci_mod_name_show,NULL);
1039 MCIDEV_ATTR(edac_capability,S_IRUGO,mci_edac_capability_show,NULL);
1040 MCIDEV_ATTR(size_mb,S_IRUGO,mci_size_mb_show,NULL);
1041 MCIDEV_ATTR(seconds_since_reset,S_IRUGO,mci_seconds_show,NULL);
1042 MCIDEV_ATTR(ue_noinfo_count,S_IRUGO,mci_ue_noinfo_show,NULL);
1043 MCIDEV_ATTR(ce_noinfo_count,S_IRUGO,mci_ce_noinfo_show,NULL);
1044 MCIDEV_ATTR(ue_count,S_IRUGO,mci_ue_count_show,NULL);
1045 MCIDEV_ATTR(ce_count,S_IRUGO,mci_ce_count_show,NULL);
1046 MCIDEV_ATTR(edac_current_capability,S_IRUGO,
1047 mci_edac_current_capability_show,NULL);
1048 MCIDEV_ATTR(supported_mem_type,S_IRUGO,
1049 mci_supported_mem_type_show,NULL);
1052 static struct mcidev_attribute *mci_attr[] = {
1053 &mci_attr_reset_counters,
1054 &mci_attr_module_name,
1056 &mci_attr_edac_capability,
1057 &mci_attr_edac_current_capability,
1058 &mci_attr_supported_mem_type,
1060 &mci_attr_seconds_since_reset,
1061 &mci_attr_ue_noinfo_count,
1062 &mci_attr_ce_noinfo_count,
1070 * Release of a MC controlling instance
1072 static void edac_mci_instance_release(struct kobject *kobj)
1074 struct mem_ctl_info *mci;
1077 debugf0("%s() idx=%d\n", __func__, mci->mc_idx);
1078 complete(&mci->kobj_complete);
1081 static struct kobj_type ktype_mci = {
1082 .release = edac_mci_instance_release,
1083 .sysfs_ops = &mci_ops,
1084 .default_attrs = (struct attribute **) mci_attr,
1087 #endif /* DISABLE_EDAC_SYSFS */
1089 #define EDAC_DEVICE_SYMLINK "device"
1092 * Create a new Memory Controller kobject instance,
1093 * mc<id> under the 'mc' directory
1099 static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
1100 #ifdef DISABLE_EDAC_SYSFS
1108 struct csrow_info *csrow;
1109 struct kobject *edac_mci_kobj=&mci->edac_mci_kobj;
1111 debugf0("%s() idx=%d\n", __func__, mci->mc_idx);
1113 memset(edac_mci_kobj, 0, sizeof(*edac_mci_kobj));
1115 /* set the name of the mc<id> object */
1116 err = kobject_set_name(edac_mci_kobj,"mc%d",mci->mc_idx);
1120 /* link to our parent the '..../edac/mc' object */
1121 edac_mci_kobj->parent = &edac_memctrl_kobj;
1122 edac_mci_kobj->ktype = &ktype_mci;
1124 /* register the mc<id> kobject */
1125 err = kobject_register(edac_mci_kobj);
1129 /* create a symlink for the device */
1130 err = sysfs_create_link(edac_mci_kobj, &mci->pdev->dev.kobj,
1131 EDAC_DEVICE_SYMLINK);
1135 /* Make directories for each CSROW object
1136 * under the mc<id> kobject
1138 for (i = 0; i < mci->nr_csrows; i++) {
1140 csrow = &mci->csrows[i];
1142 /* Only expose populated CSROWs */
1143 if (csrow->nr_pages > 0) {
1144 err = edac_create_csrow_object(edac_mci_kobj,csrow,i);
1153 /* CSROW error: backout what has already been registered, */
1155 for ( i--; i >= 0; i--) {
1156 if (csrow->nr_pages > 0) {
1157 init_completion(&csrow->kobj_complete);
1158 kobject_unregister(&mci->csrows[i].kobj);
1159 wait_for_completion(&csrow->kobj_complete);
1164 init_completion(&mci->kobj_complete);
1165 kobject_unregister(edac_mci_kobj);
1166 wait_for_completion(&mci->kobj_complete);
1170 #endif /* DISABLE_EDAC_SYSFS */
1173 * remove a Memory Controller instance
/*
 * Tear down the sysfs objects of one memory controller.  The body is
 * compiled out when DISABLE_EDAC_SYSFS is defined.
 *
 * Every csrow with nr_pages > 0 has its kobject unregistered, and the
 * function waits on that csrow's kobj_complete before moving to the next
 * one.  Afterwards the "device" symlink is removed and the controller's own
 * kobject is unregistered, again waiting on mci->kobj_complete before
 * returning.
 */
1175 static void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
1177 #ifndef DISABLE_EDAC_SYSFS
1180 debugf0("%s()\n", __func__);
1182 /* remove all csrow kobjects */
1183 for (i = 0; i < mci->nr_csrows; i++) {
1184 if (mci->csrows[i].nr_pages > 0) {
1185 init_completion(&mci->csrows[i].kobj_complete);
1186 kobject_unregister(&mci->csrows[i].kobj);
1187 wait_for_completion(&mci->csrows[i].kobj_complete);
1191 sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK);
1192 init_completion(&mci->kobj_complete);
1193 kobject_unregister(&mci->edac_mci_kobj);
1194 wait_for_completion(&mci->kobj_complete);
1195 #endif /* DISABLE_EDAC_SYSFS */
1198 /* END OF sysfs data and methods */
1200 #ifdef CONFIG_EDAC_DEBUG
1203 void edac_mc_dump_channel(struct channel_info *chan)
1205 debugf4("\tchannel = %p\n", chan);
1206 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
1207 debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
1208 debugf4("\tchannel->label = '%s'\n", chan->label);
1209 debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
1211 EXPORT_SYMBOL(edac_mc_dump_channel);
1214 void edac_mc_dump_csrow(struct csrow_info *csrow)
1216 debugf4("\tcsrow = %p\n", csrow);
1217 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
1218 debugf4("\tcsrow->first_page = 0x%lx\n",
1220 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
1221 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
1222 debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
1223 debugf4("\tcsrow->nr_channels = %d\n",
1224 csrow->nr_channels);
1225 debugf4("\tcsrow->channels = %p\n", csrow->channels);
1226 debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
1228 EXPORT_SYMBOL(edac_mc_dump_csrow);
1231 void edac_mc_dump_mci(struct mem_ctl_info *mci)
1233 debugf3("\tmci = %p\n", mci);
1234 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
1235 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
1236 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
1237 debugf4("\tmci->edac_check = %p\n", mci->edac_check);
1238 debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
1239 mci->nr_csrows, mci->csrows);
1240 debugf3("\tpdev = %p\n", mci->pdev);
1241 debugf3("\tmod_name:ctl_name = %s:%s\n",
1242 mci->mod_name, mci->ctl_name);
1243 debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
1245 EXPORT_SYMBOL(edac_mc_dump_mci);
1248 #endif /* CONFIG_EDAC_DEBUG */
1250 /* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
1251 * Adjust 'ptr' so that its alignment is at least as stringent as what the
1252 * compiler would provide for X and return the aligned result.
1254 * If 'size' is a constant, the compiler will optimize this whole function
1255 * down to either a no-op or the addition of a constant to the value of 'ptr'.
1257 static inline char * align_ptr (void *ptr, unsigned size)
1261 /* Here we assume that the alignment of a "long long" is the most
1262 * stringent alignment that the compiler will ever provide by default.
1263 * As far as I know, this is a reasonable assumption.
1265 if (size > sizeof(long))
1266 align = sizeof(long long);
1267 else if (size > sizeof(int))
1268 align = sizeof(long);
1269 else if (size > sizeof(short))
1270 align = sizeof(int);
1271 else if (size > sizeof(char))
1272 align = sizeof(short);
1274 return (char *) ptr;
1279 return (char *) ptr;
1281 return (char *) (((unsigned long) ptr) + align - r);
1286 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
1287 * @sz_pvt: size of private storage needed
1288 * @nr_csrows: Number of CSROWS needed for this MC
1289 * @nr_chans: Number of channels for the MC
1291 * Everything is kmalloc'ed as one big chunk - more efficient.
1292 * Only can be used if all structures have the same lifetime - otherwise
1293 * you have to allocate and initialize your own structures.
1295 * Use edac_mc_free() to free mc structures allocated by this function.
1298 * NULL allocation failed
1299 * struct mem_ctl_info pointer
1301 struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
1304 struct mem_ctl_info *mci;
1305 struct csrow_info *csi, *csrow;
1306 struct channel_info *chi, *chp, *chan;
1311 /* Figure out the offsets of the various items from the start of an mc
1312 * structure. We want the alignment of each item to be at least as
1313 * stringent as what the compiler would provide if we could simply
1314 * hardcode everything into a single struct.
1316 mci = (struct mem_ctl_info *) 0;
1317 csi = (struct csrow_info *)align_ptr(&mci[1], sizeof(*csi));
1318 chi = (struct channel_info *)
1319 align_ptr(&csi[nr_csrows], sizeof(*chi));
1320 pvt = align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
1321 size = ((unsigned long) pvt) + sz_pvt;
1323 if ((mci = kmalloc(size, GFP_KERNEL)) == NULL)
1326 /* Adjust pointers so they point within the memory we just allocated
1327 * rather than an imaginary chunk of memory located at address 0.
1329 csi = (struct csrow_info *) (((char *) mci) + ((unsigned long) csi));
1330 chi = (struct channel_info *) (((char *) mci) + ((unsigned long) chi));
1331 pvt = sz_pvt ? (((char *) mci) + ((unsigned long) pvt)) : NULL;
1333 memset(mci, 0, size); /* clear all fields */
1336 mci->pvt_info = pvt;
1337 mci->nr_csrows = nr_csrows;
1339 for (row = 0; row < nr_csrows; row++) {
1341 csrow->csrow_idx = row;
1343 csrow->nr_channels = nr_chans;
1344 chp = &chi[row * nr_chans];
1345 csrow->channels = chp;
1347 for (chn = 0; chn < nr_chans; chn++) {
1349 chan->chan_idx = chn;
1350 chan->csrow = csrow;
1356 EXPORT_SYMBOL(edac_mc_alloc);
1360 * edac_mc_free: Free a previously allocated 'mci' structure
1361 * @mci: pointer to a struct mem_ctl_info structure
/*
 * Counterpart of edac_mc_alloc(): that function's header says structures it
 * allocates are to be released through edac_mc_free().
 *
 * NOTE(review): the body of this function is not visible in this listing,
 * so how the release is performed cannot be confirmed from here.
 */
1363 void edac_mc_free(struct mem_ctl_info *mci)
1367 EXPORT_SYMBOL(edac_mc_free);
/*
 * Walk the global mc_devices list and compare each registered controller's
 * pdev with the one supplied.
 *
 * Callers in this file treat a non-NULL result as "a controller is already
 * registered for this PCI device" and NULL as "not found".
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
1369 static struct mem_ctl_info *find_mci_by_pdev(struct pci_dev *pdev)
1371 struct mem_ctl_info *mci;
1372 struct list_head *item;
1374 debugf3("%s()\n", __func__);
1376 list_for_each(item, &mc_devices) {
1377 mci = list_entry(item, struct mem_ctl_info, link);
1379 if (mci->pdev == pdev)
/*
 * Link mci into the global mc_devices list with list_add_tail_rcu().
 *
 * With an empty list the insertion point is the list head.  Otherwise a
 * warning is logged when find_mci_by_pdev() reports that the PCI device
 * already has a controller registered, and the list is scanned comparing
 * each entry's mc_idx against a running index to choose the entry to insert
 * before; if none is chosen the list head is used, i.e. mci is appended.
 *
 * The caller (edac_mc_add_mc) treats a non-zero return as failure.
 *
 * NOTE(review): the declaration and updates of the running index, any
 * assignment of mci->mc_idx, and the return statements are not visible in
 * this listing.
 */
1386 static int add_mc_to_global_list (struct mem_ctl_info *mci)
1388 struct list_head *item, *insert_before;
1389 struct mem_ctl_info *p;
1392 if (list_empty(&mc_devices)) {
1394 insert_before = &mc_devices;
1396 if (find_mci_by_pdev(mci->pdev)) {
1397 edac_printk(KERN_WARNING, EDAC_MC,
1398 "%s (%s) %s %s already assigned %d\n",
1399 mci->pdev->dev.bus_id,
1400 pci_name(mci->pdev), mci->mod_name,
1401 mci->ctl_name, mci->mc_idx);
1405 insert_before = NULL;
1408 list_for_each(item, &mc_devices) {
1409 p = list_entry(item, struct mem_ctl_info, link);
1411 if (p->mc_idx != i) {
1412 insert_before = item;
1421 if (insert_before == NULL)
1422 insert_before = &mc_devices;
1425 list_add_tail_rcu(&mci->link, insert_before);
/*
 * RCU callback queued by del_mc_from_global_list() through call_rcu().
 * Recovers the mem_ctl_info from its embedded rcu head, reinitialises its
 * list link, and signals mci->complete so the waiting remover can proceed.
 */
1430 static void complete_mc_list_del (struct rcu_head *head)
1432 struct mem_ctl_info *mci;
1434 mci = container_of(head, struct mem_ctl_info, rcu);
1435 INIT_LIST_HEAD(&mci->link);
1436 complete(&mci->complete);
/*
 * Unlink mci from the global list with list_del_rcu(), then queue
 * complete_mc_list_del() via call_rcu() and block in wait_for_completion()
 * until that callback has signalled mci->complete.
 *
 * Because it waits for the completion, this function may sleep.
 */
1440 static void del_mc_from_global_list (struct mem_ctl_info *mci)
1442 list_del_rcu(&mci->link);
1443 init_completion(&mci->complete);
1444 call_rcu(&mci->rcu, complete_mc_list_del);
1445 wait_for_completion(&mci->complete);
1450 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
1451 * create sysfs entries associated with mci structure
1452 * @mci: pointer to the mci structure to be added to the list
1459 /* FIXME - should a warning be printed if no error detection? correction? */
/*
 * Register a memory controller with the EDAC core.
 *
 * With CONFIG_EDAC_DEBUG, the mci is dumped when edac_debug_level >= 3 and
 * every csrow and channel is dumped when it is >= 4.  Under mem_ctls_mutex
 * the mci is added to the global list, start_time is set to the current
 * jiffies, the sysfs device is created and an informational message is
 * logged.  A warning is logged if sysfs creation fails.
 *
 * NOTE(review): the goto labels and return statements are not visible in
 * this listing.  The trailing del_mc_from_global_list() + up() sequence
 * appears to be the failure path that undoes the list insertion; confirm
 * against the full source.
 */
1460 int edac_mc_add_mc(struct mem_ctl_info *mci)
1462 debugf0("%s()\n", __func__);
1463 #ifdef CONFIG_EDAC_DEBUG
1464 if (edac_debug_level >= 3)
1465 edac_mc_dump_mci(mci);
1466 if (edac_debug_level >= 4) {
1469 for (i = 0; i < mci->nr_csrows; i++) {
1471 edac_mc_dump_csrow(&mci->csrows[i]);
1472 for (j = 0; j < mci->csrows[i].nr_channels; j++)
1473 edac_mc_dump_channel(&mci->csrows[i].
1478 down(&mem_ctls_mutex);
1480 if (add_mc_to_global_list(mci))
1483 /* set load time so that error rate can be tracked */
1484 mci->start_time = jiffies;
1486 if (edac_create_sysfs_mci_device(mci)) {
1487 edac_mc_printk(mci, KERN_WARNING,
1488 "failed to create sysfs device\n");
1492 /* Report action taken */
1493 edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: PCI %s\n",
1494 mci->mod_name, mci->ctl_name, pci_name(mci->pdev));
1496 up(&mem_ctls_mutex);
1500 del_mc_from_global_list(mci);
1503 up(&mem_ctls_mutex);
1506 EXPORT_SYMBOL(edac_mc_add_mc);
1510 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
1511 * remove mci structure from global list
1512 * @pdev: Pointer to 'struct pci_dev' representing mci structure to remove.
1514 * Return pointer to removed mci structure, or NULL if device not found.
/*
 * Unregister the memory controller associated with pdev.
 *
 * Under mem_ctls_mutex the controller is looked up with find_mci_by_pdev().
 * If none is found the mutex is released.  Otherwise its sysfs device is
 * removed, it is unlinked from the global list (which waits for an RCU
 * callback), the mutex is released and the removal is logged.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
1516 struct mem_ctl_info * edac_mc_del_mc(struct pci_dev *pdev)
1518 struct mem_ctl_info *mci;
1520 debugf0("MC: %s()\n", __func__);
1521 down(&mem_ctls_mutex);
1523 if ((mci = find_mci_by_pdev(pdev)) == NULL) {
1524 up(&mem_ctls_mutex);
1528 edac_remove_sysfs_mci_device(mci);
1529 del_mc_from_global_list(mci);
1530 up(&mem_ctls_mutex);
1531 edac_printk(KERN_INFO, EDAC_MC,
1532 "Removed device %d for %s %s: PCI %s\n", mci->mc_idx,
1533 mci->mod_name, mci->ctl_name, pci_name(mci->pdev));
1536 EXPORT_SYMBOL(edac_mc_del_mc);
/*
 * Software-scrub a region inside one page.
 *
 * 'page' is a page frame number; it is checked with pfn_valid() and
 * converted with pfn_to_page().  The page is mapped with kmap_atomic()
 * using the KM_BOUNCE_READ slot, atomic_scrub() is run on
 * virt_addr + offset for 'size' bytes, and the mapping is dropped again.
 * Local interrupts are disabled around the mapping only when the page is a
 * highmem page.
 *
 * NOTE(review): the last parameter line, two local declarations and the
 * early return for an invalid pfn are not visible in this listing.
 */
1539 void edac_mc_scrub_block(unsigned long page, unsigned long offset,
1544 unsigned long flags = 0;
1546 debugf3("%s()\n", __func__);
1548 /* ECC error page was not in our memory. Ignore it. */
1549 if(!pfn_valid(page))
1552 /* Find the actual page structure then map it and fix */
1553 pg = pfn_to_page(page);
1555 if (PageHighMem(pg))
1556 local_irq_save(flags);
1558 virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
1560 /* Perform architecture specific atomic scrub operation */
1561 atomic_scrub(virt_addr + offset, size);
1563 /* Unmap and complete */
1564 kunmap_atomic(virt_addr, KM_BOUNCE_READ);
1566 if (PageHighMem(pg))
1567 local_irq_restore(flags);
1569 EXPORT_SYMBOL(edac_mc_scrub_block);
1572 /* FIXME - should return -1 */
/*
 * Search mci->csrows for the row that contains 'page'.
 *
 * Each row's nr_pages is tested against 0 before the range check.  A row
 * matches when first_page <= page <= last_page and the page agrees with
 * first_page under page_mask.  A KERN_ERR message is emitted for an address
 * that could not be looked up.
 *
 * NOTE(review): the action taken for empty rows, the statement executed on
 * a match, the condition guarding the error message and the return
 * statement are not visible in this listing.
 */
1573 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
1576 struct csrow_info *csrows = mci->csrows;
1579 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
1582 for (i = 0; i < mci->nr_csrows; i++) {
1583 struct csrow_info *csrow = &csrows[i];
1585 if (csrow->nr_pages == 0)
1588 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
1589 "mask(0x%lx)\n", mci->mc_idx, __func__,
1590 csrow->first_page, page, csrow->last_page,
1593 if ((page >= csrow->first_page) &&
1594 (page <= csrow->last_page) &&
1595 ((page & csrow->page_mask) ==
1596 (csrow->first_page & csrow->page_mask))) {
1603 edac_mc_printk(mci, KERN_ERR,
1604 "could not look up page error address %lx\n",
1605 (unsigned long) page);
1609 EXPORT_SYMBOL(edac_mc_find_csrow_by_page);
1612 /* FIXME - setable log (warning/emerg) levels */
1613 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
/*
 * Record a correctable error (CE) for a known row and channel.
 *
 * row is validated against mci->nr_csrows and channel against the row's
 * nr_channels; an out-of-range value is logged as an INTERNAL ERROR and
 * handed to edac_mc_handle_ce_no_info().  For valid input a warning with
 * page, offset, grain, syndrome, row, channel and label is emitted and the
 * per-row and per-channel ce_count counters are incremented.
 *
 * When mci->scrub_mode has SCRUB_SW_SRC set, the page frame number is
 * passed through the driver's ctl_page_to_phys() hook if one is installed
 * and the resulting page is scrubbed with edac_mc_scrub_block() using the
 * row's grain as the size.
 *
 * NOTE(review): the final parameter, the returns after the range checks,
 * any condition guarding the warning, and the fallback operand of the
 * remapped_page expression are not visible in this listing.
 */
1614 void edac_mc_handle_ce(struct mem_ctl_info *mci,
1615 unsigned long page_frame_number,
1616 unsigned long offset_in_page,
1617 unsigned long syndrome, int row, int channel,
1620 unsigned long remapped_page;
1622 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
1624 /* FIXME - maybe make panic on INTERNAL ERROR an option */
1625 if (row >= mci->nr_csrows || row < 0) {
1626 /* something is wrong */
1627 edac_mc_printk(mci, KERN_ERR,
1628 "INTERNAL ERROR: row out of range "
1629 "(%d >= %d)\n", row, mci->nr_csrows);
1630 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
1633 if (channel >= mci->csrows[row].nr_channels || channel < 0) {
1634 /* something is wrong */
1635 edac_mc_printk(mci, KERN_ERR,
1636 "INTERNAL ERROR: channel out of range "
1637 "(%d >= %d)\n", channel,
1638 mci->csrows[row].nr_channels);
1639 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
1644 /* FIXME - put in DIMM location */
1645 edac_mc_printk(mci, KERN_WARNING,
1646 "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
1647 "0x%lx, row %d, channel %d, label \"%s\": %s\n",
1648 page_frame_number, offset_in_page,
1649 mci->csrows[row].grain, syndrome, row, channel,
1650 mci->csrows[row].channels[channel].label, msg);
1653 mci->csrows[row].ce_count++;
1654 mci->csrows[row].channels[channel].ce_count++;
1656 if (mci->scrub_mode & SCRUB_SW_SRC) {
1658 * Some MC's can remap memory so that it is still available
1659 * at a different address when PCI devices map into memory.
1660 * MC's that can't do this lose the memory where PCI devices
1661 * are mapped. This mapping is MC dependant and so we call
1662 * back into the MC driver for it to map the MC page to
1663 * a physical (CPU) page which can then be mapped to a virtual
1664 * page - which can then be scrubbed.
1666 remapped_page = mci->ctl_page_to_phys ?
1667 mci->ctl_page_to_phys(mci, page_frame_number) :
1670 edac_mc_scrub_block(remapped_page, offset_in_page,
1671 mci->csrows[row].grain);
1674 EXPORT_SYMBOL(edac_mc_handle_ce);
/*
 * Record a correctable error for which no location information is
 * available: emits a KERN_WARNING message containing msg and increments
 * mci->ce_noinfo_count.
 *
 * NOTE(review): the second parameter line and any condition guarding the
 * message are not visible in this listing.
 */
1677 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
1681 edac_mc_printk(mci, KERN_WARNING,
1682 "CE - no information available: %s\n", msg);
1683 mci->ce_noinfo_count++;
1686 EXPORT_SYMBOL(edac_mc_handle_ce_no_info);
/*
 * Record an uncorrectable error (UE) for a known row.
 *
 * An out-of-range row is logged as an INTERNAL ERROR and handed to
 * edac_mc_handle_ue_no_info().  Otherwise the labels of the row's channels
 * are joined with ':' into an on-stack buffer of EDAC_MC_LABEL_LEN * 4 + 1
 * bytes, a KERN_EMERG message is emitted, a panic() call carrying the same
 * details follows, and the row's ue_count is incremented.
 *
 * NOTE(review): the conditions guarding the message and the panic(), the
 * updates of pos/len after each snprintf(), and some declarations are not
 * visible in this listing.
 *
 * NOTE(review): snprintf() returns the length the output would have had
 * without truncation.  If pos and len are advanced by that return value, a
 * truncated label could move pos past the end of labels[]; scnprintf()
 * returns the number of characters actually written and would avoid this.
 * Confirm against the full source.
 */
1689 void edac_mc_handle_ue(struct mem_ctl_info *mci,
1690 unsigned long page_frame_number,
1691 unsigned long offset_in_page, int row,
1694 int len = EDAC_MC_LABEL_LEN * 4;
1695 char labels[len + 1];
1700 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
1702 /* FIXME - maybe make panic on INTERNAL ERROR an option */
1703 if (row >= mci->nr_csrows || row < 0) {
1704 /* something is wrong */
1705 edac_mc_printk(mci, KERN_ERR,
1706 "INTERNAL ERROR: row out of range "
1707 "(%d >= %d)\n", row, mci->nr_csrows);
1708 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
1712 chars = snprintf(pos, len + 1, "%s",
1713 mci->csrows[row].channels[0].label);
1716 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
1718 chars = snprintf(pos, len + 1, ":%s",
1719 mci->csrows[row].channels[chan].label);
1725 edac_mc_printk(mci, KERN_EMERG,
1726 "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
1727 "labels \"%s\": %s\n", page_frame_number,
1728 offset_in_page, mci->csrows[row].grain, row, labels,
1733 ("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, row %d,"
1734 " labels \"%s\": %s\n", mci->mc_idx,
1735 page_frame_number, offset_in_page,
1736 mci->csrows[row].grain, row, labels, msg);
1739 mci->csrows[row].ue_count++;
1741 EXPORT_SYMBOL(edac_mc_handle_ue);
/*
 * Record an uncorrectable error for which no location information is
 * available.  The visible body contains a panic() call naming the
 * controller index, a KERN_WARNING message containing msg, and an increment
 * of mci->ue_noinfo_count.
 *
 * NOTE(review): the second parameter line and the conditions guarding the
 * panic() and the message are not visible in this listing.
 */
1744 void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
1748 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
1751 edac_mc_printk(mci, KERN_WARNING,
1752 "UE - no information available: %s\n", msg);
1753 mci->ue_noinfo_count++;
1756 EXPORT_SYMBOL(edac_mc_handle_ue_no_info);
/*
 * Read and acknowledge the parity/system-error bits of a PCI device.
 *
 * Reads the 16-bit PCI_STATUS register, or PCI_SEC_STATUS when 'secondary'
 * is non-zero.  A value of 0xFFFF triggers an extra read of the dword at
 * config offset 0 as a sanity check.  The status is then masked down to the
 * error bits of interest and the masked value is written back to the same
 * register to reset those bits.
 *
 * NOTE(review): local declarations, the action taken when the sanity read
 * is all ones, the remainder of the mask expression and the return
 * statement are not visible in this listing.
 */
1761 static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
1766 where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
1767 pci_read_config_word(dev, where, &status);
1769 /* If we get back 0xFFFF then we must suspect that the card has been pulled but
1770 the Linux PCI layer has not yet finished cleaning up. We don't want to report
1773 if (status == 0xFFFF) {
1775 pci_read_config_dword(dev, 0, &sanity);
1776 if (sanity == 0xFFFFFFFF)
1779 status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
1783 /* reset only the bits we are interested in */
1784 pci_write_config_word(dev, where, status);
/* Per-device callback type accepted by edac_pci_dev_parity_iterator(). */
1789 typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
/*
 * Clear any PCI parity errors logged by this device.
 *
 * Calls get_pci_parity_status() on the primary status register purely for
 * its side effect of writing the error bits back, then reads the header
 * type and does the same for the secondary status register when the device
 * is a bridge (header type, ignoring bit 7, equals PCI_HEADER_TYPE_BRIDGE).
 */
1792 static void edac_pci_dev_parity_clear( struct pci_dev *dev )
1796 get_pci_parity_status(dev, 0);
1798 /* read the device TYPE, looking for bridges */
1799 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1801 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
1802 get_pci_parity_status(dev, 1);
1806 * PCI Parity polling
/*
 * Check one PCI device for logged system or parity errors.
 *
 * The primary status is fetched (and acknowledged) through
 * get_pci_parity_status().  A signalled system error, a master data parity
 * error and a detected parity error are each logged at KERN_CRIT; the two
 * parity cases also increment the global pci_parity_count.  If the header
 * type identifies a bridge, the same three checks are repeated on the
 * secondary status register with "Bridge" prefixed to the messages.
 *
 * NOTE(review): local declarations, the final argument of several log
 * calls and any enclosing "status is non-zero" test are not visible in this
 * listing.
 */
1809 static void edac_pci_dev_parity_test(struct pci_dev *dev)
1814 /* read the STATUS register on this device
1816 status = get_pci_parity_status(dev, 0);
1818 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id );
1820 /* check the status reg for errors */
1822 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1823 edac_printk(KERN_CRIT, EDAC_PCI,
1824 "Signaled System Error on %s\n",
1827 if (status & (PCI_STATUS_PARITY)) {
1828 edac_printk(KERN_CRIT, EDAC_PCI,
1829 "Master Data Parity Error on %s\n",
1832 atomic_inc(&pci_parity_count);
1835 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1836 edac_printk(KERN_CRIT, EDAC_PCI,
1837 "Detected Parity Error on %s\n",
1840 atomic_inc(&pci_parity_count);
1844 /* read the device TYPE, looking for bridges */
1845 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1847 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id );
1849 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
1850 /* On bridges, need to examine secondary status register */
1851 status = get_pci_parity_status(dev, 1);
1853 debugf2("PCI SEC_STATUS= 0x%04x %s\n",
1854 status, dev->dev.bus_id );
1856 /* check the secondary status reg for errors */
1858 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1859 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1860 "Signaled System Error on %s\n",
1863 if (status & (PCI_STATUS_PARITY)) {
1864 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1865 "Master Data Parity Error on "
1866 "%s\n", pci_name(dev));
1868 atomic_inc(&pci_parity_count);
1871 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1872 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1873 "Detected Parity Error on %s\n",
1876 atomic_inc(&pci_parity_count);
1883 * check_dev_on_list: Scan for a PCI device on a white/black list
1884 * @list: an EDAC &edac_pci_device_list white/black list pointer
1885 * @free_index: index of next free entry on the list
1886 * @pci_dev: PCI Device pointer
1888 * see if list contains the device.
1890 * Returns: 0 not found
/*
 * Scan the first free_index entries of a white/black list for an entry
 * whose vendor and device IDs both equal those of 'dev'.  rc starts at 0,
 * meaning "not found".
 *
 * NOTE(review): the statements executed on a match and the return statement
 * are not visible in this listing.
 */
1893 static int check_dev_on_list(struct edac_pci_device_list *list, int free_index,
1894 struct pci_dev *dev)
1897 int rc = 0; /* Assume not found */
1898 unsigned short vendor=dev->vendor;
1899 unsigned short device=dev->device;
1901 /* Scan the list, looking for a vendor/device match
1903 for (i = 0; i < free_index; i++, list++ ) {
1904 if ( (list->vendor == vendor ) &&
1905 (list->device == device )) {
1915 * pci_dev parity list iterator
1916 * Scan the PCI device list for one iteration, looking for SERRORs
1917 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
/*
 * Visit every PCI device returned by pci_get_device(PCI_ANY_ID, PCI_ANY_ID)
 * and filter it through the white/black lists: when pci_whitelist_count is
 * positive the device is tested against the whitelist, otherwise against
 * the blacklist.
 *
 * NOTE(review): the lines that actually invoke fn on a device are not
 * visible in this listing.
 */
1919 static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
1921 struct pci_dev *dev=NULL;
1923 /* request for kernel access to the next PCI device, if any,
1924 * and while we are looking at it have its reference count
1925 * bumped until we are done with it
1927 while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1929 /* if whitelist exists then it has priority, so only scan those
1930 * devices on the whitelist
1932 if (pci_whitelist_count > 0 ) {
1933 if (check_dev_on_list(pci_whitelist,
1934 pci_whitelist_count, dev))
1938 * if no whitelist, then check if this devices is
1941 if (!check_dev_on_list(pci_blacklist,
1942 pci_blacklist_count, dev))
/*
 * One pass of PCI parity polling.
 *
 * Tests the check_pci_parity control first.  It then snapshots
 * pci_parity_count, runs edac_pci_dev_parity_test over the PCI devices with
 * local interrupts disabled, and - only when panic_on_pci_parity is set -
 * panics if the counter changed during the scan.
 *
 * NOTE(review): the declaration of before_count and the statement executed
 * when check_pci_parity is zero are not visible in this listing.
 */
1948 static void do_pci_parity_check(void)
1950 unsigned long flags;
1953 debugf3("%s()\n", __func__);
1955 if (!check_pci_parity)
1958 before_count = atomic_read(&pci_parity_count);
1960 /* scan all PCI devices looking for a Parity Error on devices and
1963 local_irq_save(flags);
1964 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
1965 local_irq_restore(flags);
1967 /* Only if operator has selected panic on PCI Error */
1968 if (panic_on_pci_parity) {
1969 /* If the count is different 'after' from 'before' */
1970 if (before_count != atomic_read(&pci_parity_count))
1971 panic("EDAC: PCI Parity Error");
/*
 * Run edac_pci_dev_parity_clear through edac_pci_dev_parity_iterator() so
 * that error bits already latched in device status registers are reset.
 * Called from edac_mc_init().
 */
1976 static inline void clear_pci_parity_errors(void)
1978 /* Clear any PCI bus parity errors that devices initially have logged
1979 * in their registers.
1981 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
1985 #else /* CONFIG_PCI */
/*
 * Variants of the two PCI entry points that sit on the #else side of the
 * CONFIG_PCI conditional.
 *
 * NOTE(review): their bodies are not visible in this listing; presumably
 * they are empty stubs - confirm against the full source.
 */
1988 static inline void do_pci_parity_check(void)
1994 static inline void clear_pci_parity_errors(void)
2000 #endif /* CONFIG_PCI */
2003 * Iterate over all MC instances and check for ECC, et al, errors
/*
 * Poll every registered memory controller: with mem_ctls_mutex held, walk
 * mc_devices and call each controller's edac_check() hook when one is
 * installed.
 */
2005 static inline void check_mc_devices (void)
2007 struct list_head *item;
2008 struct mem_ctl_info *mci;
2010 debugf3("%s()\n", __func__);
2012 down(&mem_ctls_mutex);
2014 list_for_each(item, &mc_devices) {
2015 mci = list_entry(item, struct mem_ctl_info, link);
2017 if (mci->edac_check != NULL)
2018 mci->edac_check(mci);
2021 up(&mem_ctls_mutex);
2026 * Check MC status every poll_msec.
2027 * Check PCI status every poll_msec as well.
2029 * This is where the work gets done for edac.
2031 * SMP safe, doesn't use NMI, and auto-rate-limits.
/*
 * One polling pass.  The visible body runs do_pci_parity_check().
 *
 * NOTE(review): other statements of this function may be missing from this
 * listing (the comment preceding it also mentions checking MC status).
 */
2033 static void do_edac_check(void)
2035 debugf3("%s()\n", __func__);
2037 do_pci_parity_check();
/*
 * Body of the "kedac" kernel thread started by edac_mc_init().  Loops until
 * kthread_should_stop() is true, sleeping interruptibly for
 * (HZ * poll_msec) / 1000 jiffies in each iteration.
 *
 * NOTE(review): the work performed on each iteration and the return
 * statement are not visible in this listing.
 */
2040 static int edac_kernel_thread(void *arg)
2042 while (!kthread_should_stop()) {
2045 /* goto sleep for the interval */
2046 schedule_timeout_interruptible((HZ * poll_msec) / 1000);
2055 * module initialization entry point
/*
 * Module initialisation: logs the version banner, clears PCI parity errors
 * already latched in devices, creates the memory-controller and PCI sysfs
 * entries, and starts the "kedac" polling thread.
 *
 * If the PCI sysfs setup fails the memory-controller sysfs entries are torn
 * down again; if the thread cannot be started both sysfs trees are torn
 * down and PTR_ERR(edac_thread) is returned.
 *
 * NOTE(review): the return statements of the other paths are not visible in
 * this listing.
 */
2057 static int __init edac_mc_init(void)
2059 edac_printk(KERN_INFO, EDAC_MC, EDAC_MC_VERSION "\n");
2062 * Harvest and clear any boot/initialization PCI parity errors
2064 * FIXME: This only clears errors logged by devices present at time of
2065 * module initialization. We should also do an initial clear
2066 * of each newly hotplugged device.
2068 clear_pci_parity_errors();
2070 /* Create the MC sysfs entries */
2071 if (edac_sysfs_memctrl_setup()) {
2072 edac_printk(KERN_ERR, EDAC_MC,
2073 "Error initializing sysfs code\n");
2077 /* Create the PCI parity sysfs entries */
2078 if (edac_sysfs_pci_setup()) {
2079 edac_sysfs_memctrl_teardown();
2080 edac_printk(KERN_ERR, EDAC_MC,
2081 "EDAC PCI: Error initializing sysfs code\n");
2085 /* create our kernel thread */
2086 edac_thread = kthread_run(edac_kernel_thread, NULL, "kedac");
2087 if (IS_ERR(edac_thread)) {
2088 /* remove the sysfs entries */
2089 edac_sysfs_memctrl_teardown();
2090 edac_sysfs_pci_teardown();
2091 return PTR_ERR(edac_thread);
2100 * module exit/termination function
/*
 * Module teardown: stops the polling thread with kthread_stop() first, then
 * removes the memory-controller and PCI sysfs entries.
 */
2102 static void __exit edac_mc_exit(void)
2104 debugf0("%s()\n", __func__);
2106 kthread_stop(edac_thread);
2108 /* tear down the sysfs device */
2109 edac_sysfs_memctrl_teardown();
2110 edac_sysfs_pci_teardown();
/*
 * Module entry/exit registration, metadata, and the tunables exposed as
 * module parameters.  Every parameter below is an int registered with
 * permissions 0644.
 */
2116 module_init(edac_mc_init);
2117 module_exit(edac_mc_exit);
2119 MODULE_LICENSE("GPL");
2120 MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n"
2121 "Based on.work by Dan Hollis et al");
2122 MODULE_DESCRIPTION("Core library routines for MC reporting");
2124 module_param(panic_on_ue, int, 0644);
2125 MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
2126 module_param(check_pci_parity, int, 0644);
2127 MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on");
2128 module_param(panic_on_pci_parity, int, 0644);
2129 MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on");
2130 module_param(log_ue, int, 0644);
2131 MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on");
2132 module_param(log_ce, int, 0644);
2133 MODULE_PARM_DESC(log_ce, "Log correctable error to console: 0=off 1=on");
2134 module_param(poll_msec, int, 0644);
2135 MODULE_PARM_DESC(poll_msec, "Polling period in milliseconds");
2136 #ifdef CONFIG_EDAC_DEBUG
2137 module_param(edac_debug_level, int, 0644);
2138 MODULE_PARM_DESC(edac_debug_level, "Debug level");