From 95ffa2438d0e9c48779f0106b1c0eb36165e759c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Apr 2008 03:52:33 -0700 Subject: [PATCH] x86: mtrr cleanup for converting continuous to discrete layout, v8 some BIOS like to use continus MTRR layout, and X driver can not add WB entries for graphical cards when 4g or more RAM installed. the patch will change MTRR to discrete. mtrr_chunk_size= could be used to have smaller continuous block to hold holes. default is 256m, could be set according to size of graphics card memory. mtrr_gran_size= could be used to send smallest mtrr block to avoid run out of MTRRs v2: fix -1 for UC checking v3: default to disable, and need use enable_mtrr_cleanup to enable this feature skip the var state change warning. remove next_basek in range_to_mtrr() v4: correct warning mask. v5: CONFIG_MTRR_SANITIZER v6: fix 1g, 2g, 512 aligment with extra hole v7: gran_sizek to prevent running out of MTRRs. v8: fix hole_basek caculation caused when removing next_basek gran_sizek using when basek is 0. need to apply [PATCH] x86: fix trimming e820 with MTRR holes. right after this one. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/kernel-parameters.txt | 14 + arch/x86/Kconfig | 26 ++ arch/x86/kernel/cpu/mtrr/generic.c | 32 +- arch/x86/kernel/cpu/mtrr/main.c | 467 +++++++++++++++++++++++++++- arch/x86/kernel/cpu/mtrr/mtrr.h | 3 + 5 files changed, 528 insertions(+), 14 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e07c432c73..44427607b7 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -599,6 +599,20 @@ and is between 256 and 4096 characters. It is defined in the file See drivers/char/README.epca and Documentation/digiepca.txt. + disable_mtrr_cleanup [X86] + enable_mtrr_cleanup [X86] + The kernel tries to adjust MTRR layout from continuous + to discrete, to make X server driver able to add WB + entry later. This parameter enables/disables that. + + mtrr_chunk_size=nn[KMG] [X86] + used for mtrr cleanup. It is largest continous chunk + that could hold holes aka. UC entries. + + mtrr_gran_size=nn[KMG] [X86] + used for mtrr cleanup. It is granity of mtrr block. + Big value could prevent small alignment use up MTRRs. + disable_mtrr_trim [X86, Intel and AMD only] By default the kernel will trim any uncacheable memory out of your available memory pool based on diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe361ae7ef..97a1764a3d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1092,6 +1092,32 @@ config MTRR See for more information. +config MTRR_SANITIZER + def_bool y + prompt "MTRR cleanup support" + depends on MTRR + help + Convert MTRR layout from continuous to discrete, so some X driver + could add WB entries. + + Say N here if you see bootup problems (boot crash, boot hang, + spontaneous reboots). + + Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size + could be used to send largest mtrr entry size for continuous block + to hold holes (aka. UC entries) + + If unsure, say Y. + +config MTRR_SANITIZER_ENABLE_DEFAULT + def_bool y + prompt "Enable MTRR cleanup by default" + depends on MTRR_SANITIZER + help + Enable mtrr cleanup by default + + If unsure, say Y. + config X86_PAT bool prompt "x86 PAT support" diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0625d4158e..5aae648600 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = { static unsigned long smp_changes_mask; static struct mtrr_state mtrr_state = {}; static int mtrr_state_set; -static u64 tom2; +u64 mtrr_tom2; #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." @@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) } } - if (tom2) { - if (start >= (1ULL<<32) && (end < tom2)) + if (mtrr_tom2) { + if (start >= (1ULL<<32) && (end < mtrr_tom2)) return MTRR_TYPE_WRBACK; } @@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); } +/* fill the MSR pair relating to a var range */ +void fill_mtrr_var_range(unsigned int index, + u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) +{ + struct mtrr_var_range *vr; + + vr = mtrr_state.var_ranges; + + vr[index].base_lo = base_lo; + vr[index].base_hi = base_hi; + vr[index].mask_lo = mask_lo; + vr[index].mask_hi = mask_hi; +} + static void get_fixed_ranges(mtrr_type * frs) { @@ -216,10 +230,10 @@ void __init get_mtrr_state(void) unsigned low, high; /* TOP_MEM2 */ rdmsr(MSR_K8_TOP_MEM2, low, high); - tom2 = high; - tom2 <<= 32; - tom2 |= low; - tom2 &= 0xffffff8000000ULL; + mtrr_tom2 = high; + mtrr_tom2 <<= 32; + mtrr_tom2 |= low; + mtrr_tom2 &= 0xffffff8000000ULL; } if (mtrr_show) { int high_width; @@ -251,9 +265,9 @@ void __init get_mtrr_state(void) else printk(KERN_INFO "MTRR %u disabled\n", i); } - if (tom2) { + if (mtrr_tom2) { printk(KERN_INFO "TOM2: %016llx aka %lldM\n", - tom2, tom2>>20); + mtrr_tom2, mtrr_tom2>>20); } } mtrr_state_set = 1; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6a1e278d93..8a6f68b45e 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -609,6 +610,452 @@ static struct sysdev_driver mtrr_sysdev_driver = { .resume = mtrr_restore, }; +#ifdef CONFIG_MTRR_SANITIZER + +#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT +static int enable_mtrr_cleanup __initdata = 1; +#else +static int enable_mtrr_cleanup __initdata; +#endif + +#else + +static int enable_mtrr_cleanup __initdata = -1; + +#endif + +static int __init disable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 0; + return 0; +} +early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); + +static int __init enable_mtrr_cleanup_setup(char *str) +{ + if (enable_mtrr_cleanup != -1) + enable_mtrr_cleanup = 1; + return 0; +} +early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); + +#define RANGE_NUM 256 + +struct res_range { + unsigned long start; + unsigned long end; +}; + +static int __init add_range(struct res_range *range, int nr_range, unsigned long start, + unsigned long end, int merge) +{ + int i; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < nr_range; i++) { + unsigned long final_start, final_end; + unsigned long common_start, common_end; + + if (!range[i].end) + continue; + + common_start = max(range[i].start, start); + common_end = min(range[i].end, end); + if (common_start > common_end + 1) + continue; + + final_start = min(range[i].start, start); + final_end = max(range[i].end, end); + + range[i].start = final_start; + range[i].end = final_end; + return nr_range; + } + +addit: + /* need to add that */ + if (nr_range >= RANGE_NUM) + return nr_range; + + range[nr_range].start = start; + range[nr_range].end = end; + + nr_range++; + + return nr_range; + +} +static void __init subtract_range(struct res_range *range, unsigned long start, + unsigned long end) +{ + int i; + int j; + + for (j = 0; j < RANGE_NUM; j++) { + if (!range[j].end) + continue; + + if (start <= range[j].start && end >= range[j].end) { + range[j].start = 0; + range[j].end = 0; + continue; + } + + if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { + range[j].start = end + 1; + continue; + } + + + if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { + range[j].end = start - 1; + continue; + } + + if (start > range[j].start && end < range[j].end) { + /* find the new spare */ + for (i = 0; i < RANGE_NUM; i++) { + if (range[i].end == 0) + break; + } + if (i < RANGE_NUM) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + continue; + } + } +} + +static int __init cmp_range(const void *x1, const void *x2) +{ + const struct res_range *r1 = x1; + const struct res_range *r2 = x2; + long start1, start2; + + start1 = r1->start; + start2 = r2->start; + + return start1 - start2; +} + +struct var_mtrr_state { + unsigned long range_startk, range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + unsigned int reg; + unsigned address_bits; +}; + +static void __init set_var_mtrr( + unsigned int reg, unsigned long basek, unsigned long sizek, + unsigned char type, unsigned address_bits) +{ + u32 base_lo, base_hi, mask_lo, mask_hi; + unsigned address_mask_high; + + if (!sizek) { + fill_mtrr_var_range(reg, 0, 0, 0, 0); + return; + } + + address_mask_high = ((1u << (address_bits - 32u)) - 1u); + + base_hi = basek >> 22; + base_lo = basek << 10; + + if (sizek < 4*1024*1024) { + mask_hi = address_mask_high; + mask_lo = ~((sizek << 10) - 1); + } else { + mask_hi = address_mask_high & (~((sizek >> 22) - 1)); + mask_lo = 0; + } + + base_lo |= type; + mask_lo |= 0x800; + fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); +} + +static unsigned int __init range_to_mtrr(unsigned int reg, + unsigned long range_startk, unsigned long range_sizek, + unsigned char type, unsigned address_bits) +{ + if (!range_sizek || (reg >= num_var_ranges)) + return reg; + + while (range_sizek) { + unsigned long max_align, align; + unsigned long sizek; + /* Compute the maximum size I can make a range */ + if (range_startk) + max_align = ffs(range_startk) - 1; + else + max_align = 32; + align = fls(range_sizek) - 1; + if (align > max_align) + align = max_align; + + sizek = 1 << align; + printk(KERN_INFO "Setting variable MTRR %d, base: %ldMB, range: %ldMB, type %s\n", + reg, range_startk >> 10, sizek >> 10, + (type == MTRR_TYPE_UNCACHABLE)?"UC": + ((type == MTRR_TYPE_WRBACK)?"WB":"Other") + ); + set_var_mtrr(reg++, range_startk, sizek, type, address_bits); + range_startk += sizek; + range_sizek -= sizek; + if (reg >= num_var_ranges) + break; + } + return reg; +} + +static void __init range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek) +{ + unsigned long hole_basek, hole_sizek; + unsigned long range0_basek, range0_sizek; + unsigned long range_basek, range_sizek; + unsigned long chunk_sizek; + unsigned long gran_sizek; + + hole_basek = 0; + hole_sizek = 0; + chunk_sizek = state->chunk_sizek; + gran_sizek = state->gran_sizek; + + /* align with gran size, prevent small block used up MTRRs */ + range_basek = ALIGN(state->range_startk, gran_sizek); + if ((range_basek > basek) && basek) + return; + range_sizek = ALIGN(state->range_sizek - (range_basek - state->range_startk), gran_sizek); + + while (range_basek + range_sizek > (state->range_startk + state->range_sizek)) { + range_sizek -= gran_sizek; + if (!range_sizek) + return; + } + state->range_startk = range_basek; + state->range_sizek = range_sizek; + + /* try to append some small hole */ + range0_basek = state->range_startk; + range0_sizek = ALIGN(state->range_sizek, chunk_sizek); + if ((range0_sizek == state->range_sizek) || + ((range0_basek + range0_sizek - chunk_sizek > basek) && basek)) { + printk(KERN_INFO "rangeX: %016lx - %016lx\n", range0_basek<<10, (range0_basek + state->range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + state->range_sizek, MTRR_TYPE_WRBACK, state->address_bits); + return; + } + + + range0_sizek -= chunk_sizek; + printk(KERN_INFO "range0: %016lx - %016lx\n", range0_basek<<10, (range0_basek + range0_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + range0_sizek, MTRR_TYPE_WRBACK, state->address_bits); + + range_basek = range0_basek + range0_sizek; + range_sizek = chunk_sizek; + if (range_sizek - (state->range_sizek - range0_sizek) < (chunk_sizek >> 1)) { + hole_sizek = range_sizek - (state->range_sizek - range0_sizek); + hole_basek = range_basek + range_sizek - hole_sizek; + } else + range_sizek = state->range_sizek - range0_sizek; + + printk(KERN_INFO "range: %016lx - %016lx\n", range_basek<<10, (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, + range_sizek, MTRR_TYPE_WRBACK, state->address_bits); + if (hole_sizek) { + printk(KERN_INFO "hole: %016lx - %016lx\n", hole_basek<<10, (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, + hole_sizek, MTRR_TYPE_UNCACHABLE, state->address_bits); + } +} + +static void __init set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, unsigned long size_pfn) +{ + unsigned long basek, sizek; + + if (state->reg >= num_var_ranges) + return; + + basek = base_pfn << (PAGE_SHIFT - 10); + sizek = size_pfn << (PAGE_SHIFT - 10); + + /* See if I can merge with the last range */ + if ((basek <= 1024) || (state->range_startk + state->range_sizek == basek)) { + unsigned long endk = basek + sizek; + state->range_sizek = endk - state->range_startk; + return; + } + /* Write the range mtrrs */ + if (state->range_sizek != 0) { + range_to_mtrr_with_hole(state, basek); + + state->range_startk = 0; + state->range_sizek = 0; + } + /* Allocate an msr */ + state->range_startk = basek; + state->range_sizek = sizek; +} + +/* mininum size of mtrr block that can take hole */ +static u64 mtrr_chunk_size __initdata = (256ULL<<20); + +static int __init parse_mtrr_chunk_size_opt(char *p) +{ + if (!p) + return -EINVAL; + mtrr_chunk_size = memparse(p, &p); + return 0; +} +early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); + +/* granity of mtrr of block */ +static u64 mtrr_gran_size __initdata = (64ULL<<20); + +static int __init parse_mtrr_gran_size_opt(char *p) +{ + if (!p) + return -EINVAL; + mtrr_gran_size = memparse(p, &p); + return 0; +} +early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); + +static void __init x86_setup_var_mtrrs(struct res_range *range, int nr_range, unsigned address_bits) +{ + struct var_mtrr_state var_state; + int i; + + var_state.range_startk = 0; + var_state.range_sizek = 0; + var_state.reg = 0; + var_state.address_bits = address_bits; + var_state.chunk_sizek = mtrr_chunk_size >> 10; + var_state.gran_sizek = mtrr_gran_size >> 10; + + /* Write the range etc */ + for (i = 0; i < nr_range; i++) + set_var_mtrr_range(&var_state, range[i].start, range[i].end - range[i].start + 1); + + /* Write the last range */ + range_to_mtrr_with_hole(&var_state, 0); + printk(KERN_INFO "DONE variable MTRRs\n"); + /* Clear out the extra MTRR's */ + while (var_state.reg < num_var_ranges) + set_var_mtrr(var_state.reg++, 0, 0, 0, var_state.address_bits); +} + +static int __init x86_get_mtrr_mem_range(struct res_range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size) +{ + unsigned long i, base, size; + mtrr_type type; + + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type != MTRR_TYPE_WRBACK) + continue; + nr_range = add_range(range, nr_range, base, base + size - 1, 1); + } + printk(KERN_INFO "After WB checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + + /* take out UC ranges */ + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type != MTRR_TYPE_UNCACHABLE) + continue; + if (!size) + continue; + subtract_range(range, base, base + size - 1); + } + if (extra_remove_size) + subtract_range(range, extra_remove_base, extra_remove_base + extra_remove_size - 1); + + /* get new range num */ + nr_range = 0; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + nr_range++; + } + printk(KERN_INFO "After UC checking\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + printk(KERN_INFO "After sorting\n"); + for (i = 0; i < nr_range; i++) + printk(KERN_INFO "MTRR MAP PFN: %016lx - %016lx\n", range[i].start, range[i].end + 1); + + return nr_range; +} + +static int __init mtrr_cleanup(unsigned address_bits) +{ + unsigned long i, base, size, def, dummy; + mtrr_type type; + struct res_range range[RANGE_NUM]; + int nr_range; + unsigned long extra_remove_base, extra_remove_size; + + /* extra one for all 0 */ + int num[MTRR_NUM_TYPES + 1]; + + if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) + return 0; + rdmsr(MTRRdefType_MSR, def, dummy); + def &= 0xff; + if (def != MTRR_TYPE_UNCACHABLE) + return 0; + + /* check entries number */ + memset(num, 0, sizeof(num)); + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type >= MTRR_NUM_TYPES) + continue; + if (!size) + type = MTRR_NUM_TYPES; + num[type]++; + } + + /* check if we got UC entries */ + if (!num[MTRR_TYPE_UNCACHABLE]) + return 0; + + /* check if we only had WB and UC */ + if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != + num_var_ranges - num[MTRR_NUM_TYPES]) + return 0; + + memset(range, 0, sizeof(range)); + extra_remove_size = 0; + if (mtrr_tom2) { + extra_remove_base = 1 << (32 - PAGE_SHIFT); + extra_remove_size = (mtrr_tom2>>PAGE_SHIFT) - extra_remove_base; + } + nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size); + + /* convert ranges to var ranges state */ + x86_setup_var_mtrrs(range, nr_range, address_bits); + + return 1; + +} + static int disable_mtrr_trim; static int __init disable_mtrr_trim_setup(char *str) @@ -729,18 +1176,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) */ void __init mtrr_bp_init(void) { + u32 phys_addr; init_ifs(); + phys_addr = 32; + if (cpu_has_mtrr) { mtrr_if = &generic_mtrr_ops; size_or_mask = 0xff000000; /* 36 bits */ size_and_mask = 0x00f00000; + phys_addr = 36; /* This is an AMD specific MSR, but we assume(hope?) that Intel will implement it to when they extend the address bus of the Xeon. */ if (cpuid_eax(0x80000000) >= 0x80000008) { - u32 phys_addr; phys_addr = cpuid_eax(0x80000008) & 0xff; /* CPUID workaround for Intel 0F33/0F34 CPU */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && @@ -758,6 +1208,7 @@ void __init mtrr_bp_init(void) don't support PAE */ size_or_mask = 0xfff00000; /* 32 bits */ size_and_mask = 0; + phys_addr = 32; } } else { switch (boot_cpu_data.x86_vendor) { @@ -791,8 +1242,13 @@ void __init mtrr_bp_init(void) if (mtrr_if) { set_num_var_ranges(); init_table(); - if (use_intel()) + if (use_intel()) { get_mtrr_state(); + + if (mtrr_cleanup(phys_addr)) + mtrr_if->set_all(); + + } } } @@ -829,9 +1285,10 @@ static int __init mtrr_init_finialize(void) { if (!mtrr_if) return 0; - if (use_intel()) - mtrr_state_warn(); - else { + if (use_intel()) { + if (enable_mtrr_cleanup < 1) + mtrr_state_warn(); + } else { /* The CPUs haven't MTRR and seem to not support SMP. They have * specific drivers, we use a tricky method to support * suspend/resume for them. diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 2cc77eb6fe..2dc4ec656b 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt); void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); +void fill_mtrr_var_range(unsigned int index, + u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); void get_mtrr_state(void); extern void set_mtrr_ops(struct mtrr_ops * ops); @@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if; #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) extern unsigned int num_var_ranges; +extern u64 mtrr_tom2; void mtrr_state_warn(void); const char *mtrr_attrib_to_str(int x); -- 2.39.5