/*
 * Reconstructed from a gitweb "commitdiff_plain" page whose line structure
 * had been collapsed.  Commit metadata as given on that page:
 *
 *   From: Joey Hess
 *   Date: Fri, 11 Apr 2008 03:28:15 +0000 (-0400)
 *   Subject: make sponge use a temp file if the input is large
 *   X-Git-Tag: 0.29~21
 *   X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=866dfb32b3ba7d0be153608b28dd9f461986251d;p=moreutils
 *
 *   make sponge use a temp file if the input is large
 *
 *   patch from Brock Noland
 *
 * The commit adds a new file, physmem.c, and patches sponge.c (hunk
 * "@@ -25,69 +25,280 @@").  Both are shown below in their post-patch form.
 * NOTE(review): the targets of the <...> includes, the licence URL and the
 * "<file>" placeholders in the usage text were missing from the page and
 * have been filled in from the HAVE_* guards and the functions used;
 * confirm against the repository.
 */

/* ======================================================================
   physmem.c (new file)
   ====================================================================== */

/* Calculate the size of physical memory.

   Copyright (C) 2000, 2001, 2003, 2005, 2006 Free Software
   Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* Written by Paul Eggert.  */

#include <unistd.h>

#if HAVE_SYS_PSTAT_H
# include <sys/pstat.h>
#endif

#if HAVE_SYS_SYSMP_H
# include <sys/sysmp.h>
#endif

#if HAVE_SYS_SYSINFO_H && HAVE_MACHINE_HAL_SYSINFO_H
# include <sys/sysinfo.h>
# include <machine/hal_sysinfo.h>
#endif

#if HAVE_SYS_TABLE_H
# include <sys/table.h>
#endif

#include <sys/types.h>

#if HAVE_SYS_PARAM_H
# include <sys/param.h>
#endif

#if HAVE_SYS_SYSCTL_H
# include <sys/sysctl.h>
#endif

#if HAVE_SYS_SYSTEMCFG_H
# include <sys/systemcfg.h>
#endif

#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
/*  MEMORYSTATUSEX is missing from older windows headers, so define
    a local replacement.  */
typedef struct
{
  DWORD dwLength;
  DWORD dwMemoryLoad;
  DWORDLONG ullTotalPhys;
  DWORDLONG ullAvailPhys;
  DWORDLONG ullTotalPageFile;
  DWORDLONG ullAvailPageFile;
  DWORDLONG ullTotalVirtual;
  DWORDLONG ullAvailVirtual;
  DWORDLONG ullAvailExtendedVirtual;
} lMEMORYSTATUSEX;
typedef WINBOOL (WINAPI *PFN_MS_EX) (lMEMORYSTATUSEX*);
#endif

#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))

/* Return the total amount of physical memory, in bytes.
   Each platform-specific probe is compiled in only when its feature
   macros are defined; the first probe that succeeds wins.  If none
   does, a fixed guess of 64 MB is returned.  */
double
physmem_total (void)
{
#if defined _SC_PHYS_PAGES && defined _SC_PAGESIZE
  { /* This works on linux-gnu, solaris2 and cygwin.  */
    double pages = sysconf (_SC_PHYS_PAGES);
    double pagesize = sysconf (_SC_PAGESIZE);
    if (0 <= pages && 0 <= pagesize)
      return pages * pagesize;
  }
#endif

#if HAVE_PSTAT_GETSTATIC
  { /* This works on hpux11.  */
    struct pst_static pss;
    if (0 <= pstat_getstatic (&pss, sizeof pss, 1, 0))
      {
        double pages = pss.physical_memory;
        double pagesize = pss.page_size;
        if (0 <= pages && 0 <= pagesize)
          return pages * pagesize;
      }
  }
#endif

#if HAVE_SYSMP && defined MP_SAGET && defined MPSA_RMINFO && defined _SC_PAGESIZE
  { /* This works on irix6. */
    struct rminfo realmem;
    if (sysmp (MP_SAGET, MPSA_RMINFO, &realmem, sizeof realmem) == 0)
      {
        double pagesize = sysconf (_SC_PAGESIZE);
        double pages = realmem.physmem;
        if (0 <= pages && 0 <= pagesize)
          return pages * pagesize;
      }
  }
#endif

#if HAVE_GETSYSINFO && defined GSI_PHYSMEM
  { /* This works on Tru64 UNIX V4/5.  */
    int physmem;

    if (getsysinfo (GSI_PHYSMEM, (caddr_t) &physmem, sizeof (physmem),
                    NULL, NULL, NULL) == 1)
      {
        double kbytes = physmem;

        if (0 <= kbytes)
          return kbytes * 1024.0;
      }
  }
#endif

#if HAVE_SYSCTL && defined HW_PHYSMEM
  { /* This works on *bsd and darwin.  */
    unsigned int physmem;
    size_t len = sizeof physmem;
    static int mib[2] = { CTL_HW, HW_PHYSMEM };

    if (sysctl (mib, ARRAY_SIZE (mib), &physmem, &len, NULL, 0) == 0
        && len == sizeof (physmem))
      return (double) physmem;
  }
#endif

#if HAVE__SYSTEM_CONFIGURATION
  /* This works on AIX.  */
  return _system_configuration.physmem;
#endif

#if defined _WIN32
  { /* this works on windows */
    PFN_MS_EX pfnex;
    HMODULE h = GetModuleHandle ("kernel32.dll");

    if (!h)
      return 0.0;

    /*  Use GlobalMemoryStatusEx if available.  */
    if ((pfnex = (PFN_MS_EX) GetProcAddress (h, "GlobalMemoryStatusEx")))
      {
        lMEMORYSTATUSEX lms_ex;
        lms_ex.dwLength = sizeof lms_ex;
        if (!pfnex (&lms_ex))
          return 0.0;
        return (double) lms_ex.ullTotalPhys;
      }

    /*  Fall back to GlobalMemoryStatus which is always available.
        but returns wrong results for physical memory > 4GB.  */
    else
      {
        MEMORYSTATUS ms;
        GlobalMemoryStatus (&ms);
        return (double) ms.dwTotalPhys;
      }
  }
#endif

  /* Guess 64 MB.  It's probably an older host, so guess small.  */
  return 64 * 1024 * 1024;
}

/* Return the amount of physical memory available, in bytes.
   Structured like physmem_total: the first platform probe that
   succeeds wins; if none does, 25% of physmem_total () is returned.  */
double
physmem_available (void)
{
#if defined _SC_AVPHYS_PAGES && defined _SC_PAGESIZE
  { /* This works on linux-gnu, solaris2 and cygwin.  */
    double pages = sysconf (_SC_AVPHYS_PAGES);
    double pagesize = sysconf (_SC_PAGESIZE);
    if (0 <= pages && 0 <= pagesize)
      return pages * pagesize;
  }
#endif

#if HAVE_PSTAT_GETSTATIC && HAVE_PSTAT_GETDYNAMIC
  { /* This works on hpux11.  */
    struct pst_static pss;
    struct pst_dynamic psd;
    if (0 <= pstat_getstatic (&pss, sizeof pss, 1, 0)
        && 0 <= pstat_getdynamic (&psd, sizeof psd, 1, 0))
      {
        double pages = psd.psd_free;
        double pagesize = pss.page_size;
        if (0 <= pages && 0 <= pagesize)
          return pages * pagesize;
      }
  }
#endif

#if HAVE_SYSMP && defined MP_SAGET && defined MPSA_RMINFO && defined _SC_PAGESIZE
  { /* This works on irix6. */
    struct rminfo realmem;
    if (sysmp (MP_SAGET, MPSA_RMINFO, &realmem, sizeof realmem) == 0)
      {
        double pagesize = sysconf (_SC_PAGESIZE);
        double pages = realmem.availrmem;
        if (0 <= pages && 0 <= pagesize)
          return pages * pagesize;
      }
  }
#endif

#if HAVE_TABLE && defined TBL_VMSTATS
  { /* This works on Tru64 UNIX V4/5.  */
    struct tbl_vmstats vmstats;

    if (table (TBL_VMSTATS, 0, &vmstats, 1, sizeof (vmstats)) == 1)
      {
        double pages = vmstats.free_count;
        double pagesize = vmstats.pagesize;

        if (0 <= pages && 0 <= pagesize)
          return pages * pagesize;
      }
  }
#endif

#if HAVE_SYSCTL && defined HW_USERMEM
  { /* This works on *bsd and darwin.  */
    unsigned int usermem;
    size_t len = sizeof usermem;
    static int mib[2] = { CTL_HW, HW_USERMEM };

    if (sysctl (mib, ARRAY_SIZE (mib), &usermem, &len, NULL, 0) == 0
        && len == sizeof (usermem))
      return (double) usermem;
  }
#endif

#if defined _WIN32
  { /* this works on windows */
    PFN_MS_EX pfnex;
    HMODULE h = GetModuleHandle ("kernel32.dll");

    if (!h)
      return 0.0;

    /*  Use GlobalMemoryStatusEx if available.  */
    if ((pfnex = (PFN_MS_EX) GetProcAddress (h, "GlobalMemoryStatusEx")))
      {
        lMEMORYSTATUSEX lms_ex;
        lms_ex.dwLength = sizeof lms_ex;
        if (!pfnex (&lms_ex))
          return 0.0;
        return (double) lms_ex.ullAvailPhys;
      }

    /*  Fall back to GlobalMemoryStatus which is always available.
        but returns wrong results for physical memory > 4GB  */
    else
      {
        MEMORYSTATUS ms;
        GlobalMemoryStatus (&ms);
        return (double) ms.dwAvailPhys;
      }
  }
#endif

  /* Guess 25% of physical memory.  */
  return physmem_total () / 4;
}


#if DEBUG

# include <stdio.h>
# include <stdlib.h>

/* Stand-alone test driver: print total and available memory.  */
int
main (void)
{
  printf ("%12.f %12.f\n", physmem_total (), physmem_available ());
  exit (0);
}

#endif /* DEBUG */

/*
Local Variables:
compile-command: "gcc -DDEBUG -g -O -Wall -W physmem.c"
End:
*/

/* ======================================================================
   sponge.c (line 25 onward, as patched)
   ====================================================================== */

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
/* MAX() */
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
#include <sys/resource.h>
/* SIZE_MAX */
#include <stdint.h>
#include <signal.h>

/* In the source tree sponge.c pulls the code above in with
   #include "physmem.c"; in this single-file rendering the definitions
   of physmem_total () and physmem_available () already appear above.  */

#define BUFF_SIZE 8192
/* Smallest in-memory buffer limit.  This used to be sizeof(8192), which
   is sizeof (int) and therefore only a few bytes.  */
#define MIN_SPONGE_SIZE ((size_t) BUFF_SIZE)
#define DEFAULT_TMP_NAME "/tmp/sponge.XXXXXX"
/* mkstemp() template; holds the real temp-file name once one exists.  */
char tmpname[] = DEFAULT_TMP_NAME;

/* Print the one-line usage text and exit successfully.  */
void usage() {
	printf("sponge <file>: suck in all input from stdin and write it to <file>\n");
	exit(0);
}

/* Report MSG together with the current errno text and exit with status 1.
   Handlers registered with atexit() still run.  */
static void
die (const char *msg)
{
	perror (msg);
	exit (1);
}

/* all the signal stuff copied from gnu sort */

/* The set of signals that are caught.  */
static sigset_t caught_signals;

/* Critical section status.  */
struct cs_status
{
	int valid; // was bool
	sigset_t sigs;
};

/* Enter a critical section: block the caught signals and remember the
   previous mask.  VALID is nonzero when sigprocmask succeeded.  */
static struct cs_status
cs_enter (void)
{
	struct cs_status status;
	status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
	return status;
}

/* Leave a critical section: restore the mask saved by cs_enter.  */
static void
cs_leave (struct cs_status status)
{
	if (status.valid)
	{
		/* Ignore failure when restoring the signal mask. */
		sigprocmask (SIG_SETMASK, &status.sigs, NULL);
	}
}

/* Remove the temporary file named by tmpname (errors are ignored).  */
static void cleanup (void) {
	unlink(tmpname);
}

/* atexit() hook: run cleanup with the caught signals blocked.  */
static void
onexit_cleanup (void)
{
	struct cs_status cs = cs_enter ();
	cleanup ();
	cs_leave (cs);
}

/* Signal handler: remove the temp file, then re-raise SIG with the
   default disposition so the process terminates as if uncaught.  */
static void
sighandler (int sig)
{
	if (! SA_NOCLDSTOP)
		signal (sig, SIG_IGN);

	cleanup ();

	signal (sig, SIG_DFL);
	raise (sig);
}

/* Install sighandler for each listed signal that is not currently being
   ignored, and record those signals in caught_signals.  */
static void
install_signal_handlers (void)
{
	static int const sig[] =
	{
		/* The usual suspects.  */
		SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
#ifdef SIGPOLL
		SIGPOLL,
#endif
#ifdef SIGPROF
		SIGPROF,
#endif
#ifdef SIGVTALRM
		SIGVTALRM,
#endif
#ifdef SIGXCPU
		SIGXCPU,
#endif
#ifdef SIGXFSZ
		SIGXFSZ,
#endif
	};
	enum { nsigs = sizeof sig / sizeof sig[0] };
	int i;

#if SA_NOCLDSTOP
	struct sigaction act;

	sigemptyset (&caught_signals);
	for (i = 0; i < nsigs; i++)
	{
		sigaction (sig[i], NULL, &act);
		if (act.sa_handler != SIG_IGN)
			sigaddset (&caught_signals, sig[i]);
	}

	act.sa_handler = sighandler;
	act.sa_mask = caught_signals;
	act.sa_flags = 0;

	for (i = 0; i < nsigs; i++)
		if (sigismember (&caught_signals, sig[i]))
			sigaction (sig[i], &act, NULL);
#else
	for (i = 0; i < nsigs; i++)
		if (signal (sig[i], SIG_IGN) != SIG_IGN)
		{
			signal (sig[i], sighandler);
			siginterrupt (sig[i], 1);
		}
#endif
}

/* taken from coreutils sort */
/* Return the number of bytes sponge may hold in memory before it starts
   spilling to a temporary file.  Never less than MIN_SPONGE_SIZE.  */
static size_t
default_sponge_size (void)
{
	/* Let MEM be available memory or 1/8 of total memory, whichever
	   is greater.  */
	double avail = physmem_available ();
	double total = physmem_total ();
	double mem = MAX (avail, total / 8);
	struct rlimit rlimit;

	/* Let SIZE be MEM, but no more than the maximum object size or
	   system resource limits.  Avoid the MIN macro here, as it is not
	   quite right when only one argument is floating point.  Don't
	   bother to check for values like RLIM_INFINITY since in practice
	   they are not much less than SIZE_MAX.  */
	size_t size = SIZE_MAX;
	if (mem < size)
		size = mem;
	if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
		size = rlimit.rlim_cur;
#ifdef RLIMIT_AS
	if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
		size = rlimit.rlim_cur;
#endif

	/* Leave a large safety margin for the above limits, as failure can
	   occur when they are exceeded.  */
	size /= 2;

#ifdef RLIMIT_RSS
	/* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
	   Exceeding RSS is not fatal, but can be quite slow.  */
	if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
		size = rlimit.rlim_cur / 16 * 15;
#endif

	/* Use no less than the minimum.  */
	return MAX (size, MIN_SPONGE_SIZE);
}

/* Write LEN bytes from DATA to OUT.  A zero-length request succeeds
   without calling fwrite (which would return 0 and look like a failure).
   Returns 0 on success, -1 on a short write.  */
static int
write_all (const char *data, size_t len, FILE *out)
{
	if (len == 0)
		return 0;
	return fwrite (data, len, 1, out) == 1 ? 0 : -1;
}

/* Copy everything from the current position of IN to OUT, using the
   BUFSIZE-byte scratch area BUF.  The final partial chunk is copied too.
   Returns 0 on success, -1 on a read or write error.  */
static int
copy_stream (FILE *in, FILE *out, char *buf, size_t bufsize)
{
	size_t n;

	while ((n = fread (buf, 1, bufsize, in)) > 0)
	{
		if (fwrite (buf, 1, n, out) != n)
			return -1;
	}
	return ferror (in) ? -1 : 0;
}

/* Copy the file named SRC over the file named DST (created/truncated),
   using BUF as scratch.  Returns 0 on success, -1 on failure with errno
   describing the first error.  */
static int
copy_to_path (const char *src, const char *dst, char *buf, size_t bufsize)
{
	FILE *in = fopen (src, "r");
	if (!in)
		return -1;

	FILE *out = fopen (dst, "w");
	if (!out)
	{
		int saved = errno;
		fclose (in);
		errno = saved;
		return -1;
	}

	int rc = copy_stream (in, out, buf, bufsize);
	int saved = errno;
	fclose (in);
	/* fclose flushes; a failure here means data did not reach DST.  */
	if (fclose (out) != 0 && rc == 0)
	{
		rc = -1;
		saved = errno;
	}
	errno = saved;
	return rc;
}

/* Create the temporary spill file from the tmpname template, register
   its removal at exit, and return it as a read/write stream.  Exits with
   status 1 on failure.  The process umask is restricted only while the
   file is being created and is restored afterwards.  */
static FILE *
open_spill_file (void)
{
	mode_t old_umask = umask(077);
	/* Block the caught signals so a handler cannot run between the
	   creation of the file and the registration of its cleanup.  */
	struct cs_status cs = cs_enter ();
	int tmpfd = mkstemp(tmpname);
	atexit(onexit_cleanup); // if solaris on_exit(onexit_cleanup, 0);
	cs_leave (cs);
	umask(old_umask);

	if (tmpfd < 0)
		die("mkstemp failed to open a temporary file");

	FILE *fp = fdopen(tmpfd, "w+");
	if (!fp)
		die("fdopen failed to open a temporary file");
	return fp;
}

/* sponge: read all of stdin, then write it to argv[1] or, with no
   argument, to stdout.  Input is buffered in memory up to
   default_sponge_size () bytes; beyond that, full buffers are appended
   to a temporary file which is finally renamed (or copied) to the
   output.  Returns 0 on success; exits with status 1 on any error.  */
int main(int argc, char **argv) {
	char *buf, *bufstart, *outname = NULL;
	size_t bufsize = BUFF_SIZE;
	size_t bufused = 0;
	ssize_t i = 0;
	FILE *spill = NULL;	/* temp file, once the memory limit is hit */

	if (argc > 2 || (argc == 2 && strcmp(argv[1], "-h") == 0)) {
		usage();
	}
	bufstart = buf = malloc(bufsize);
	if (!buf)
		die("failed to allocate memory");

	install_signal_handlers ();

	size_t mem_available = default_sponge_size();
	while ((i = read(0, buf, bufsize - bufused)) > 0) {
		bufused = bufused + (size_t) i;
		if (bufused == bufsize) {
			if (bufsize >= mem_available) {
				/* Memory limit reached: flush the full buffer to
				   the temp file and start refilling it.  */
				if (!spill)
					spill = open_spill_file ();
				if (write_all(bufstart, bufsize, spill) < 0)
					die("writing to tempory file failed");
				bufused = 0;
			} else {
				/* mem_available is at most SIZE_MAX / 2, so the
				   doubling cannot overflow.  */
				char *grown = realloc(bufstart, bufsize * 2);
				if (!grown)
					die("failed to realloc memory");
				bufstart = grown;
				bufsize *= 2;
			}
		}
		buf = bufstart + bufused;
	}
	if (i < 0)
		die("failed to read from stdin");

	if (argc == 2) {
		outname = argv[1];
	}

	if (spill) {
		/* Append whatever is left in memory (possibly nothing).  */
		if (write_all(bufstart, bufused, spill) < 0)
			die("write tmpfile");

		if (outname) {
			if (fclose(spill) != 0)
				die("write tmpfile");
			/* NOTE(review): a successful rename gives the output
			   file the temp file's mode and ownership.  */
			if (rename(tmpname, outname) != 0) {
				/* /tmp is often a different filesystem from the
				   target; fall back to copying in that case.
				   The temp file is removed at exit.  */
				if (errno != EXDEV)
					die("error renaming temporary file to output file");
				if (copy_to_path(tmpname, outname, bufstart, bufsize) < 0)
					die("error writing out merged file");
			}
		}
		else {
			if (fseek(spill, 0, SEEK_SET))
				die("could not seek to start of temporary file");
			/* The in-memory data is already in the temp file, so
			   the whole buffer can be reused as scratch space.  */
			if (copy_stream(spill, stdout, bufstart, bufsize) < 0
			    || fflush(stdout) != 0)
				die("error writing out merged file");
			fclose(spill);
			unlink(tmpname);
		}
	}
	else {
		if (outname) {
			FILE *outfd = fopen(outname, "w");
			if (!outfd)
				die("error opening output file");
			if (write_all(bufstart, bufused, outfd) < 0
			    || fclose(outfd) != 0)
				die("error writing out merged file");
		}
		else if (write_all(bufstart, bufused, stdout) < 0
			 || fflush(stdout) != 0) {
			die("error writing out merged file");
		}
	}
	return 0;
}