summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAssaf Gordon <assafgordon@gmail.com>2018-01-11 22:20:52 (GMT)
committerAssaf Gordon <assafgordon@gmail.com>2018-01-22 23:38:40 (GMT)
commitbe2e91b6f5dacd819e723cea859893368f8278ce (patch)
tree73a0906029eb4be075e5448ecff78c9fd56056b1
parentef42f4551fe9939f90a1d963a1ddcea6f5a8c2d1 (diff)
downloaddatamash-be2e91b6f5dacd819e723cea859893368f8278ce.zip
datamash-be2e91b6f5dacd819e723cea859893368f8278ce.tar.gz
datamash-be2e91b6f5dacd819e723cea859893368f8278ce.tar.bz2
datamash: add --format=FORMAT option
* NEWS: Mention this. * Makefile.am: * doc/datamash.texi: Mention new option. * src/datamash.c * src/double-format.{c,h}: * src/text-options.{c,h}: * tests/datamash-error-msgs.pl, tests/datamash-output-format.pl: Test new option. * tests/datamash-valgrind.sh: Test large output buffer under valgrind.
-rw-r--r--Makefile.am1
-rw-r--r--NEWS7
-rw-r--r--doc/datamash.texi5
-rw-r--r--src/datamash.c10
-rw-r--r--src/double-format.c84
-rw-r--r--src/double-format.h26
-rw-r--r--src/text-options.c8
-rw-r--r--src/text-options.h3
-rw-r--r--tests/datamash-error-msgs.pl25
-rwxr-xr-xtests/datamash-output-format.pl42
-rwxr-xr-xtests/datamash-valgrind.sh6
11 files changed, 217 insertions, 0 deletions
diff --git a/Makefile.am b/Makefile.am
index 1baa5ab..cdef98c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -39,6 +39,7 @@ datamash_SOURCES = src/system.h \
src/op-parser.c src/op-parser.h \
src/field-ops.c src/field-ops.h \
src/crosstab.c src/crosstab.h \
+ src/double-format.c src/double-format.h \
src/datamash.c
datamash_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS) $(MINGW_CFLAGS)
diff --git a/NEWS b/NEWS
index 7489a6b..c1a906a 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,13 @@
** New Features
+ New option: --format=FMT sets printf style floating-point format.
+ Example:
+ $ echo '50.5' | datamash --format "%07.3f" sum 1
+ 050.500
+ $ echo '50.5' | datamash --format "%07.3e" sum 1
+ 5.050e+01
+
New option: -R/--round=N rounds numeric values to N decimal places.
New option: --output-delimiter=X overrides -t/-W.
diff --git a/doc/datamash.texi b/doc/datamash.texi
index 4519cad..94e114e 100644
--- a/doc/datamash.texi
+++ b/doc/datamash.texi
@@ -276,6 +276,11 @@ value.
@table @option
+@item --format=@var{FORMAT}
+@opindex --format
+print numeric values with printf style floating-point @var{FORMAT}.
+
+
@item --field-separator=@var{x}
@itemx -t @var{x}
@opindex --field-separator
diff --git a/src/datamash.c b/src/datamash.c
index b8d4c1d..1a65a05 100644
--- a/src/datamash.c
+++ b/src/datamash.c
@@ -107,6 +107,7 @@ enum
NO_STRICT_OPTION,
REMOVE_NA_VALUES_OPTION,
OUTPUT_DELIMITER_OPTION,
+ CUSTOM_FORMAT_OPTION,
UNDOC_PRINT_INF_OPTION,
UNDOC_PRINT_NAN_OPTION,
UNDOC_PRINT_PROGNAME_OPTION,
@@ -127,6 +128,7 @@ static struct option const long_options[] =
{"headers", no_argument, NULL, 'H'},
{"full", no_argument, NULL, 'f'},
{"filler", required_argument, NULL, 'F'},
+ {"format", required_argument, NULL, CUSTOM_FORMAT_OPTION},
{"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
{"sort", no_argument, NULL, 's'},
{"no-strict", no_argument, NULL, NO_STRICT_OPTION},
@@ -252,6 +254,10 @@ which require a pair of fields (e.g. 'pcov 2:6').\n"), stdout);
-t, --field-separator=X use X instead of TAB as field delimiter\n\
"), stdout);
fputs (_("\
+ --format=FORMAT print numeric values with printf style\n\
+ floating-point FORMAT.\n\
+"), stdout);
+ fputs (_("\
--output-delimiter=X use X instead as output field delimiter\n\
(default: use same delimiter as -t/-W)\n\
"), stdout);
@@ -1181,6 +1187,10 @@ int main (int argc, char* argv[])
strict = false;
break;
+ case CUSTOM_FORMAT_OPTION:
+ set_numeric_printf_format (optarg);
+ break;
+
case REMOVE_NA_VALUES_OPTION:
remove_na_values = true;
break;
diff --git a/src/double-format.c b/src/double-format.c
new file mode 100644
index 0000000..63c76d5
--- /dev/null
+++ b/src/double-format.c
@@ -0,0 +1,84 @@
+/* GNU Datamash - perform simple calculation on input data
+
+ Copyright (C) 2018 Assaf Gordon <assafgordon@gmail.com>
+ Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+/*
+Portions of this function were copied from GNU coreutils' seq.c,
+hence FSF copyright.
+*/
+
+
+#include <config.h>
+
+#include "system.h"
+#include "die.h"
+#include "quote.h"
+#include "ignore-value.h"
+#include "xalloc.h"
+/* Check that FMT has exactly one %[flags][width][.prec]{efgaEFGA} directive; */
+/* return an xmalloc-allocated copy with 'L' inserted before the type, or die. */
+char*
+validate_double_format (char const *fmt)
+{
+ size_t i;
+ size_t len;
+ char *out;
+
+ len = strlen (fmt);
+
+ /* extra space for NUL and 'L' printf-modifier */
+ out = xmalloc (len+2);
+ /* Skip literal text and "%%" pairs until the first real '%' directive. */
+ for (i = 0; ! (fmt[i] == '%' && fmt[i + 1] != '%'); i += (fmt[i] == '%') + 1)
+ if (!fmt[i])
+ die (EXIT_FAILURE, 0,
+ _("format %s has no %% directive"), quote (fmt));
+ /* Step past '%', then any flags, a width and an optional ".precision". */
+ i++;
+ i += strspn (fmt + i, "-+#0 '");
+ i += strspn (fmt + i, "0123456789");
+ if (fmt[i] == '.')
+ {
+ i++;
+ i += strspn (fmt + i, "0123456789");
+ }
+ /* fmt[i] is now expected to be the conversion type character. */
+ if (!fmt[i])
+ die (EXIT_FAILURE, 0,
+ _("format %s missing valid type after '%%'"), quote (fmt));
+ /* Only the floating-point conversion types listed below are accepted. */
+ if (! strchr ("efgaEFGA", fmt[i]))
+ die (EXIT_FAILURE, 0,
+ _("format %s has unknown/invalid type %%%c directive"),
+ quote (fmt), fmt[i]);
+
+ /* Copy characters until the type character, add 'L', then the type,
+ then the rest of the format string. */
+ memcpy (out, fmt, i);
+ out[i] = 'L';
+ out[i+1] = fmt[i];
+ memcpy (out+i+2, fmt+i+1, len-i); /* len-i bytes: the tail plus its NUL */
+ out[len+1] = '\0';
+ /* Any further directive, or a lone trailing '%', after the first is an error. */
+ for (i++; fmt[i] ; i += (fmt[i] == '%') + 1)
+ if (fmt[i] == '%' && fmt[i + 1] != '%')
+ die (EXIT_FAILURE, 0, _("format %s has too many %% directives"),
+ quote (fmt));
+
+ return out;
+}
diff --git a/src/double-format.h b/src/double-format.h
new file mode 100644
index 0000000..3ada9be
--- /dev/null
+++ b/src/double-format.h
@@ -0,0 +1,26 @@
+/* GNU Datamash - perform simple calculation on input data
+
+ Copyright (C) 2018 Assaf Gordon <assafgordon@gmail.com>
+
+ This file is part of GNU Datamash.
+
+ GNU Datamash is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ GNU Datamash is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNU Datamash. If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef __DOUBLE_FORMAT_H__
+#define __DOUBLE_FORMAT_H__
+
+char*
+validate_double_format (char const *fmt);
+
+#endif
diff --git a/src/text-options.c b/src/text-options.c
index ba274d3..0d2f52d 100644
--- a/src/text-options.c
+++ b/src/text-options.c
@@ -27,6 +27,7 @@
#include "system.h"
#include "die.h"
+#include "double-format.h"
#include "text-options.h"
/* The character marking end of line. Default to \n. */
@@ -116,3 +117,10 @@ set_numeric_output_precision(const char* digits)
finalize_numeric_output_buffer ();
}
+/* Validate FORMAT, store the validated copy in numeric_output_format. */
+void
+set_numeric_printf_format (const char* format)
+{
+ numeric_output_format = validate_double_format (format);
+ finalize_numeric_output_buffer ();
+}
diff --git a/src/text-options.h b/src/text-options.h
index f074e38..baeda3c 100644
--- a/src/text-options.h
+++ b/src/text-options.h
@@ -85,4 +85,7 @@ print_line_separator ()
void
set_numeric_output_precision(const char* digits);
+void
+set_numeric_printf_format (const char* format);
+
#endif
diff --git a/tests/datamash-error-msgs.pl b/tests/datamash-error-msgs.pl
index 0420505..109d526 100644
--- a/tests/datamash-error-msgs.pl
+++ b/tests/datamash-error-msgs.pl
@@ -221,6 +221,31 @@ my @Tests =
{ERR=>"$prog: invalid rounding digits value '0'\n"}],
['e113','--round "51"', {IN_PIPE=>""}, {EXIT=>1},
{ERR=>"$prog: invalid rounding digits value '51'\n"}],
+
+ # Custom Output Formats
+ ['e120','--format ""', {IN_PIPE=>""}, {EXIT=>1},
+ {ERR=>"$prog: format '' has no % directive\n"}],
+ ['e121','--format "foobar"', {IN_PIPE=>""}, {EXIT=>1},
+ {ERR=>"$prog: format 'foobar' has no % directive\n"}],
+ ['e122','--format "aa%%ff"', {IN_PIPE=>""}, {EXIT=>1},
+ {ERR=>"$prog: format 'aa%%ff' has no % directive\n"}],
+ ['e123','--format "%Lg"', {IN_PIPE=>""}, {EXIT=>1},
+ {ERR=>"$prog: format '%Lg' has unknown/invalid type %L directive\n"}],
+ ['e124','--format "%*g"', {IN_PIPE=>""}, {EXIT=>1},
+ {ERR=>"$prog: format '%*g' has unknown/invalid type %* directive\n"}],
+ ['e125','--format "%g %f"', {IN_PIPE=>""}, {EXIT=>1},
+ {ERR=>"$prog: format '%g %f' has too many % directives\n"}],
+ ['e126','--format "%"', {IN_PIPE=>""}, {EXIT=>1},
+ {ERR=>"$prog: format '%' missing valid type after '%'\n"}],
+ ['e127','--format "%3"', {IN_PIPE=>""}, {EXIT=>1},
+ {ERR=>"$prog: format '%3' missing valid type after '%'\n"}],
+ ['e128','--format "%#.4"', {IN_PIPE=>""}, {EXIT=>1},
+ {ERR=>"$prog: format '%#.4' missing valid type after '%'\n"}],
+ ['e129','--format "%f%"', {IN_PIPE=>""}, {EXIT=>1},
+ {ERR=>"$prog: format '%f%' has too many % directives\n"}],
+ ['e130','--format "%f%3"', {IN_PIPE=>""}, {EXIT=>1},
+ {ERR=>"$prog: format '%f%3' has too many % directives\n"}],
+
);
my $save_temps = $ENV{SAVE_TEMPS};
diff --git a/tests/datamash-output-format.pl b/tests/datamash-output-format.pl
index c987ac5..52c68a2 100755
--- a/tests/datamash-output-format.pl
+++ b/tests/datamash-output-format.pl
@@ -63,6 +63,48 @@ my @Tests =
# Test multiple rounding options
['r8', '--round 3 -R 7 sum 1', {IN_PIPE=>$in1}, {OUT => "1.0000090\n"}],
['r9', '--round 7 -R 3 sum 1', {IN_PIPE=>$in1}, {OUT => "1.000\n"}],
+
+
+ # Test Custom formats: %f
+ ['f1', '--format "%07.3f" sum 1', {IN_PIPE=>$in1}, {OUT => "001.000\n"}],
+ ['f2', '--format "%.7f" sum 1', {IN_PIPE=>$in1}, {OUT => "1.0000090\n"}],
+ ['f3', '--format "%10f" sum 1', {IN_PIPE=>$in1}, {OUT => " 1.000009\n"}],
+ ['f4', '--format "%-10f" sum 1', {IN_PIPE=>$in1}, {OUT => "1.000009 \n"}],
+ ['f5', '--format "%+10f" sum 1', {IN_PIPE=>$in1}, {OUT => " +1.000009\n"}],
+ # Test %#f (alternate form: always show decimal point)
+ ['f6', '--format "%.0f" sum 1', {IN_PIPE=>$in1}, {OUT => "1\n"}],
+ ['f7', '--format "%#.0f" sum 1', {IN_PIPE=>$in1}, {OUT => "1.\n"}],
+
+ # Test Custom formats: %g
+ ['g1', '--format "%g" sum 1', {IN_PIPE=>$in1}, {OUT => "1.00001\n"}],
+ ['g2', '--format "%10g" sum 1', {IN_PIPE=>$in1}, {OUT => " 1.00001\n"}],
+ ['g3', '--format "%010g" sum 1', {IN_PIPE=>$in1}, {OUT => "0001.00001\n"}],
+ ['g4', '--format "%.10g" sum 1', {IN_PIPE=>$in1}, {OUT => "1.000009\n"}],
+ ['g5', '--format "%.3g" sum 1', {IN_PIPE=>$in1}, {OUT => "1\n"}],
+ # Test %#g (alternate form: don't trim zero decimal digits)
+ ['g6', '--format "%.4g" sum 1', {IN_PIPE=>$in1}, {OUT => "1\n"}],
+ ['g7', '--format "%#.4g" sum 1', {IN_PIPE=>$in1}, {OUT => "1.000\n"}],
+
+ # Test Custom formats: %e
+ ['e1', '--format "%e" sum 1', {IN_PIPE=>$in1}, {OUT=>"1.000009e+00\n"}],
+ ['e2', '--format "%.3e" sum 1', {IN_PIPE=>$in1}, {OUT=>"1.000e+00\n"}],
+
+ # Test Custom formats: %a
+ ['a1', '--format "%0.3a" sum 1', {IN_PIPE=>$in1}, {OUT=>"0x8.000p-3\n"}],
+
+
+ # Custom formats can use lots of memory
+ ['m1', '--format "%04000.0f" sum 1', {IN_PIPE=>$in1},
+ {OUT => "0" x 3999 . "1\n"}],
+
+ # due to binary floating representation, some decimal point digits won't be
+ # zero (e.g. 1.0000090000000000000000000000000523453254320000000...).
+ # The OUT_SUBST replaces exactly 3994 digits (as expected from the format)
+ # with an "X".
+ ['m2', '--format "%.4000f" sum 1', {IN_PIPE=>$in1},
+ {OUT => "1.000009X\n"},
+ {OUT_SUBST => 's/^(1\.000009)([0-9]{3994})$/\1X/'}],
+
);
diff --git a/tests/datamash-valgrind.sh b/tests/datamash-valgrind.sh
index dcd010f..918285e 100755
--- a/tests/datamash-valgrind.sh
+++ b/tests/datamash-valgrind.sh
@@ -187,4 +187,10 @@ cmp wide wide_orig ||
{ warn_ "base64 decoding failed (decoded output does not match original)";
fail=1 ; }
+## Test large output formats
+cat wide | valgrind --track-origins=yes --leak-check=full \
+ --show-reachable=yes --error-exitcode=1 \
+ datamash --format "%05000.5000f" sum 1 > /dev/null ||
+ { warn_ "custom-format failed" ; fail=1 ; }
+
Exit $fail