//==============================================================================
// fsplit.cpp
//	Split a binary file into several smaller files.
//
//	The resulting output files can then be reconstituted into the original
//	binary file using the Unix 'cat(1)' command, or the MS-DOS or MS/Windows
//	'type' or 'copy' commands.
//
// Usage
//	fsplit [-option...] file
//	
// Options
//	-ns
//	Do not create output files if the input file is smaller than the maximum
//	output size.
//
//	-o file
//	Output filename prefix. All output files are composed of this prefix
//	followed by a '.' and a sequence number. By default, the prefix is the
//	same as the input filename.
//
//	-s num
//	Maximum output file size (in bytes).  The default is 1m (1 megabyte).
//	The size may have one of the following radix suffixes:
//	    h	Hexadecimal (base 16)
//	    o	Octal (base 8)
//	The size may have one of the following unit suffixes:
//	    b	Blocks (512 bytes)
//	    k	Kilobytes (1,024 bytes)
//	    m	Megabytes (1,048,576 bytes)
//	    s	Sectors (128 bytes)
//
//	-v
//	Verbose output, display the output filenames as they are written.
//
//	-w num
//	Number of digits in the output filename suffix (the default is 3).
//
//	An input filename of "-" indicates the standard input.
//
// Notice
//	Written by David R. Tribble, Apr 2009.
//
//	Copyright ©2009 by David R. Tribble, all rights reserved.
//	Permission is granted to any person or entity except those designated
//	by the United States Department of State as a terrorist, or terrorist
//	government or agency, to use and distribute this source code provided
//	that the original copyright notice remains present and unaltered.
//==============================================================================

// Identification
//	Strings embedded in the object code to identify the source revision,
//	the build time, and the copyright holder.  Each begins with "@(#)",
//	presumably so that tools such as what(1) can locate them in a binary
//	(TODO confirm).  None of them is referenced by the code in this file.

// Source revision identification string
static char	REV[] =
    "@(#)drt/src/cmd/fsplit.cpp $Revision: 1.4 $ $Date: 2009/05/01 19:19:42 $";

// Program name, version, and release date, as shown in the usage banner.
// VERS and DATE may be overridden from the compiler command line.
#define PROG	"fsplit"
#ifndef VERS
 #define VERS	"2.0"
#endif
#ifndef DATE
 #define DATE	"2009-04-30"
#endif

// Date and time at which this translation unit was compiled
static char	BUILT[] =
    "@(#)" "Built: " __DATE__ " " __TIME__;

// Copyright notice string
static char	COPYRIGHT[] =
    "@(#)" "Copyright Š2009 by David R. Tribble, all rights reserved.";


//------------------------------------------------------------------------------
// Includes

#include <ctype.h>
#include <errno.h>
#include <iso646.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/stat.h>

#if _WIN32
 #include <fcntl.h>
 #include <io.h>
#endif


// Datatype 'long long'
//	llong_t is the integer type used for file sizes and byte counts, and
//	FMT_LLONG is the printf()/scanf() length modifier that goes with it
//	(used as "%" FMT_LLONG "d" and the like).  When _WIN32 is set and
//	_MSC_VER is below 1200 the '__int64' type and "I64" modifier are used
//	instead of 'long long' and "ll".
#if _WIN32 && _MSC_VER < 1200
 #define llong_t	__int64
 #define FMT_LLONG	"I64"
#else
 #define llong_t	long long
 #define FMT_LLONG	"ll"
#endif


//------------------------------------------------------------------------------
// Manifest constants
//	Each of these may be overridden from the compiler command line.

// Filename suffix separator
//	Placed between the output filename prefix and the sequence number.
#ifndef SUFF_SEP
 #define SUFF_SEP	"."
#endif

// I/O buffer size
//	Size in bytes of the buffer used to copy data (see Program::IO_BUFSIZE).
#ifndef BUFSIZE
 #define BUFSIZE	(64*1024)
#endif

// Default maximum output file size
//	Size in bytes used when no -s option is given (see
//	Program::DFL_MAXSIZE).
#ifndef MAXSIZE
 #define MAXSIZE	(1*1024*1024L)
#endif


//------------------------------------------------------------------------------
// class Program
//	Embodies this program.
//
//	An object of this class holds the settings gathered from the command
//	line (input name, output name prefix, maximum piece size, and so on).
//	Program::main() parses the command line into those settings and then
//	calls splitFile() to do the work.  The object does not own any of the
//	strings it points to; they refer to argv[] entries or string literals.
//
//	Objects of this class cannot be copied (the copy constructor and the
//	assignment operator are declared private and are not implemented).
//------------------------------------------------------------------------------

#define Program_VS	100			// Class version

class Program
{
// Constants

public:
    // Program exit codes
    //	Returned by main() and splitFile(), and used as the process exit
    //	status.
    enum ExitCode
    {
        RC_OKAY =	0,			// Success
        RC_READ =	1,			// I/O read error
        RC_WRITE =	2,			// I/O write error
        RC_NAME	=	3,			// Output filename too long
        RC_STAT =	4,			// Can't stat() the file
        RC_SMALL =	5,			// Input file is too small
        RC_USAGE =	255			// Bad command usage
    };

private:
    // Radix suffix types
    //	Radix of the sequence number appended to output filenames.  Only set
    //	by the '-h' option, which is compiled out unless NOT_SUPPORTED__ is
    //	defined.
    //	(No comma after the last enumerator: a trailing comma is not allowed
    //	before C++11.)
    enum Radix
    {
        RADIX_DEC =	'd',			// None, decimal
        RADIX_HEX =	'h'			// Hexadecimal
    };

    // Program usage help message
    //	Array of text lines, terminated by a NULL entry.
    static const char *	USAGE_TEXT[];

    // Default max output file size (in bytes)
    static const long	DFL_MAXSIZE =	MAXSIZE;

    // I/O buffer size (in bytes)
    static const int	IO_BUFSIZE =	BUFSIZE;

// ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
// Functions

public:
    // Destructor
    ~Program();

    // Constructor
    //	Establishes the default settings.
    Program();

    // Split a file into several smaller files
    //	Parses the command line in argv[1..argc-1] and splits the named
    //	input file.  Returns one of the 'RC_XXX' exit codes.
    int main(int argc, const char **argv);

    // Display a program usage message and punt
    //	Does not return; the program exits with code RC_USAGE.
    void usage();

private:
    // Parse a numeric filesize string
    //	Returns true and stores the size (in bytes) into '*val' on success,
    //	or returns false if 's' is malformed.
    bool parseNum(const char *s, llong_t *val);

    // Split a named file into smaller files
    //	Returns one of the 'RC_XXX' exit codes.
    int splitFile(const char *fname);

    // Not implemented
    //	Declared private to prevent copying.
    Program(const Program &o);
    const Program & operator =(const Program &o);

// ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
// Variables
//	Note: the constructor initializes these in declaration order.

private:
    const char *	m_inName;		// Source filename
    const char *	m_outName;		// Output filenames prefix
    enum Radix		m_radix;		// Sequence number radix
    llong_t		m_maxSize;		// Max output file size
    int			m_digits;		// Output digit width
    bool		m_debugs;		// Debugging output enabled
    bool		m_verbose;		// Verbose output
    bool		m_overwrite;		// Overwrite existing files
    bool		m_writeSmall;		// Write small output files
};


//------------------------------------------------------------------------------
// Program::~Program()
//	Destructor.
//
//	There is nothing to release: the object holds only scalar settings and
//	pointers to strings that it does not own.
//------------------------------------------------------------------------------
Program::~Program()
{
    // Build-time guard: force this function to be revisited whenever the
    // class version number changes.
#if !(Program_VS == 100)
 #error class Program has changed
#endif
}


//------------------------------------------------------------------------------
// Program::Program()
//	Constructor.
//
//	Establishes the settings that apply when no command line options are
//	given.  The members are listed in their declaration order.
//------------------------------------------------------------------------------
Program::Program()
    : m_inName(NULL)			// No input file chosen yet
    , m_outName(NULL)			// No output prefix chosen yet
    , m_radix(RADIX_DEC)		// Decimal sequence numbers
    , m_maxSize(DFL_MAXSIZE)		// Default maximum piece size
    , m_digits(3)			// Three-digit sequence numbers
    , m_debugs(false)
    , m_verbose(false)
    , m_overwrite(false)
    , m_writeSmall(true)		// Small inputs still produce output
{
    // Build-time guard: force this function to be revisited whenever the
    // class version number changes.
#if !(Program_VS == 100)
 #error class Program has changed
#endif
}


//------------------------------------------------------------------------------
// Program::USAGE_TEXT[]
//	Program usage help message text.
//
//	One array element per output line; usage() prints each element followed
//	by a newline and stops at the terminating NULL entry.  Adjacent string
//	literals are concatenated by the compiler, so some elements below span
//	more than one source line.
//
//	The '-D' (debug) option and the '--' end-of-options marker are accepted
//	by Program::main() but are not listed here.
//------------------------------------------------------------------------------
const char *	Program::USAGE_TEXT[] =
{
#if Program_VS/100 != 1
 #error class Program has changed
#endif

    // Banner: program name, version, release date, and contact address.
    // "\100" is the octal escape for '@'; the address is presumably split
    // up to keep it from being harvested from the source (TODO confirm).
    "[" PROG " - " VERS ", " DATE "] (" "david" "\100" "tribble.com" ")",
    "",
    "Split a file into several smaller files.",
    "",
    "Usage: " PROG " [-option...] file",
    "",
    "Options:",
    // The '-f' and '-h' entries are compiled in only when NOT_SUPPORTED__ is
    // defined to a nonzero value, matching the option parsing in main().
#if NOT_SUPPORTED__
    "    -f          Overwrite existing output files.",
    "",
#endif
#if NOT_SUPPORTED__
    "    -h          Output files are named with a hexadecimal suffix.",
    "",
#endif
    "    -ns         Do not create output files if the input file is smaller "
    "than",
    "                the maximum output size.",
    "",
    "    -o file     Output filename prefix. All output files are composed of "
    "this",
    "                prefix followed by a '" SUFF_SEP "' and a sequence number."
    " By default,",
    "                the prefix is the same as the input filename.",
    "",
    "    -s num      Maximum output file size (in bytes).",
    "                "
    "The default is 1m (1 megabyte).",
    "                The size may have one of the following radix suffixes:",
    "                    h   Hexadecimal (base 16)",
    "                    o   Octal (base 8)",
    "                The size may have one of the following unit suffixes:",
    "                    b   Blocks (512 bytes)",
    "                    k   Kilobytes (1,024 bytes)",
    "                    m   Megabytes (1,048,576 bytes)",
    "                    s   Sectors (128 bytes)",
    "",
    "    -v          Verbose output, display the output filenames as they are",
    "                written.",
    "",
    "    -w num      Number of digits in the output filename suffix",
    "                (the default is 3).",
    "",
    "An input filename of \"-\" indicates the standard input.",
    // End-of-table sentinel
    NULL
};


//------------------------------------------------------------------------------
// Program::usage()
//	Display a program usage help message and punt.
//
//	Writes every line of USAGE_TEXT[] to the standard output and then
//	terminates the program with exit code RC_USAGE.  This function does not
//	return to its caller.
//------------------------------------------------------------------------------
void Program::usage()
{
#if Program_VS/100 != 1
 #error class Program has changed
#endif

    // Walk the help text table; a NULL entry marks its end
    const char * const *	line = USAGE_TEXT;

    while (*line != NULL)
    {
        ::printf("%s\n", *line);
        line++;
    }

    // Make sure the text is written out before the program exits
    ::fflush(stdout);

    // Punt
    ::exit(RC_USAGE);
}


//------------------------------------------------------------------------------
// Program::main()
//	Split a file into several smaller files.
//
//	Parses the command line options into the member settings, determines
//	the input filename and the output filename prefix, and then calls
//	splitFile() to do the work.
//
// Param	argc
//	Number of entries in 'argv'.
//
// Param	argv
//	Command line arguments; argv[0] is the program name and is ignored.
//
// Returns
//	One of the 'RC_XXX' return codes.  An unknown option, an option that
//	lacks its argument, or a missing input filename displays the usage
//	message and exits the program (see usage()) instead of returning.
//------------------------------------------------------------------------------
int Program::main(int argc, const char **argv)
{
#if Program_VS/100 != 1
 #error class Program has changed
#endif

    int		i;
    int		rc;

    // Parse the command line options
    for (i = 1;  i < argc  and  argv[i][0] == '-';  i++)
    {
        const char *	arg;

        arg = argv[i];
        if (::strcmp(arg, "-") == 0)
        {
            // A lone "-" is the input filename (standard input)
            break;
        }
        else if (::strcmp(arg, "--") == 0)
        {
            // Explicit end of the options
            i++;
            break;
        }
        else if (::strcmp(arg, "-D") == 0)
            m_debugs = true;
#if NOT_SUPPORTED__
        else if (::strcmp(arg, "-f") == 0)
            m_overwrite = true;
#endif
#if NOT_SUPPORTED__
        else if (::strcmp(arg, "-h") == 0)
            m_radix = RADIX_HEX;	// This option takes no argument
#endif
        else if (::strcmp(arg, "-ns") == 0)
            m_writeSmall = false;
        else if (::strcmp(arg, "-o") == 0  and  i+1 < argc)
            m_outName = argv[++i];
        else if (::strcmp(arg, "-s") == 0  and  i+1 < argc)
        {
            // The size must be well-formed and positive; a size of zero
            // or less can never consume any of the input
            if (not parseNum(argv[++i], &m_maxSize)  or  m_maxSize <= 0)
            {
                ::printf("Bad size (-s): %s\n", argv[i]);
                return RC_USAGE;
            }
        }
        else if (::strcmp(arg, "-v") == 0)
            m_verbose = true;
        else if (::strcmp(arg, "-w") == 0  and  i+1 < argc)
        {
            char *	end;
            long	width;

            // The width must be a complete decimal number of at least 1.
            // It is also bounded by FILENAME_MAX, since no usable filename
            // could hold a longer suffix; this rejects out-of-range values
            // as well.
            width = ::strtol(argv[++i], &end, 10);
            if (end == argv[i]  or  *end != '\0'
                or  width < 1  or  width >= FILENAME_MAX)
            {
                ::printf("Bad width (-w): %s\n", argv[i]);
                return RC_USAGE;
            }
            m_digits = (int) width;
        }
        else
        {
            // Unknown option, or an option missing its argument
            ::printf("Invalid option: %s\n", arg);
            usage();
        }
    }

    // Check the command args
    if (i >= argc)
        usage();

    // Get the input and output filenames
    // (any operands following the input filename are ignored)
    m_inName = argv[i++];

    if (m_outName == NULL)
    {
        // By default the output prefix is the input filename, except that
        // standard input has no name of its own
        m_outName = m_inName;
        if (::strcmp(m_outName, "-") == 0)
            m_outName = "stdout";
    }

    if (::strlen(m_outName) + ::strlen(SUFF_SEP) + (size_t) m_digits
        >= FILENAME_MAX)
    {
        ::printf("Output filename is too long (> %d): %s\n",
            FILENAME_MAX, m_outName);
        return RC_NAME;
    }

    // Split the named file into smaller files
    rc = splitFile(m_inName);
    return rc;
}


//------------------------------------------------------------------------------
// Program::parseNum()
//	Parse a numeric filesize string.
//
// Param	s
//	String containing a numeric value, followed by an optional radix suffix,
//	followed by an optional units suffix.
//
// Param	val
//	Pointer where the converted value is to be written.
//
// Returns
//	The converted numeric value of 's', or -1 if the number is malformed.
//------------------------------------------------------------------------------
bool Program::parseNum(const char *s, llong_t *val)
{
#if Program_VS/100 != 1
 #error class Program has changed
#endif

    llong_t		n;
    long		units;
    const char *	radix;
    int			len;
    char		ch;
    char		buf[30+1];

    // Ignore leading zeros and spaces
    while (s[0] == ' '  or  s[0] == '0')
        s++;
    if (m_debugs)
        ::printf("$ size: %s\n", s);

    // Make a local modifiable copy of the string
    len = ::strlen(s);
    if (len >= sizeof(buf))
        return false;
    ::strncpy(buf, s, sizeof(buf)-1);

    // Check for a units suffix
    ch = buf[len-1];
    if (not isdigit(ch))
    {
        switch (tolower(ch))
        {
        case 'b':	units = 512;  break;		// 512-byte block
        case 'k':	units = 1024;  break;		// Kilobyte
        case 'm':	units = 1024*1024;  break;	// Megabyte
        case 's':	units = 128;  break;		// 128-byte sector

        default:
            // Invalid suffix
            return false;
        }

        buf[--len] = '\0';
    }
    else
        units = 1;	// Bytes

    // Check for a radix suffix
    ch = buf[len-1];
    if (not isdigit(ch))
    {
        switch (tolower(ch))
        {
        case 'h':	// Hexadecimal
        case 'x':
            radix = "%" FMT_LLONG "x";
            break;

        case 'o':	// Octal
        case 'q':
            radix = "%" FMT_LLONG "o";
            break;

        default:
            // Invalid suffix
            return false;
        }

        buf[--len] = '\0';
    }
    else
        radix = "%" FMT_LLONG "u";	// Decimal

    // Convert the string into byte units
    if (::sscanf(buf, radix, &n) < 1)
        return false;
    if (n * units < n)
        return false;
    n *= units;
    *val = n;

    if (m_debugs)
        ::printf("$ size: '%s' -> %" FMT_LLONG "d\n", buf, n);
    return true;
}


//------------------------------------------------------------------------------
// Program::splitFile()
//	Split a file into several smaller files.
//
// Returns
//	One of the 'RC_XXX' return codes.
//------------------------------------------------------------------------------
int Program::splitFile(const char *fname)
{
#if Program_VS/100 != 1
 #error class Program has changed
#endif

    int		rc;
    bool	isStdin;
    FILE *	in;
    struct stat	info;
    int		nFiles;
    bool	eof;

    // Check the input filename
    isStdin = (::strcmp(fname, "-") == 0);
    if (m_debugs)
        ::printf("$ fname: %s\n", fname);

    // Open the input file
    errno = 0;
    if (isStdin)
    {
        // Read from stdin
        if (m_debugs)
            ::printf("$ read from stdin\n");
        in = stdin;
#ifdef O_BINARY
        setmode(fileno(stdin), O_BINARY);
#endif
    }
    else
    {
        // Read a named file
        in = ::fopen(fname, "rb");
        if (in == NULL)
        {
            ::printf("Can't open/read: %s: %s\n", fname, ::strerror(errno));
            return RC_READ;
        }
    }

    if (m_verbose  and  not isStdin)
    {
        ::printf("File: %s\n", fname);
        ::fflush(stdout);
    }

    // Check the source file size
    if (not isStdin)
    {
        errno = 0;
        if (::stat(fname, &info) < 0)
        {
            ::printf("Can't stat: %s: %s\n", fname, ::strerror(errno));
            ::fflush(stdout);
            return RC_STAT;
        }

        if (m_verbose)
        {
            ::printf("Size: %" FMT_LLONG "d\n", (llong_t) info.st_size);
            ::fflush(stdout);
        }

        if (info.st_size <= m_maxSize  and  not m_writeSmall)
        {
            // Input file is small, no output file is needed
            ::printf("File is smaller than max output size, no output file "
                "created\n");
            ::fflush(stdout);
            return RC_SMALL;
        }
    }

    // Split the input file into smaller output files
    nFiles = 0;
    eof = false;
    rc = RC_OKAY;

    do
    {
        llong_t	nBytes;
        FILE *	out;
        int	ch;
        char	outName[FILENAME_MAX+1];

        // Check for the end of the input file
        ch = getc(in);
        if (ch == EOF)
            break;
        ungetc(ch, in);

        // Set up the next output filename
        nFiles++;
        ::sprintf(outName, "%s%s%0*u", m_outName, SUFF_SEP, m_digits, nFiles);

        if (m_debugs)
            ::printf("$ open:  %s\n", outName);

        // Open the next output file
        out = ::fopen(outName, "wb");
        if (out == NULL)
        {
            ::printf("Can't open/write: %s: %s\n", outName, ::strerror(errno));
            rc = RC_WRITE;
            break;
        }

        if (m_verbose)
            ::printf("Output: %s\n", outName);

        // Copy the next portion of the input file to the next output file
        nBytes = m_maxSize;
        while (nBytes > 0  and  not eof)
        {
            int		len;
            char	buf[IO_BUFSIZE];

            // Read the next block of bytes from the input file
            len = sizeof(buf);
            if (len > nBytes)
                len = (int) nBytes;

            len = ::fread(buf, 1, len, in);
            if (len > 0)
            {
                // Write the block of bytes to the output file
                if (m_debugs)
                    ::printf("$ read: %d of %" FMT_LLONG "d bytes\n",
                        len, nBytes);

                if (::fwrite(buf, 1, len, out) < len)
                {
                    ::printf("Write error: %s: %s\n",
                        outName, ::strerror(errno));
                    rc = RC_WRITE;
                    break;
                }

                nBytes -= len;
            }
            else
                eof = true;
        }

        // Clean up
        if (m_debugs)
            ::printf("$ close: %s\n", outName);
        ::fclose(out);

    } while (not eof  and  rc == RC_OKAY);

    // Clean up
    if (m_debugs)
        ::printf("$ close: %s\n", fname);
    ::fclose(in);

    if (m_verbose)
        ::printf("Output files: %d\n", nFiles);
    ::fflush(stdout);

    return rc;
}


//------------------------------------------------------------------------------
// main()
//	Execute this program.
//------------------------------------------------------------------------------
int main(int argc, char **argv)
{
    Program	prog;

    return prog.main(argc, (const char **) argv);
}

// End fsplit.cpp