//============================================================================== // fsplit.cpp // Split a binary file into several smaller files. // // The resulting output files can then be reconstituted into the original // binary file using the Unix 'cat(1)' command, or the MS-DOS or MS/Windows // 'type' or 'copy' commands. // // Usage // fsplit [-option...] file // // Options // -ns // Do not create output files if the input file is smaller than the maximum // output size. // // -o file // Output filename prefix. All output files are composed of this prefix // followed by a '.' and a sequence number. By default, the prefix is the // same as the input filename. // // -s num // Maximum output file size (in bytes). The default is 1m (1 megabyte). // The size may have one of the following radix suffixes: // h Hexadecimal (base 16) // o Octal (base 8) // The size may have one of the following unit suffixes: // b Blocks (512 bytes) // k Kilobytes (1,024 bytes) // m Megabytes (1,048,576 bytes) // s Sectors (128 bytes) // // -v // Verbose output, display the output filenames as they are written. // // -w num // Number of digits in the output filename suffix (the default is 3). // // An input filename of "-" indicates the standard input. // // Notice // Written by David R. Tribble, Apr 2009. // // Copyright ©2009 by David R. Tribble, all rights reserved. // Permission is granted to any person or entity except those designated // by the United States Department of State as a terrorist, or terrorist // government or agency, to use and distribute this source code provided // that the original copyright notice remains present and unaltered. 
//==============================================================================

// Identification

static char     REV[] =
    "@(#)drt/src/cmd/fsplit.cpp $Revision: 1.4 $ $Date: 2009/05/01 19:19:42 $";

#define PROG    "fsplit"

#ifndef VERS
 #define VERS   "2.0"
#endif

#ifndef DATE
 #define DATE   "2009-04-30"
#endif

static char     BUILT[] =
    "@(#)" "Built: " __DATE__ " " __TIME__;

static char     COPYRIGHT[] =
    "@(#)" "Copyright ©2009 by David R. Tribble, all rights reserved.";


//------------------------------------------------------------------------------
// Includes
//
// NOTE(review): the header names were missing from these directives in the
// source as received.  They have been restored from the library identifiers
// this file uses (isdigit/tolower, errno, and/or/not, printf/fopen, exit,
// strcmp/strerror, struct stat, setmode/O_BINARY) -- confirm against the
// original file.

#include <ctype.h>
#include <errno.h>
#include <iso646.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>

#if _WIN32
 #include <fcntl.h>
 #include <io.h>
#endif

// Datatype 'long long'
// (very old Microsoft compilers only provide '__int64' and the "I64" printf
// length modifier)

#if _WIN32 && _MSC_VER < 1200
 #define llong_t        __int64
 #define FMT_LLONG      "I64"
#else
 #define llong_t        long long
 #define FMT_LLONG      "ll"
#endif


//------------------------------------------------------------------------------
// Manifest constants
// Each of these may be overridden on the compiler command line.

// Filename suffix separator
#ifndef SUFF_SEP
 #define SUFF_SEP       "."
#endif

// I/O buffer size
#ifndef BUFSIZE
 #define BUFSIZE        (64*1024)
#endif

// Default maximum output file size
#ifndef MAXSIZE
 #define MAXSIZE        (1*1024*1024L)
#endif


//------------------------------------------------------------------------------
// class Program
// Embodies this program.
//------------------------------------------------------------------------------ #define Program_VS 100 // Class version class Program { // Constants public: // Program exit codes enum ExitCode { RC_OKAY = 0, // Success RC_READ = 1, // I/O read error RC_WRITE = 2, // I/O write error RC_NAME = 3, // Output filename too long RC_STAT = 4, // Can't stat() the file RC_SMALL = 5, // Input file is too small RC_USAGE = 255 // Bad command usage }; private: // Radix suffix types enum Radix { RADIX_DEC = 'd', // None, decimal RADIX_HEX = 'h', // Hexadecimal }; // Program usage help message static const char * USAGE_TEXT[]; // Default max output file size static const long DFL_MAXSIZE = MAXSIZE; // I/O buffer size static const int IO_BUFSIZE = BUFSIZE; // ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ // Functions public: // Destructor ~Program(); // Constructor Program(); // Split a file into several smaller files int main(int argc, const char **argv); // Display a program usage message and punt void usage(); private: // Parse a numeric filesize string bool parseNum(const char *s, llong_t *val); // Split a named file into smaller files int splitFile(const char *fname); // Not implemented Program(const Program &o); const Program & operator =(const Program &o); // ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ // Variables private: const char * m_inName; // Source filename const char * m_outName; // Output filenames prefix enum Radix m_radix; // Sequence number radix llong_t m_maxSize; // Max output file size int m_digits; // Output digit width bool m_debugs; // Debugging output enabled bool m_verbose; // Verbose output bool m_overwrite; // Overwrite existing files bool m_writeSmall; // Write small output files }; //------------------------------------------------------------------------------ // Program::~Program() // Destructor. 
//------------------------------------------------------------------------------ Program::~Program() { #if Program_VS != 100 #error class Program has changed #endif } //------------------------------------------------------------------------------ // Program::Program() // Constructor. //------------------------------------------------------------------------------ Program::Program(): m_inName(NULL), m_outName(NULL), m_radix(RADIX_DEC), m_maxSize(DFL_MAXSIZE), m_digits(3), m_debugs(false), m_verbose(false), m_overwrite(false), m_writeSmall(true) { #if Program_VS != 100 #error class Program has changed #endif } //------------------------------------------------------------------------------ // Program::USAGE_TEXT[] // Program usage help message text. //------------------------------------------------------------------------------ const char * Program::USAGE_TEXT[] = { #if Program_VS/100 != 1 #error class Program has changed #endif "[" PROG " - " VERS ", " DATE "] (" "david" "\100" "tribble.com" ")", "", "Split a file into several smaller files.", "", "Usage: " PROG " [-option...] file", "", "Options:", #if NOT_SUPPORTED__ " -f Overwrite existing output files.", "", #endif #if NOT_SUPPORTED__ " -h Output files are named with a hexadecimal suffix.", "", #endif " -ns Do not create output files if the input file is smaller " "than", " the maximum output size.", "", " -o file Output filename prefix. All output files are composed of " "this", " prefix followed by a '" SUFF_SEP "' and a sequence number." 
" By default,", " the prefix is the same as the input filename.", "", " -s num Maximum output file size (in bytes).", " " "The default is 1m (1 megabyte).", " The size may have one of the following radix suffixes:", " h Hexadecimal (base 16)", " o Octal (base 8)", " The size may have one of the following unit suffixes:", " b Blocks (512 bytes)", " k Kilobytes (1,024 bytes)", " m Megabytes (1,048,576 bytes)", " s Sectors (128 bytes)", "", " -v Verbose output, display the output filenames as they are", " written.", "", " -w num Number of digits in the output filename suffix", " (the default is 3).", "", "An input filename of \"-\" indicates the standard input.", NULL }; //------------------------------------------------------------------------------ // Program::usage() // Display a program usage help message and punt. //------------------------------------------------------------------------------ void Program::usage() { #if Program_VS/100 != 1 #error class Program has changed #endif // Display a program usage help message and punt for (int i = 0; USAGE_TEXT[i] != NULL; i++) ::printf("%s\n", USAGE_TEXT[i]); ::fflush(stdout); // Punt ::exit(RC_USAGE); } //------------------------------------------------------------------------------ // Program::main() // Split a file into several smaller files. 
//------------------------------------------------------------------------------ int Program::main(int argc, const char **argv) { #if Program_VS/100 != 1 #error class Program has changed #endif int i; int rc; // Parse the command line options for (i = 1; i < argc and argv[i][0] == '-'; i++) { const char * arg; arg = argv[i]; if (::strcmp(arg, "-") == 0) break; else if (::strcmp(arg, "--") == 0) { i++; break; } else if (::strcmp(arg, "-D") == 0) m_debugs = true; #if NOT_SUPPORTED__ else if (::strcmp(arg, "-f") == 0) m_overwrite = true; #endif #if NOT_SUPPORTED__ else if (::strcmp(arg, "-h") == 0 and i+i < argc) m_radix = RADIX_HEX; #endif else if (::strcmp(arg, "-ns") == 0) m_writeSmall = false; else if (::strcmp(arg, "-o") == 0 and i+1 < argc) m_outName = argv[++i]; else if (::strcmp(arg, "-s") == 0 and i+1 < argc) { if (not parseNum(argv[++i], &m_maxSize)) { ::printf("Bad size (-s): %s\n", argv[i]); return RC_USAGE; } } else if (::strcmp(arg, "-v") == 0) m_verbose = true; else if (::strcmp(arg, "-w") == 0 and i+1 < argc) { if (::sscanf(argv[++i], "%d", &m_digits) == EOF) { ::printf("Bad width (-w): %s\n", argv[i]); return RC_USAGE; } } else { ::printf("Invalid option: %s\n", arg); usage(); } } // Check the command args if (i >= argc) usage(); // Get the input and output filenames m_inName = argv[i++]; if (m_outName == NULL) { m_outName = m_inName; if (::strcmp(m_outName, "-") == 0) m_outName = "stdout"; } if (::strlen(m_outName) + ::strlen(SUFF_SEP) + m_digits >= FILENAME_MAX) { ::printf("Output filename is too long (> %d): %s\n", FILENAME_MAX, m_outName); return RC_NAME; } // Split the named file into smaller files rc = splitFile(m_inName); return rc; } //------------------------------------------------------------------------------ // Program::parseNum() // Parse a numeric filesize string. // // Param s // String containing a numeric value, followed by an optional radix suffix, // followed by an optional units suffix. 
// // Param val // Pointer where the converted value is to be written. // // Returns // The converted numeric value of 's', or -1 if the number is malformed. //------------------------------------------------------------------------------ bool Program::parseNum(const char *s, llong_t *val) { #if Program_VS/100 != 1 #error class Program has changed #endif llong_t n; long units; const char * radix; int len; char ch; char buf[30+1]; // Ignore leading zeros and spaces while (s[0] == ' ' or s[0] == '0') s++; if (m_debugs) ::printf("$ size: %s\n", s); // Make a local modifiable copy of the string len = ::strlen(s); if (len >= sizeof(buf)) return false; ::strncpy(buf, s, sizeof(buf)-1); // Check for a units suffix ch = buf[len-1]; if (not isdigit(ch)) { switch (tolower(ch)) { case 'b': units = 512; break; // 512-byte block case 'k': units = 1024; break; // Kilobyte case 'm': units = 1024*1024; break; // Megabyte case 's': units = 128; break; // 128-byte sector default: // Invalid suffix return false; } buf[--len] = '\0'; } else units = 1; // Bytes // Check for a radix suffix ch = buf[len-1]; if (not isdigit(ch)) { switch (tolower(ch)) { case 'h': // Hexadecimal case 'x': radix = "%" FMT_LLONG "x"; break; case 'o': // Octal case 'q': radix = "%" FMT_LLONG "o"; break; default: // Invalid suffix return false; } buf[--len] = '\0'; } else radix = "%" FMT_LLONG "u"; // Decimal // Convert the string into byte units if (::sscanf(buf, radix, &n) < 1) return false; if (n * units < n) return false; n *= units; *val = n; if (m_debugs) ::printf("$ size: '%s' -> %" FMT_LLONG "d\n", buf, n); return true; } //------------------------------------------------------------------------------ // Program::splitFile() // Split a file into several smaller files. // // Returns // One of the 'RC_XXX' return codes. 
//------------------------------------------------------------------------------ int Program::splitFile(const char *fname) { #if Program_VS/100 != 1 #error class Program has changed #endif int rc; bool isStdin; FILE * in; struct stat info; int nFiles; bool eof; // Check the input filename isStdin = (::strcmp(fname, "-") == 0); if (m_debugs) ::printf("$ fname: %s\n", fname); // Open the input file errno = 0; if (isStdin) { // Read from stdin if (m_debugs) ::printf("$ read from stdin\n"); in = stdin; #ifdef O_BINARY setmode(fileno(stdin), O_BINARY); #endif } else { // Read a named file in = ::fopen(fname, "rb"); if (in == NULL) { ::printf("Can't open/read: %s: %s\n", fname, ::strerror(errno)); return RC_READ; } } if (m_verbose and not isStdin) { ::printf("File: %s\n", fname); ::fflush(stdout); } // Check the source file size if (not isStdin) { errno = 0; if (::stat(fname, &info) < 0) { ::printf("Can't stat: %s: %s\n", fname, ::strerror(errno)); ::fflush(stdout); return RC_STAT; } if (m_verbose) { ::printf("Size: %" FMT_LLONG "d\n", (llong_t) info.st_size); ::fflush(stdout); } if (info.st_size <= m_maxSize and not m_writeSmall) { // Input file is small, no output file is needed ::printf("File is smaller than max output size, no output file " "created\n"); ::fflush(stdout); return RC_SMALL; } } // Split the input file into smaller output files nFiles = 0; eof = false; rc = RC_OKAY; do { llong_t nBytes; FILE * out; int ch; char outName[FILENAME_MAX+1]; // Check for the end of the input file ch = getc(in); if (ch == EOF) break; ungetc(ch, in); // Set up the next output filename nFiles++; ::sprintf(outName, "%s%s%0*u", m_outName, SUFF_SEP, m_digits, nFiles); if (m_debugs) ::printf("$ open: %s\n", outName); // Open the next output file out = ::fopen(outName, "wb"); if (out == NULL) { ::printf("Can't open/write: %s: %s\n", outName, ::strerror(errno)); rc = RC_WRITE; break; } if (m_verbose) ::printf("Output: %s\n", outName); // Copy the next portion of the input file to the 
next output file nBytes = m_maxSize; while (nBytes > 0 and not eof) { int len; char buf[IO_BUFSIZE]; // Read the next block of bytes from the input file len = sizeof(buf); if (len > nBytes) len = (int) nBytes; len = ::fread(buf, 1, len, in); if (len > 0) { // Write the block of bytes to the output file if (m_debugs) ::printf("$ read: %d of %" FMT_LLONG "d bytes\n", len, nBytes); if (::fwrite(buf, 1, len, out) < len) { ::printf("Write error: %s: %s\n", outName, ::strerror(errno)); rc = RC_WRITE; break; } nBytes -= len; } else eof = true; } // Clean up if (m_debugs) ::printf("$ close: %s\n", outName); ::fclose(out); } while (not eof and rc == RC_OKAY); // Clean up if (m_debugs) ::printf("$ close: %s\n", fname); ::fclose(in); if (m_verbose) ::printf("Output files: %d\n", nFiles); ::fflush(stdout); return rc; } //------------------------------------------------------------------------------ // main() // Execute this program. //------------------------------------------------------------------------------ int main(int argc, char **argv) { Program prog; return prog.main(argc, (const char **) argv); } // End fsplit.cpp