//============================================================================== // detab.cpp // Convert tabs (HT) in text files into spaces. // Handles 8-bit (ASCII, ISO 8859-1) text files, 7-bit ASCII with parity, // and UTF-8, UTF-16, and UTF-32 Unicode files. // // Usage // detab [-option...] file... // // Notice // Copyright ©2010-2011 by David R. Tribble, all rights reserved. // Permission is granted to any person or entity except those designated // by the United States Department of State as a terrorist, or terrorist // government or agency, to use and distribute this source code provided // that the original copyright notice remains present and unaltered. //============================================================================== // Identification #define PROG "detab" #ifndef VERS #define VERS "2.1" #endif #define DATE "2011-05-21" static char REV[] = "@(#)drt/src/cmd/detab.cpp $Revision: 1.2 $$Date: 2011/05/21 22:22:27 $"; static char BUILT[] = "@(#)" "Built: " __DATE__ " " __TIME__; static char COPYRIGHT[] = "@(#)" "Copyright ©2010-2011 by David R. Tribble, all rights reserved."; // System definitions #if defined(_WIN32) #define OS_WIN32 1 #elif defined(unix) || defined(_unix) || defined(__unix) || defined(__unix__) #define OS_UNIX 1 #elif defined(_MAC) || defined(MACOS) || defined(_MACOS) #define OS_MACOS 1 #else #error Target operating system is unknown #endif // Includes #include #define sys_ctype_h #include #define sys_errno_h #include #define sys_iso646_h #include #define sys_stdlib_h #include #define sys_string_h #include "textstream.hpp" #include "intextstream.hpp" #include "outtextstream.hpp" // Manifest constants #define DFL_TABSIZE 8 //------------------------------------------------------------------------------ // class Program // Program to convert newlines in text files. //------------------------------------------------------------------------------ #define Program_VS 101 // Class version class Program { // ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ // Constants public: enum { RC_OKAY = 0, // Success RC_READ = 1, // Can't read input file RC_WRITE = 2, // Can't write output file RC_USAGE = 255 // Bad program usage }; private: static const char *const s_usageMsg[]; // Usage messages // ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ // Static functions public: static void usage(); // Show a usage message & punt // ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ // Functions public: /**/ ~Program() { } // Destructor /**/ Program(); // Constructor int main(int argc, const char *const *argv); // Run this program private: /**/ Program(const Program &o); const Program & operator =(const Program &o); void convert(InTextStream *in, OutTextStream *out) const; // Convert a file // ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ // Variables private: const char * m_inName; // Input filename const char * m_outName; // Output filename enum TextStream::FileType m_chType; // I/O file type int m_inTabSize; // Input tab width int m_outTabSize; // Output tab width bool m_bom; // Output starts with BOM bool m_flushNL; // Flush after newlines }; //------------------------------------------------------------------------------ // Program::Program() // Constructor. //------------------------------------------------------------------------------ Program::Program(): m_inName(NULL), m_outName("-"), m_chType(TextStream::FTYPE_8BIT), m_inTabSize(DFL_TABSIZE), m_outTabSize(0), m_bom(false), m_flushNL(false) { #if Program_VS != 101 #error class Program has changed #endif // Initialize } //------------------------------------------------------------------------------ // Program::convert() // Convert a file, translating tab (HT) characters into spaces (SP). //------------------------------------------------------------------------------ void Program::convert(InTextStream *in, OutTextStream *out) const { #if Program_VS/100 != 1 #error class Program has changed #endif int ch; // Start reading the input stream ch = in->read(); if (ch == TextStream::CH_EOF) return; // Write a leading BOM if necessary if (m_bom) out->write(TextStream::CH_BOM); // Convert the file contents while (ch != TextStream::CH_EOF) { int tabSize = m_inTabSize; int colNo = 0; // Convert the next text line from the input stream while (ch != TextStream::CH_EOF) { // Process a char from the input stream if (ch == TextStream::CH_CR or ch == TextStream::CH_LF or ch == TextStream::CH_FF or ch == TextStream::CH_VT) { // End of line break; } else if (ch == TextStream::CH_HT and tabSize > 0) { // Replace a tab (HT) with spaces (SP) while (colNo < tabSize) { out->write(TextStream::CH_SP); colNo++; } colNo = 0; } else { // Write a regular (non-tab) char out->write(ch); colNo++; if (colNo >= tabSize) colNo = 0; } // Read the next char from the input stream ch = in->read(); } // End of line reached if (ch != TextStream::CH_EOF) { out->write(ch); if (m_flushNL) out->flush(); // Read the next char from the input stream ch = in->read(); } } // Done out->flush(); } //------------------------------------------------------------------------------ // Program::s_usageMsg[] // Program usage messages. // // See // Program::usage() //------------------------------------------------------------------------------ /*static*/ const char *const Program::s_usageMsg[] = { "[" PROG ", " VERS " " DATE "] (david.tribble.com)", "", "Replaces tab (HT) characters in text files with spaces (SP).", "", "Handles 8-bit ASCII and ISO 8859-1 text files, 7-bit ASCII with parity, " "and", "UTF-8, UTF-16, and UTF-32 Unicode files.", "", "Usage: " PROG " [-option...] file...", "", "Input options:", " -NNN Same as '-t NNN'", " -t NNN Input tab width, NNN spaces (default is 8)", #if INCOMPLETE___ " -ot NNN Replace output spaces with tabs (default is 0, no tabs)", #endif " -o file Output file (default is standard output)", " -8bit Characters are 8-bit ASCII or ISO 8859-1 (default)", " -7e Characters are 7-bit ASCII even parity", " -7m Characters are 7-bit ASCII mark parity", " -7n Characters are 7-bit ASCII with no (space) parity", " -7o Characters are 7-bit ASCII odd parity", " -utf8 Characters are UTF-8", " -utf16 Characters are UTF-16 big-endian", " -utf16r Characters are UTF-16 little-endian", " -utf32 Characters are UTF-32 big-endian", " -utf32r Characters are UTF-32 little-endian", " -24be Characters are 24-bit big-endian (non-standard)", " -24le Characters are 24-bit little-endian (non-standard)", " -bom Output begins with a Unicode byte order mark (BOM)", " -fl Flush output after newlines", "", "An input filename of \"-\" indicates standard input.", "An output filename of \"-\" indicates standard output.", NULL }; //------------------------------------------------------------------------------ // Program::usage() // Display a usage message. // // See // Program::main() // Program::s_usageMsg[] //------------------------------------------------------------------------------ void Program::usage() { #if Program_VS/100 != 1 #error class Program has changed #endif // Display a program usage message for (int i = 0; s_usageMsg[i] != NULL; i++) ::printf("%s\n", s_usageMsg[i]); // Punt ::exit(RC_USAGE); } //------------------------------------------------------------------------------ // Program::main() // Execute the program. //------------------------------------------------------------------------------ int Program::main(int argc, const char *const *argv) { #if Program_VS != 101 #error class Program has changed #endif OutTextStream * out = NULL; int rc = RC_OKAY; int i; // Parse option args for (i = 1; i < argc and argv[i][0] == '-' and argv[i][1] != '\0'; i++) { if (::strcmp(argv[i], "--") == 0) { i++; break; } else if (::strcmp(argv[i], "-o") == 0) { if (++i >= argc) usage(); m_outName = argv[i]; } else if (::strcmp(argv[i], "-8bit") == 0 or ::strcmp(argv[i], "-8b") == 0) m_chType = TextStream::FTYPE_8BIT; else if (::strcmp(argv[i], "-7n") == 0 or ::strcmp(argv[i], "-7bit") == 0) m_chType = TextStream::FTYPE_7NONE; else if (::strcmp(argv[i], "-7m") == 0) m_chType = TextStream::FTYPE_7MARK; else if (::strcmp(argv[i], "-7e") == 0) m_chType = TextStream::FTYPE_7EVEN; else if (::strcmp(argv[i], "-7o") == 0) m_chType = TextStream::FTYPE_7ODD; else if (::strcmp(argv[i], "-utf8") == 0) m_chType = TextStream::FTYPE_UTF8; else if (::strcmp(argv[i], "-utf16") == 0 or ::strcmp(argv[i], "-utf16be") == 0) m_chType = TextStream::FTYPE_UTF16; else if (::strcmp(argv[i], "-utf16r") == 0 or ::strcmp(argv[i], "-utf16le") == 0) m_chType = TextStream::FTYPE_UTF16_R; else if (::strcmp(argv[i], "-utf32") == 0 or ::strcmp(argv[i], "-utf32be") == 0) m_chType = TextStream::FTYPE_UTF32; else if (::strcmp(argv[i], "-utf32r") == 0 or ::strcmp(argv[i], "-utf32le") == 0) m_chType = TextStream::FTYPE_UTF32_R; else if (::strcmp(argv[i], "-24bit") == 0 or ::strcmp(argv[i], "-24be") == 0) m_chType = TextStream::FTYPE_24BIT; else if (::strcmp(argv[i], "-24bitr") == 0 or ::strcmp(argv[i], "-24le") == 0) m_chType = TextStream::FTYPE_24BIT_R; else if (::strcmp(argv[i], "-t") == 0) { if (++i >= argc) usage(); m_inTabSize = ::atoi(argv[i]); } else if (::strcmp(argv[i], "-bom") == 0) m_bom = true; else if (::strcmp(argv[i], "-fl") == 0) m_flushNL = true; #if INCOMPLETE___ else if (::strcmp(argv[i], "-ot") == 0) { if (++i >= argc) usage(); m_outTabSize = ::atoi(argv[i]); } #endif else if (isdigit(argv[i][1])) { for (int j = 1; argv[i][j] != '\0'; j++) if (not isdigit(argv[i][j])) usage(); m_inTabSize = ::atoi(argv[i]+1); } else usage(); } // Check usage if (i >= argc) usage(); // Open the output stream out = new OutTextStream(); if (out == NULL) { ::fprintf(stderr, "Can't allocate an output stream\n"); ::fflush(stderr); return RC_WRITE; } if (::strcmp(m_outName, "-") == 0) { if (not out->open(stdout, m_chType, TextStream::EOLN_NONE)) { ::fprintf(stderr, "Can't write to standard output; %s\n", strerror(errno)); ::fflush(stderr); return RC_WRITE; } } else { if (not out->open(m_outName, m_chType, TextStream::EOLN_NONE)) { ::fprintf(stderr, "Can't write to: %s; %s\n", m_outName, strerror(errno)); ::fflush(stderr); return RC_WRITE; } } // Convert the named files for ( ; i < argc; i++) { InTextStream * in = NULL; // Open a text file stream m_inName = argv[i]; in = new InTextStream(); if (in == NULL) { ::fprintf(stderr, "Can't allocate an input stream: %s; %s\n", m_inName, strerror(errno)); ::fflush(stderr); rc = RC_READ; goto Done; } if (::strcmp(m_inName, "-") == 0) { if (not in->open(stdin, m_chType, TextStream::EOLN_NONE)) { ::fprintf(stderr, "Can't read from standard input; %s\n", strerror(errno)); ::fflush(stderr); rc = RC_READ; goto Done; } } else { if (not in->open(m_inName, m_chType, TextStream::EOLN_NONE)) { ::fprintf(stderr, "Can't read: %s; %s\n", m_inName, strerror(errno)); ::fflush(stderr); rc = RC_READ; goto Done; } } // Convert the input file convert(in, out); Done: if (in != NULL) { in->close(); delete in; in = NULL; } } // Done ::fflush(stdout); out->close(); return rc; } //------------------------------------------------------------------------------ // ::main() // Execute this program. // // See // Program::main() //------------------------------------------------------------------------------ int main(int argc, char **argv) { Program pgm; return pgm.main(argc, (const char *const *) argv); } // End detab.cpp