/* spliff.cpp --- reformat C/C++ code into a canonical style */ /* * Copyright (C) 1999 Roger Willcocks * rogerw@centipede.co.uk * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. */ /* * This is a simple reformatter for C++ code. It splits the source file * into tokens and then emits them in a canonical layout. All white space * (including carriage returns) is stripped out and then reinserted, so * that for instance squiggly brackets may migrate onto a different source * code line. Comments are generally passed through untouched. * * The program was written to simplify the comparison of a large number of * C++ files that had branched into two separate projects with different * indentation standards. Use 'spliff' to fold both sources into a * canonical format before comparing the files. * * Todo ... '*', '&' are always treated as pointer and 'or', so the * layout is not quite right in some cases; single line comments can * migrate past '}'; long expressions aren't broken across lines. */ #include #include #include #include // if NOBLANKLINES is defined, blank lines are only inserted between // functions and after break, do/while and return statements. // Otherwise blank lines are transferred from the source (but multiple // blank lines are folded.) 
#undef NOBLANKLINES

// if BRACKETSTYLE2 is defined, squiggly brackets appear on a line of
// their own
#undef BRACKETSTYLE2

// if INCLUDECOMMENTS is defined, comments are passed
// through. Otherwise they are stripped out
#define INCLUDECOMMENTS

// if ADDBRACKETS is defined, squiggly brackets are inserted after
// 'if' and 'else'. The scheme breaks down for if if else else so it's
// not recommended. A warning is printed if misformatting might have
// occurred.
#undef ADDBRACKETS

#if 0
/* sample for testing - run spliff on itself */
int a(a) {
if(a)a;else a;
while(a);
if(a);
do{a;}while(a);
do a;while(a);
while(a){a;}
for(;;);
delete[]a;
for(;;)if(a)a;b;
}
#endif

/* Return a freshly allocated (new[]) copy of the NUL-terminated string 'arg'.
   The caller owns the result and releases it with delete []. */
char *strdupe(const char *arg)
{
    char *s = new char[strlen(arg) + 1];
    strcpy(s, arg);
    return s;
}

/* One dictionary entry: a distinct token text plus the flags that apply to
   every occurrence of it. Entries that hash to the same bucket are chained
   into a binary tree through 'left' and 'right'. */
class variable {
public:
    class variable *left, *right;
    char *text;         /* owned copy of the token text */
    const char *subst;  /* replacement text, set by remap(); not owned */
    short flags;

    variable(const char *arg) {
        left = right = 0;
        text = strdupe(arg);
        subst = 0;      /* was left uninitialised */
        flags = 0;
    };

    ~variable() {
        delete left;
        delete right;
        delete [] text;
    };
};

/* Hash a NUL-terminated byte string: shift the accumulator left four bits,
   add the next byte, and fold the top nibble back into the low bits. */
unsigned long doHash(const unsigned char *p)
{
    unsigned val = 0;

    while (*p) {
        unsigned long t;

        val <<= 4;
        val += *p;
        t = val & 0xf0000000L;
        if (t) {
            val ^= (t >> 24);
            val ^= t;
        }
        p++;
    }
    return val;
}

#define DICTSZ 211

variable *dictionary[DICTSZ];

#define LITERAL 1
#define PARTIAL 2
#define VALID 4
#define WHITESPACE 8
#define NEWLINE 16
#define COMMENT 32
#define PREPROC 64
#define MAPPED 128
#define DROPCR 256
#define SINGLE 512 /* not compound after if, for - need additional indent */
#define FORCECR 1024
#define EXDENT 2048
#define DOWHILE 4096

/* dictionary entries for the comment openers; set up in main() */
variable *comment1, *comment2;

/* One token occurrence in the input, linked in source order. 'flags' starts
   as the dictionary entry's flags at creation time and then collects the
   per-occurrence layout flags assigned in main(). */
class item {
public:
    class item *next;
    variable *vp;
    short flags;
    short exdent;

    item(variable *_vp, int _flags) {
        vp = _vp;
        flags = _flags | vp->flags;
        exdent = 0;
        next = 0;
    };

    ~item() { };
};

/* Find the dictionary entry whose text equals 'what'. If there is none, a
   new entry is created when 'create' is non-zero; otherwise 0 is returned. */
variable *lookup(const char *what, int create)
{
    variable **vp = & dictionary[doHash((const unsigned char *) what) % DICTSZ];

    for (;;) {
        if (*vp == 0) {
            if (create)
                *vp = new variable(what);
            return *vp;
        }

        int t = strcmp((*vp)->text, what);

        if (t == 0)
            return *vp;
        vp = (t < 0) ? & (*vp)->left : & (*vp)->right;
    }
}

/* add all partial substrings to dictionary, flagged as possible continuances */
void literal(const char *what)
{
    int len = strlen(what);
    char *temp = new char[len + 1];   /* sized from the argument, not a fixed array */

    for (int i = 0; i < len; i++) {
        temp[i] = what[i];
        temp[i+1] = 0;

        variable *vp = lookup(temp, 1);

        vp->flags |= PARTIAL | LITERAL;
        if (i == (len-1))
            vp->flags |= VALID | LITERAL;
    }
    delete [] temp;
}

/* Register every multi-character token the tokenizer must recognise. */
void initialize()
{
    literal("::");
    literal("&&");
    literal("||");
    literal("&=");
    literal("|=");
    literal("<=");
    literal(">=");
    literal("!=");
    literal("==");
    literal("--");
    literal("++");
    literal("+=");
    literal("-=");
    literal("*=");
    literal("/=");
    literal("^=");
    literal("~=");
    literal("<<");
    literal(">>");
    literal("->");
    literal("->*");
    literal("<<=");
    literal(">>=");
    literal("/*");
    literal("*/");
    literal("//");
    literal("[]");
    literal("**");

    /* hacks */
    literal("(*");
    literal("( *");
    literal("};");
    literal("} ;");
}

int currentLine = 0;

/* Growable, always NUL-terminated character buffer used while a token is
   being assembled; replaces a fixed 2000-byte array that long comments,
   strings or preprocessor lines could overflow. */
class tokenbuf {
public:
    char *data;
    int len;
    int cap;

    tokenbuf() {
        cap = 256;
        data = new char[cap];
        len = 0;
        data[0] = 0;
    }

    ~tokenbuf() { delete [] data; }

    /* append one character, doubling the storage when it is full */
    void put(int c) {
        if (len + 2 > cap) {
            int bigger = cap * 2;
            char *grown = new char[bigger];

            for (int i = 0; i < len; i++)
                grown[i] = data[i];
            delete [] data;
            data = grown;
            cap = bigger;
        }
        data[len++] = (char) c;
        data[len] = 0;
    }

    /* remove and return the last character (caller guarantees len > 0) */
    int unput() {
        int c = (unsigned char) data[--len];

        data[len] = 0;
        return c;
    }

private:
    tokenbuf(const tokenbuf &);             /* not copyable */
    tokenbuf &operator=(const tokenbuf &);
};

/* true for the characters that make up identifiers and numbers */
static int isWordChar(int c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
        || (c >= '0' && c <= '9') || c == '_';
}

/* Read the next token from 'f' and return its dictionary entry, or 0 at end
   of file (an unterminated block comment also ends the token stream).
   Newlines, whitespace runs, preprocessor lines and comments are returned as
   tokens carrying the NEWLINE, WHITESPACE, PREPROC and COMMENT flags. */
variable *nexttoken(FILE *f)
{
    variable *vp;
    tokenbuf buffer;
    int c = getc(f);

    if (c == EOF)
        return 0;

    if (c == '\n' || c == '\r') {
        /* a CR LF pair is one line end, not two */
        if (c == '\r') {
            int c2 = getc(f);

            if (c2 != '\n' && c2 != EOF)
                ungetc(c2, f);
        }
        buffer.put('\n');
        vp = lookup(buffer.data, 1);
        vp->flags |= NEWLINE;
        return vp;
    }

    if (c == ' ' || c == '\t') {
        do {
            buffer.put(c);
            c = getc(f);
        } while (c == ' ' || c == '\t');
        if (c != EOF)
            ungetc(c, f);
        vp = lookup(buffer.data, 1);
        vp->flags |= WHITESPACE;
        return vp;
    }

    if (isWordChar(c)) {
        do {
            buffer.put(c);
            c = getc(f);
        } while (isWordChar(c));
        if (c != EOF)
            ungetc(c, f);
        return lookup(buffer.data, 1);
    }

    if (c == '"' || c == '\'') {
        /* quoteFlag is set while the previous character was an unescaped
           backslash, so an escaped quote does not end the literal */
        int quoteFlag = 0;
        int match = c;

        do {
            buffer.put(c);
            quoteFlag = (c == '\\' && ! quoteFlag);
            c = getc(f);
        } while (c != EOF && (c != match || quoteFlag));
        buffer.put(match);
        return lookup(buffer.data, 1);
    }

    if (c == '#') {
        /* gather the whole directive, following backslash continuations;
           carriage returns are dropped */
        for (;;) {
            do {
                buffer.put(c);
                c = getc(f);
                if (c == '\r')
                    c = getc(f);
            } while (c != EOF && c != '\n');
            if (c == EOF || buffer.data[buffer.len-1] != '\\')
                break;
        }
        if (c != EOF)
            ungetc(c, f);   /* the line end becomes its own NEWLINE token */
        vp = lookup(buffer.data, 1);
        vp->flags |= PREPROC;
        return vp;
    }

    /* punctuation: keep extending while the text so far is a known stem */
    do {
        buffer.put(c);
        vp = lookup(buffer.data, 0);
        if (vp && (vp->flags & PARTIAL)) {
            c = getc(f);
            if (c == EOF)
                return vp;
        }
    } while (vp && (vp->flags & PARTIAL));

    if (vp && (vp->flags & LITERAL))
        return vp;

    /* all literal stems are themselves literal */
    if (buffer.len > 1)
        ungetc(buffer.unput(), f);   /* give back the character that broke the match */

    vp = lookup(buffer.data, 1);

    if (vp == comment1) {
        /* block comment: read through the closing star-slash. The length
           test stops the opener's own '*' from pairing with a following '/'. */
        do {
            c = getc(f);
            if (c == EOF)
                return 0;
            buffer.put(c);
        } while (!(buffer.len >= 4 && buffer.data[buffer.len-2] == '*'
                   && buffer.data[buffer.len-1] == '/'));
        vp = lookup(buffer.data, 1);
        vp->flags |= COMMENT;
    }
    else if (vp == comment2) {
        /* line comment: read to the end of the line. The line end is
           consumed and not stored; carriage returns are dropped. Reaching
           end of file also ends the comment instead of discarding it. */
        for (;;) {
            c = getc(f);
            if (c == EOF || c == '\n')
                break;
            if (c == '\r')
                continue;
            buffer.put(c);
        }
        vp = lookup(buffer.data, 1);
        vp->flags |= COMMENT;
    }
    return vp;
}

/* Tokenize all of 'f' into a singly linked list of items in source order.
   Returns the head of the list, or 0 for empty input. 'which' is unused. */
item *parse(FILE *f, int which)
{
    item *head = 0, *tail = 0;
    variable *vp;

    (void) which;

    while ((vp = nexttoken(f)) != 0) {
        item *node = new item(vp, 0);

        if (tail)
            tail->next = node;
        else
            head = node;
        tail = node;
    }
    return head;
}

/* Arrange for token 'from' to be emitted as 'to'. 'to' is stored by pointer,
   not copied, so it must outlive the dictionary. */
void remap(const char *from, const char *to)
{
    variable *vp = lookup(from, 1);

    vp->flags |= MAPPED;
    vp->subst = to;
}

/* '\004' will consume any adjacent space (left or right) */
void munge()
{
    remap(".", "\004.\004");
    remap("+", "\004+\004");
    remap("-", "\004-\004");
    remap("/", " / ");
    remap("&", " & ");
    remap("|", " | ");
    remap("<", " < ");
    remap(">", " > ");
    remap("=", " = ");
    remap("*", " *\004");
    remap("~", " ~\004");
    remap("&&", " && ");
    remap("||", " || ");
    remap("&=", " &= ");
    remap("|=", " |= ");
    remap("<=", " <= ");
    remap(">=", " >= ");
    remap("!=", " != ");
    remap("==", " == ");
    remap("::", "\004::\004");
    remap("++", "\004++\004");
    remap("--", "\004--\004");
    remap("->", "\004->\004");
    remap("->*", "\004->*\004");
    remap("+=", " += ");
    remap("-=", " -= ");
    remap("*=", " *= ");
    remap("/=", " /= ");
    remap("^=", " ^= ");
    remap("~=", " ~= ");
    remap("<<", " << ");
    remap(">>", " >> ");
    remap("<<=", " <<= ");
    remap(">>=", " >>= ");
    remap(",", "\004, ");
    remap("(", "\004(\004");
    remap(")", "\004)");
    remap("[", "\004[\004");
    remap("]", "\004]\004");
    remap("if", "if ");
    remap("else", "else ");
    remap("while", "while ");
    remap("do", "do ");
    remap("for", "for ");
    remap("switch", "switch ");
    remap("case", "case ");
    remap("[]", " [] "); /* new [] thing */
    remap(";", "\004;\n");
    remap("**", " **\004");
    remap("(*", "\004(*\004");
    remap("( ", "\004(\004");
    remap("( *", "\004(*\004");
    remap("{", " {\n");
    remap("}", "\004}\n");
    remap("} ", "\004}\n");
    remap("} ;", "\004};\n");
    remap("};", "\004};\n");
    remap(":", "\004:\n"); /* treated differently if query */
    remap("?", " ? ");

#if 0
    /* can arbitrarily map any string to any other string */
    remap("_fstrchr", "strchr");
    remap("_fstrcpy", "strcpy");
    remap("_fstrncpy", "strncpy");
    remap("_fstrcat", "strcat");
    remap("_fstrncat", "strncat");
    remap("_fstrlen", "strlen");
    remap("_fstrlen", "strlen");
    remap("_fstrupr", "strupr");
    remap("_fstrlwr", "strlwr");
    remap("_fstrcmp", "strcmp");
    remap("_fstricmp", "stricmp");
    remap("_fstrtok", "strtok");
    remap("_fstrncmp", "strncmp");
    remap("_fstrnicmp", "strnicmp");
#endif
}

/* Advance 'p' past comment, newline and whitespace items and return the
   first other item (or 0 at the end of the list). When the counter pointers
   are non-null, the newlines and comments skipped are added to them. */
item *skipwhite(item *p, int *sawNewline, int *sawComment)
{
    while (p && p->vp->flags & (COMMENT | NEWLINE | WHITESPACE)) {
        if (sawNewline && (p->vp->flags & NEWLINE))
            (*sawNewline)++;
        if (sawComment && (p->vp->flags & COMMENT))
            (*sawComment)++;
        p = p->next;
    }
    return p;
}

/* Reformat the file named by argv[1] onto stdout. Exits with status 4 when
   the argument count is wrong or the file cannot be opened. */
int main(int argc, char *argv [] )
{
    if (argc != 2) {
        fprintf(stderr, "usage: spliff file.cpp > file.out\n");
        exit(4);
    }

    FILE *f = fopen(argv[1], "r");

    if (f == 0) {
        /* previously unchecked: getc() would have been handed a null stream */
        fprintf(stderr, "spliff: cannot open %s\n", argv[1]);
        exit(4);
    }

    initialize();
    comment1 = lookup("/*", 1);
    comment2 = lookup("//", 1);

    item *f1 = parse(f, 0);

    munge();

    char last = ' ';        /* last character considered for output */
    int indent = 0;         /* current '{' nesting depth */
    int tempindent = 0;     /* extra indent for unbraced if/else/for/while bodies */
    int eofn = 0;           /* just emitted a '}' that began at depth 0 */
    int eoln = 0;           /* a line break is due */
    int extraline = 0;
    int query = 0;          /* count of '?' still waiting for their ':' */

    /* keywords are looked up without creating them, so any that never
       occur in the input (and are not remapped) are 0 and match no token */
    variable *_for = lookup("for", 0);
    variable *_else = lookup("else", 0);
    variable *_do = lookup("do", 0);
    variable *_while = lookup("while", 0);
    variable *_if = lookup("if", 0);
    variable *_case = lookup("case", 0);
    variable *_default = lookup("default", 0);
    variable *_break = lookup("break", 0);
    variable *_return = lookup("return", 0);
    variable *_public = lookup("public", 0);
    variable *_private = lookup("private", 0);
    variable *_protected = lookup("protected", 0);
    variable *_colon = lookup(":", 0);

    /* comments ahead of the first real token are printed one per line */
    item *f2 = skipwhite(f1, 0, 0);
    item *fp = f1;

    while (fp != f2) {
        if (fp->flags & COMMENT)
            printf("%s\n", fp->vp->text);
        fp = fp->next;
    }

    while ((f1 = skipwhite(f1, 0, 0)) != 0) {
        int _indent = indent;   /* depth before this token is processed */

        if (f1->vp == _do) {
            /* find the 'while' that closes this 'do' at the same nesting
               level; the token before it loses its line break */
            int nest = 0;

            fp = f1;
            f2 = fp->next;
            while ((f2 = skipwhite(f2, 0, 0)) != 0) {
                if (f2->vp->text[0] == '{' || f2->vp->text[0] == '(')
                    nest++;
                else if (f2->vp->text[0] == '}' || f2->vp->text[0] == ')')
                    nest--;
                else if (nest == 0 && f2->vp == _while) {
                    fp->flags |= DROPCR;
                    f2->flags |= DOWHILE;
                    break;
                }
                fp = f2;
                f2 = f2->next;
            }
        }
        else if (f1->vp == _for) {
            /* no line breaks inside the parentheses of a 'for' */
            int rounds = 0;

            f2 = f1->next;
            while ((f2 = skipwhite(f2, 0, 0)) != 0) {
                if (f2->vp->text[0] == '(')
                    rounds++;
                else if (f2->vp->text[0] == ')') {
                    rounds--;
                    if (rounds == 0)
                        break;
                }
                f2->flags |= DROPCR;
                f2 = f2->next;
            }
        }
        else if (f1->vp == _else) {
            f2 = skipwhite(f1->next, 0, 0);
            /* f2 is 0 when 'else' is the last token; it used to be
               dereferenced regardless */
            if (f2 && f2->vp != _if && f2->vp->text[0] != '{')
                f1->flags |= SINGLE;
        }
        else if (f1->vp == _break || f1->vp == _return
                 || (f1->vp == _while && (f1->flags & DOWHILE))) {
            /* force a line break after the ';' that ends the statement */
            f2 = f1->next;
            while ((f2 = skipwhite(f2, 0, 0)) != 0) {
                if (f2->vp->text[0] == ';') {
                    f2->flags |= FORCECR;
                    break;
                }
                f2 = f2->next;
            }
        }

        if (f1->vp == _if || f1->vp == _for
            || (f1->vp == _while && ! (f1->flags & DOWHILE))) {
            /* mark the ')' closing the condition as SINGLE when no '{'
               follows it */
            int rounds = 0;

            f2 = f1->next;
            while ((f2 = skipwhite(f2, 0, 0)) != 0) {
                if (f2->vp->text[0] == '(')
                    rounds++;
                else if (f2->vp->text[0] == ')') {
                    rounds--;
                    if (rounds == 0) {
                        fp = skipwhite(f2->next, 0, 0);
                        if (! fp || fp->vp->text[0] != '{')
                            f2->flags |= SINGLE;
                        break;
                    }
                }
                f2 = f2->next;
            }
        }

        if (f1->flags & SINGLE) {
            /* find the ';' or '}' that ends the unbraced body and record
               that the temporary indent is undone there */
            int nest = 0;

            fp = f1->next;
            while ((fp = skipwhite(fp, 0, 0)) != 0) {
                if (fp->vp->text[0] == '{' || fp->vp->text[0] == '(')
                    nest++;
                else if (fp->vp->text[0] == '}' || fp->vp->text[0] == ')')
                    nest--;
                if (nest == 0 && (fp->vp->text[0] == '}' || fp->vp->text[0] == ';')) {
                    fp->flags |= EXDENT;
                    fp->exdent++;
                    break;
                }
                fp = fp->next;
            }
        }

        /* MAPPED is read from the dictionary entry because munge() runs
           after the items were created */
        const char *t = (f1->vp->flags & MAPPED) ? f1->vp->subst : f1->vp->text;

        if (query && f1->vp == _colon) {
            t = " : ";
            query--;
        }

        if (last == ' ' || last == '\004' || *t == ' ' || *t == '\004') {
            if (last == ' ' && *t == ' ')
                t++;
        }
        else if (! eoln && ! eofn)
            printf(" ");

        if ((eoln || eofn) && f1->vp->text[0] == '{' && *t == ' ')
            t++;

        if (f1->vp->text[0] == '{')
            indent++;

        eoln = 0;
        eofn = 0;

#ifdef ADDBRACKETS
        if (f1->flags & SINGLE) {
            fp = f1;
#ifdef BRACKETSTYLE2
            // bracket goes after any comments
            while (fp && fp->next && (fp->next->flags & (COMMENT | WHITESPACE))
                   && ! (fp->next->flags & NEWLINE))
                fp = fp->next;
#endif
            f2 = new item(lookup("{", 0), 0);
            f2->next = fp->next;
            fp->next = f2;
        }

        if (f1->flags & EXDENT) {
            if (f1->exdent > 1) {
                fprintf(stderr, "if/if in %s\n", argv[1]);
            }
            for (int i = 0; i < f1->exdent; i++) {
                f2 = new item(lookup("}", 0), 0);
                f2->next = f1->next;
                f1->next = f2;
            }
        }
#else
        if (f1->flags & SINGLE) {
            tempindent++;
            eoln = 1;
        }

        if (f1->flags & EXDENT) {
            tempindent -= f1->exdent;
            f1->flags |= FORCECR;
        }
#endif

        if (f1->vp->text[0] == '#') {
            printf("%s", f1->vp->text);
            eoln = 1;
            t = "";
        }

        /* emit the token: '\004' is never printed and '\n' only requests
           a line break (ignored altogether under DROPCR) */
        while (*t) {
            if ((last = *t++) != '\004') {
                if (f1->flags & DROPCR) {
                    if (last == '\n')
                        continue;
                }
                if (last == '\n') {
                    eoln = 1;
                    continue;
                }
                if (last == '}' && _indent == 0)
                    eofn = 1;
                printf("%c", last);
            }
        }

        f2 = skipwhite(f1->next, & extraline, 0);

        if (f2 && f2->vp->text[0] == '}')
            indent--;

#ifdef NOBLANKLINES
        extraline = 0; /* no blank lines inferred from source */
#endif

        int supressextra = (f1->vp->text[0] == '{' || f1->vp->text[0] == '}'
            || (f2 != 0 && (f2->vp->text[0] == '{' || f2->vp->text[0] == '}'
                            || f2->vp == _else || f2->vp == _break)));

        if (((f1->flags & FORCECR) || extraline > 1) && ! supressextra) {
            extraline = 1;
            eoln = 1;
        }
        else
            extraline = 0;

        if (f2 && ((indent == 0 && f2->vp->text[0] == '{') || f2->vp->text[0] == '}'))
            eoln = 1;

        if (f1->vp->text[0] == '?')
            query++; /* special treatment for ':' */

#ifdef BRACKETSTYLE2
        if (f2 && f2->vp->text[0] == '{')
            eoln = 1; /* tim style bracketing */
#endif

        if (eoln) {
            /* case/default labels, access specifiers and anything directly
               followed by ':' are pulled back from the current indent */
            int adjust = (f2 && (f2->vp == _case || f2->vp == _public
                                 || f2->vp == _private || f2->vp == _protected
                                 || f2->vp == _default)) ? -1 : 0;

            if (! adjust && f2 && f2->next && f2->next->vp == _colon)
                adjust = -1;

#ifdef INCLUDECOMMENTS
            // look for comments to tack on end of this line ...
            fp = f1->next;
            while (fp != f2 && ! (fp->flags & NEWLINE)) {
                if (fp->flags & COMMENT)
                    printf(" %s", fp->vp->text);
                fp = fp->next;
            }
#endif

            if (extraline)
                printf("\n");
            printf("\n");

            int tabs = 4 * (indent + tempindent + adjust) - 2 * adjust;

#ifdef INCLUDECOMMENTS
            /* remaining comments before the next token each get a line of
               their own at the new indent */
            int hadcomment = 0;

            while (fp != f2) {
                if (fp->flags & COMMENT) {
                    if (! extraline && (! supressextra || _indent == 0)) {
                        extraline = -1;
                        printf("\n");
                        eofn = 0;
                    }
                    for (int i = 0; i < tabs; i++)
                        printf(" ");
                    printf("%s\n", fp->vp->text);
                    hadcomment = 1;
                }
                fp = fp->next;
            }

            if (hadcomment)
                printf("\n");
#endif

            /* preprocessor lines are not indented */
            if (!(f2 && f2->vp->text[0] == '#'))
                for (int i = 0; i < tabs; i++)
                    printf(" ");
        }

        if (eofn) {
            printf("\n\n");
        }

        extraline = 0;
        f1 = f1->next;
    }

    printf("\n");
    fclose(f);
    return 0;
}

/* end */