/*********************************************************************/
/* ec.c - extract comments from C or C++ source files                */
/* by Paul Duncan (pabs) <pabs@pablotron.org> and                    */
/*    Brian Almieda (bma) <bma@tynian.net>                           */
/*                                                                   */
/* - Compile: eval $(tail -1 ec.c) # or !eval $(tail -1 %) in vim    */
/* - Usage:  ./ec --help                                             */
/*                                                                   */
/* The following code is hideous; view at your own risk.             */
/*********************************************************************/

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#define UNUSED(x) ((void) x)
#define VERBOSE_OUTPUT 0

/* default values */
#define INCLUDE_COMMENT_CHARS 0
#define ALLOW_PERL_COMMENTS 0

/* quick singly linked list; the tag avoids the reserved "_" + uppercase
 * identifier space, the typedef name is what the rest of the file uses */
typedef struct SList SList;
struct SList {
  void *value;  /* payload pointer, stored as-is (never copied) */
  SList *next;  /* following node, or NULL for the last node */
};

/* kind of comment the scanner is currently inside of */
typedef enum {
  COMMENT_NONE       = 0,  /* not inside any comment */
  COMMENT_C_STYLE    = 1,  /* block comment, closed by star-slash */
  COMMENT_CPP_STYLE  = 2,  /* double-slash comment, closed by a newline */
  COMMENT_PERL_STYLE = 3   /* '#' comment, closed by a newline */
} CommentType;

/* kind of quoted literal the scanner is currently inside of;
 * QUOTE_NONE is zero so the value can be tested as a boolean */
typedef enum {
  QUOTE_NONE   = 0,  /* not inside a quoted literal */
  QUOTE_SINGLE = 1,  /* inside '...' */
  QUOTE_DOUBLE = 2   /* inside "..." */
} QuoteType;

/* program-wide options; populated by handle_options() and read by main() */
struct {
  char include_comment_chars; /* non-zero: keep the comment delimiters in the output */
  char perl;                  /* non-zero: '#' also starts a comment */
  SList *inputs;              /* input file names in command-line order; "-" means stdin */
} options;

/* print one extracted comment (buf) to stdout; offset is only shown when
 * VERBOSE_OUTPUT is enabled */
void dump_comment(int offset, char *buf);
/* fill in the global options from the command line; arguments that are not
 * recognized as options are collected as input file names */
void handle_options(int argc, char *argv[]);
/* print the usage screen to stderr and exit with EXIT_FAILURE */
void print_usage(char *app);
/* append value to the end of list and return the list head; exits the
 * program if the node cannot be allocated */
SList *s_list_append(SList *list, void *value);

/* capacity of the comment buffer, including the terminating NUL */
enum { COMMENT_BUF_SIZE = 4096 };

/*
 * Append one character to the comment being collected.
 *
 * *len counts every character of the comment, including those that no
 * longer fit; only the first cap - 1 characters are actually stored, so an
 * oversized comment can never overrun the buffer.
 */
static void comment_put(char *buf, size_t cap, size_t *len, int c) {
  if (*len < cap - 1)
    buf[*len] = (char) c;

  (*len)++;
}

/*
 * NUL-terminate the collected comment and pass it to dump_comment().
 *
 * len    - total number of characters seen for this comment
 * strip  - number of trailing delimiter characters to drop from the end
 * offset - value forwarded to dump_comment()
 *
 * A comment that did not fit into the buffer is truncated and reported on
 * stderr.
 */
static void comment_finish(char *buf, size_t cap, size_t len, size_t strip,
                           int offset) {
  size_t end = (len > strip) ? len - strip : 0;

  if (end > cap - 1) {
    fprintf(stderr, "WARNING: Comment at offset %d truncated to %lu bytes\n",
            offset, (unsigned long) (cap - 1));
    end = cap - 1;
  }

  buf[end] = 0;
  dump_comment(offset, buf);
}

/*
 * Scan every input named in options.inputs and print each comment found.
 *
 * The scanner is a small state machine driven by the current character and
 * the previous one ("prev").  Comment and quote state is reset for every
 * input; the character offset passed to dump_comment() keeps counting
 * across inputs.
 *
 * Returns EXIT_SUCCESS; exits with EXIT_FAILURE if an input cannot be
 * opened.
 */
int main(int argc, char *argv[]) {
  char buf[COMMENT_BUF_SIZE] = "";
  size_t len = 0;   /* characters seen in the current comment */
  int start = 0;    /* offset recorded when the current comment opened */
  int pos = 0;      /* offset of the current character, across all inputs */
  SList *n = NULL;

  handle_options(argc, argv);

  for (n = options.inputs; n; n = n->next) {
    const char *path = n->value;
    CommentType in_comment = COMMENT_NONE;
    QuoteType in_quotes = QUOTE_NONE;
    FILE *in = NULL;
    int prev = 0;
    int c = 0;

    if (!strcmp(path, "-")) {
      in = stdin;
    } else if (!(in = fopen(path, "r"))) {
      fprintf(stderr, "FATAL: Couldn't open input file \"%s\": %s\n",
              path, strerror(errno));
      exit(EXIT_FAILURE);
    }

    /* compare against EOF, not 0, so a NUL byte does not end the scan */
    while ((c = fgetc(in)) != EOF) {
      /* what "prev" becomes for the next character; set to 0 whenever the
       * current character must not pair up with the one that follows */
      int next_prev = c;

      if (in_comment != COMMENT_NONE) {
        comment_put(buf, sizeof buf, &len, c);

        switch (in_comment) {
          case COMMENT_C_STYLE:
            if (c == '/' && prev == '*') {
              /* drop the two closing characters unless delimiters are kept */
              comment_finish(buf, sizeof buf, len,
                             options.include_comment_chars ? 0 : 2, start);
              in_comment = COMMENT_NONE;
              /* the closing '/' must not start a new comment */
              next_prev = 0;
            }

            break;
          case COMMENT_PERL_STYLE:
          case COMMENT_CPP_STYLE:
            if (c == '\n') {
              /* the newline is never part of the output */
              comment_finish(buf, sizeof buf, len, 1, start);
              in_comment = COMMENT_NONE;
              next_prev = 0;
            }

            break;
          default:
            fprintf(stderr, "WARNING: Ignoring unknown comment style: %d\n",
                    (int) in_comment);
            in_comment = COMMENT_NONE;
            break;
        }
      } else if (in_quotes != QUOTE_NONE) {
        char closer = (in_quotes == QUOTE_DOUBLE) ? '"' : '\'';

        if (prev == '\\') {
          /* escaped character: it cannot close the literal, and the escape
           * is now used up, so "\\" followed by a quote closes properly */
          next_prev = 0;
        } else if (c == closer) {
          in_quotes = QUOTE_NONE;
        }
      } else if (c == '"' && prev != '\\') {
        in_quotes = QUOTE_DOUBLE;
      } else if (c == '\'' && prev != '\\') {
        in_quotes = QUOTE_SINGLE;
      } else if (c == '#' && options.perl && prev != '\\') {
        in_comment = COMMENT_PERL_STYLE;
        start = pos;
        len = 0;

        if (options.include_comment_chars)
          comment_put(buf, sizeof buf, &len, c);
      } else if (c == '/' && prev == '/') {
        in_comment = COMMENT_CPP_STYLE;
        start = pos;
        len = 0;

        if (options.include_comment_chars) {
          comment_put(buf, sizeof buf, &len, '/');
          comment_put(buf, sizeof buf, &len, c);
        }
      } else if (c == '*' && prev == '/') {
        in_comment = COMMENT_C_STYLE;
        start = pos;
        len = 0;

        if (options.include_comment_chars) {
          comment_put(buf, sizeof buf, &len, '/');
          comment_put(buf, sizeof buf, &len, c);
        }

        /* the opening '*' must not double as the start of the closer,
         * otherwise slash-star-slash would count as a whole comment */
        next_prev = 0;
      }

      prev = next_prev;
      pos++;
    }

    if (ferror(in)) {
      fprintf(stderr, "WARNING: Error while reading \"%s\": %s\n",
              path, strerror(errno));
    }

    /* a line comment on a final line without a newline is still a comment */
    if (in_comment == COMMENT_CPP_STYLE || in_comment == COMMENT_PERL_STYLE) {
      comment_finish(buf, sizeof buf, len, 0, start);
    } else if (in_comment == COMMENT_C_STYLE) {
      fprintf(stderr, "WARNING: Unterminated comment at end of \"%s\"\n",
              path);
    }

    if (in != stdin)
      fclose(in);
  }

  fflush(stdout);
  return EXIT_SUCCESS;
}

/*
 * Write one extracted comment to stdout.
 *
 * offset - position reported in the verbose form only
 * buf    - NUL-terminated comment text
 *
 * With VERBOSE_OUTPUT disabled the text is printed on a line of its own;
 * otherwise it is wrapped in a <comment> element carrying the offset.
 */
void dump_comment(int offset, char *buf) {
  if (!VERBOSE_OUTPUT) {
    puts(buf);
    return;
  }

  printf("<comment offset=\"%d\">%s</comment>\n", offset, buf);
}

/* return non-zero when arg equals name, ignoring case (whole string) */
static int option_matches(const char *arg, const char *name) {
  return strcasecmp(arg, name) == 0;
}

/*
 * Parse the command line into the global options structure.
 *
 * argc, argv - as received by main(); argv[0] is used as the program name
 *              when the usage screen is requested
 *
 * Option names are matched case-insensitively.  Any argument that is not a
 * known option is taken to be an input file name and is appended to
 * options.inputs in the order given.  When no input is named, a single "-"
 * entry (standard input) is used.
 */
void handle_options(int argc, char *argv[]) {
  /* static storage for the default input name: nothing to allocate, so
   * nothing that can fail or needs freeing */
  static char stdin_name[] = "-";
  int i = 0;

  /* set up option defaults */
  options.inputs = NULL;
  options.include_comment_chars = INCLUDE_COMMENT_CHARS;
  options.perl = ALLOW_PERL_COMMENTS;

  /* option parsing */
  for (i = 1; i < argc; i++) {
    const char *arg = argv[i];

    if (option_matches(arg, "-h") ||
        option_matches(arg, "-?") ||
        option_matches(arg, "--help")) {
      print_usage(argv[0]);
    } else if (option_matches(arg, "-p") ||
               option_matches(arg, "--perl")) {
      options.perl = 1;
    } else if (option_matches(arg, "-i") ||
               option_matches(arg, "--include") ||
               option_matches(arg, "--include-chars")) {
      options.include_comment_chars = 1;
    } else {
      /* unknown option -- assume it's a filename and append it to the
       * list of input files */
      options.inputs = s_list_append(options.inputs, argv[i]);
    }
  }

  /* if we weren't passed any files, then set up the default input */
  if (!options.inputs)
    options.inputs = s_list_append(options.inputs, stdin_name);
}

/*
 * Print the usage screen to stderr and terminate the program.
 *
 * app - program name substituted into the usage and example lines
 *
 * Does not return: always exits with EXIT_FAILURE.
 */
void print_usage(char *app) {
  /* banner */
  fputs(
    "ec - extract comments from C and C++ files\n"
    "by Paul Duncan (pabs) <pabs@pablotron.org> and\n"
    "   Brian Almieda (bma) <bma@tynian.net>\n"
    "\n"
    "Usage:\n",
    stderr);

  /* synopsis */
  fprintf(stderr, "  %s <options> [files]\n", app);

  /* option summary */
  fputs(
    "\n"
    "Options:\n"
    "  -h, -?, --help : Print this usage screen.\n"
    "  -p, --perl     : Allow Perl/PHP/Ruby/shell-style comments.\n"
    "  -i, --include  : Include comment characters in output.\n"
    "  files          : Input files (\"-\" for stdin).  If no files are\n"
    "                   specified, input defaults to standard input.\n"
    "\n"
    "Examples:\n",
    stderr);

  /* examples */
  fprintf(stderr,
    "  %s < bleh.c > bleh_comments.txt\n"
    "  %s -i *.c > all_comments.txt\n"
    "  %s -p *.pl > all_perl_comments.txt\n",
    app, app, app);

  /* closing notes */
  fputs(
    "\n"
    "Notes:\n"
    "  - The Perl comment parsing does not disable C-style comment\n"
    "    parsing.  This is to be construed as a feature ;).\n",
    stderr);

  exit(EXIT_FAILURE);
}

#define SLIST_PFX "s_list_append(): FATAL: "
/*
 * Append a value to the end of a singly linked list.
 *
 * list  - head of the list, or NULL for an empty list
 * value - pointer stored in the new node (not copied)
 *
 * Returns the head of the list: the new node when list was NULL, list
 * itself otherwise.  Prints a message and exits with EXIT_FAILURE if the
 * node cannot be allocated.
 */
SList *s_list_append(SList *list, void *value)
{
  SList **link = &list;
  SList *node = malloc(sizeof(SList));

  if (node == NULL) {
    fprintf(stderr, "FATAL: s_list_append(): " 
                    "Couldn't allocate memory for node: %s\n",
                    strerror(errno));
    exit(EXIT_FAILURE);
  }

  node->value = value;
  node->next = NULL;

  /* walk to the NULL link that terminates the list; for an empty list
   * that link is the head pointer itself */
  while (*link != NULL)
    link = &(*link)->next;

  *link = node;

  return list;
}

/* Compile line for this code; do this to compile me:
 * eval $(tail -1 ec.c) (or !eval $(tail -1 %) in vim)
cc -o ec -O2 -W -Wall -g -pedantic ec.c # */

