C++ - 5067 字节,4612 * 0.9 * 0.8 = 3320(如果"能够格式化自身"也算的话,再 * 0.9 = 2988——它本身就是用 C++ 编写的)
我知道这比这里已有的解决方案都要长,但我还是决定把它发布出来,因为在 xfix 发布他的 C 解决方案之前,我就已经开始编写自己的版本了。
- 它适用于多行注释
- 输出的HTML包含错误(但在Chrome中正确显示)
- 它从input.c中读取并产生output.html
其中大约一半的字节都用在了庞大的 C 和 C++ 关键字列表上。
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <locale>
//Golfed highlighter: reads input.c, writes syntax-highlighted HTML to output.html.
//Macros: B = break, C = case;
//  Z(a,b,c): on character a enter state b and open colour tag c;
//  V(a,b):   on two-character text a enter state b and open the comment tag;
//  N(a):     on newline run a and start a new numbered line;
//  Q(a):     inside a literal copy escape sequences, close the literal on quote a.
//States z: 0 code, 2 char literal, 3 string literal, 4 line comment, 5 block comment, 6 preprocessor, 7 identifier.
//Fixes kept minimal: p() emits real HTML entities again (it did not compile), r() reports matches,
//the character after a keyword is re-scanned instead of being copied blindly, and the identifier
//look-ahead in state 0 inspects the next character rather than the one after it.
#define B break
#define Z(a,b,c)if(s[i]==a){z=b;o+=c+p(s[i]);i++;B;}
#define V(a,b)if(e(s,i,a)){z=b;o+=OC+p(s.substr(i,2));i+=2;B;}
#define N(a) if(s[i]=='\n'){a;o+=nl();i++;B;}
#define Q(a)if(e(s,i,"\\")){o+=p(s.substr(i,2));i+=2;B;}if(s[i]==a){z=0;o+=p(s[i])+CL;i++;B;}
#define C case
using namespace std;string k[]={"__abstract","__alignof","_Alignas","_alignof","and","and_eq","__asm","_asm","asm","__assume","_assume","auto","__based","_based","bitand","bitor","bool","_Bool","__box","break","__builtin_alignof","_builtin_alignof","__builtin_isfloat","case","catch","__cdecl","_cdecl","_Complex","cdecl","char","class","__compileBreak","_compileBreak","compl","const","const_cast","continue","__declspec","_declspec","default","__delegate","delete","do","double","dynamic_cast","else","enum","__event","__except","_except","explicit","__export","_export","extern","false","__far","_far","far","__far16","_far16","__fastcall","_fastcall","__feacpBreak","_feacpBreak","__finally","_finally","float","for","__forceinline","_forceinline","__fortran","_fortran","fortran","friend","_Generic","__gc","goto","__hook","__huge","_huge","huge","_Imaginary","__identifier","if","__if_exists","__if_not_exists","__inline","_inline","inline","int","__int128","__int16","_int16","__int32","_int32","__int64","_int64","__int8","_int8","__interface","__leave","_leave","long","__multiple_inheritance","_multiple_inheritance","mutable","namespace","__near","_near","near","new","_Noreturn","__nodefault","__nogc","__nontemporal","not","not_eq","__nounwind","__novtordisp","_novtordisp","operator","or","or_eq","__pascal","_pascal","pascal","__pin","__pragma","_pragma","private","__probability","__property","protected","__ptr32","_ptr32","__ptr64","_ptr64","public","__raise","register","reinterpret_cast","restrict","__restrict","__resume","return","__sealed","__serializable","_serializable","short","signed","__single_inheritance","_single_inheritance","sizeof","static","static_cast","_Static_assert","__stdcall","_stdcall","struct","__super","switch","__sysapi","__syscall","_syscall","template","this","__thiscall","_thiscall","throw","_Thread_local","__transient","_transient","true","__try","_try","try","__try_cast","typedef","typeid","typename","__typeof","__unaligned","__unhook","union"
,"unsigned","using","__uuidof","_uuidof","__value","virtual","__virtual_inheritance","_virtual_inheritance","void","volatile","__w64","_w64","__wchar_t","wchar_t","while","xor","xor_eq"};string OS="<font color=\"#00FF00\">";string OC="<font color=\"#FFFF00\">";string OK="<font color=\"#0000FF\">";string OP="<font color=\"#FF00FF\">";string CL="</font>";string NL[]={ "<li class=\"li l1\">","<li class=\"li l2\">" };bool lo=1;string nl() {lo=!lo;return "</li>"+NL[lo];}bool r(char c,string s) {for (size_t i=0; i<s.size(); i++)if (c==s[i])return 1;return 0;}bool is(string s,int i) {return !(i<0||i>=s.size())&&((s[i]=='_')||isalpha(s[i]));}bool ic(string s,int i){return !(i<0||i>=s.size())&&(is(s,i)||('0'<=s[i]&&s[i]<='9'));}bool e(string a,int s,string b) {return !(a.size()-s<b.size())&&a.substr(s,b.size())==b;}string p(char c) {switch (c) {C '&':return "&amp;";C '\"':return "&quot;";C '\'':return "&#39;";C '<':return "&lt;";C '>':return "&gt;";}stringstream s;s<<c;return s.str();}string p(string s) {string ans="";for (size_t i=0; i<s.size(); i++) {ans+=p(s[i]);}return ans;}string h(string s) {int z=0;size_t i=0;string o ="<html><body><style type=\"text/css\">.l{list-style-type: decimal;margin-top:0;margin-bottom:0;} .li{display:list-item;word-wrap:break-word;} .l1{background-color:#FFFFFF;} .l2{background-color:#EEEEEE;} .cd{white-space:pre;}</style><code class=\"cd\"><ul class=\"l\">";o+=NL[1];for (; i<s.size();) {switch (z) {C 0:{Z('#',6,OP)Z('"',3,OS)Z('\'',2,OS)V("//",4)V("/*",5)N()size_t j=0;for (; j<201; j++)if (e(s,i,k[j])&&!ic(s,i+k[j].size())) {o+=OK+p(k[j])+CL;i+=k[j].size();B;}if(j<201)B;if (is(s,i)) {z=7;o+=p(s[i]);i++;if (!ic(s,i)) {z=0;}B;}o+=p(s[i]);i++;B;}o+=p(s[i]);i++;B;C 2:Q('\'')C 3:Q('"')C 4:{N(z=0)o+=p(s[i]); i++;B;}C 5:{if (e(s,i,"*/")) {z=0;o+=p(s.substr(i,2))+CL;i+=2;B;}N()o+=p(s[i]);i++;B;}C 6:{if (s[i]=='\n') {int j=i-1;for (; j>=0&&r(s[j],"\t "); j--);if (j<0||s[j] != '\\') {z=0;o+=CL+nl();i++;B;}o+=nl();i++;B;}o+=p(s[i]);i++;B;}C 7:{if (i+1==s.size()||!ic(s,i+1)) 
{z=0;}o+=p(s[i]);i++;B;}}}o+="</ul></code>";return o;}int main() {ifstream i("input.c");ofstream o("output.html");string cCode((istreambuf_iterator<char>(i)),istreambuf_iterator<char>());o<<h(cCode)<<endl;}
可读版本:
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
using namespace std;
//Keywords of C and C++ plus common compiler extensions; highlight() colours
//every identifier that equals one of these entries.
std::string keywords[] = { "__abstract", "__alignof", "_Alignas", "_alignof", "and",
        "and_eq", "__asm", "_asm", "asm", "__assume", "_assume", "auto",
        "__based", "_based", "bitand", "bitor", "bool", "_Bool", "__box",
        "break", "__builtin_alignof", "_builtin_alignof", "__builtin_isfloat",
        "case", "catch", "__cdecl", "_cdecl", "_Complex", "cdecl", "char",
        "class", "__compileBreak", "_compileBreak", "compl", "const",
        "const_cast", "continue", "__declspec", "_declspec", "default",
        "__delegate", "delete", "do", "double", "dynamic_cast", "else", "enum",
        "__event", "__except", "_except", "explicit", "__export", "_export",
        "extern", "false", "__far", "_far", "far", "__far16", "_far16",
        "__fastcall", "_fastcall", "__feacpBreak", "_feacpBreak", "__finally",
        "_finally", "float", "for", "__forceinline", "_forceinline",
        "__fortran", "_fortran", "fortran", "friend", "_Generic", "__gc",
        "goto", "__hook", "__huge", "_huge", "huge", "_Imaginary",
        "__identifier", "if", "__if_exists", "__if_not_exists", "__inline",
        "_inline", "inline", "int", "__int128", "__int16", "_int16", "__int32",
        "_int32", "__int64", "_int64", "__int8", "_int8", "__interface",
        "__leave", "_leave", "long", "__multiple_inheritance",
        "_multiple_inheritance", "mutable", "namespace", "__near", "_near",
        "near", "new", "_Noreturn", "__nodefault", "__nogc", "__nontemporal",
        "not", "not_eq", "__nounwind", "__novtordisp", "_novtordisp",
        "operator", "or", "or_eq", "__pascal", "_pascal", "pascal", "__pin",
        "__pragma", "_pragma", "private", "__probability", "__property",
        "protected", "__ptr32", "_ptr32", "__ptr64", "_ptr64", "public",
        "__raise", "register", "reinterpret_cast", "restrict", "__restrict",
        "__resume", "return", "__sealed", "__serializable", "_serializable",
        "short", "signed", "__single_inheritance", "_single_inheritance",
        "sizeof", "static", "static_cast", "_Static_assert", "__stdcall",
        "_stdcall", "struct", "__super", "switch", "__sysapi", "__syscall",
        "_syscall", "template", "this", "__thiscall", "_thiscall", "throw",
        "_Thread_local", "__transient", "_transient", "true", "__try", "_try",
        "try", "__try_cast", "typedef", "typeid", "typename", "__typeof",
        "__unaligned", "__unhook", "union", "unsigned", "using", "__uuidof",
        "_uuidof", "__value", "virtual", "__virtual_inheritance",
        "_virtual_inheritance", "void", "volatile", "__w64", "_w64",
        "__wchar_t", "wchar_t", "while", "xor", "xor_eq",
        //Standard C++11 / C11 keywords that the list above was missing
        "alignas", "alignof", "_Atomic", "char16_t", "char32_t", "constexpr",
        "decltype", "export", "noexcept", "nullptr", "static_assert",
        "thread_local" };
//Number of entries in keywords; derived from the array so the two can never disagree.
const size_t NUMBER_OF_KEYWORDS = sizeof(keywords) / sizeof(keywords[0]);
// Different states
//Scanner states used by highlight(): each value names the construct the scanner is currently inside.
const int NONE = 0; //Ordinary code, no construct is open
const int WHITESPACE = 1; //Declared but not referenced by the visible highlight() code
const int CHAR_UNCLOSED = 2; //Inside a '...' character literal
const int STRING_UNCLOSED = 3; //Inside a "..." string literal
const int LINE_COMMENT_UNCLOSED = 4; //Inside a // comment, which ends at the next newline
const int MULTILINE_COMMENT_UNCLOSED = 5; //Inside a /* ... */ comment
const int PREPROCESSOR_UNCLOSED = 6; //Inside a directive that started with '#'
const int IDENTIFIER = 7; //Inside an identifier, so that keywords are not matched in its middle
//Different elements
//Opening and closing HTML tags that highlight() wraps around each kind of token.
const string OPEN_STRING = "<font color=\"#00FF00\">"; //Green; highlight() uses it for string and character literals alike
const string CLOSE_STRING = "</font>";
const string OPEN_COMMENT = "<font color=\"#FFFF00\">"; //Yellow; used for both // and /* */ comments
const string CLOSE_COMMENT = "</font>";
const string OPEN_KEYWORD = "<font color=\"#0000FF\">"; //Blue; wrapped around entries of the keywords array
const string CLOSE_KEYWORD = "</font>";
const string OPEN_PREPROCESSOR = "<font color=\"#FF00FF\">"; //Magenta; covers a whole '#' directive
const string CLOSE_PREPROCESSOR = "</font>";
//Opening tags for the two alternating line backgrounds (index 0: class l1, index 1: class l2)
const std::string NEW_LINE[] = { "<li class=\"li l1\">", "<li class=\"li l2\">" };
//Selects the NEW_LINE entry used most recently; flipped on every line break
bool lineOdd = true;
//Close the current list item and open the next one with the other background.
std::string getNewLineHTML() {
    lineOdd = !lineOdd;
    std::string html("</li>");
    html += NEW_LINE[lineOdd ? 1 : 0];
    return html;
}
//Tell whether the character c occurs anywhere in chars
bool inRange(char c, std::string chars) {
    const std::string::size_type position = chars.find(c);
    return position != std::string::npos;
}
//Check if input[i] can start an identifier: an ASCII letter or an underscore.
//An index outside the string yields false. The string is taken by reference
//because this runs once per scanned character and must not copy the input.
bool isIdentifierStart(const std::string& input, int i) {
    if (i < 0 || static_cast<size_t>(i) >= input.size())
        return false;
    const char c = input[i];
    return c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
}
//Check if input[i] can continue an identifier: a decimal digit or anything
//accepted by isIdentifierStart. An index outside the string yields false.
//The string is taken by reference so the input is not copied per character.
bool isIdentifierCont(const std::string& input, int i) {
    if (i < 0 || static_cast<size_t>(i) >= input.size())
        return false;
    const char c = input[i];
    return ('0' <= c && c <= '9') || isIdentifierStart(input, i);
}
//Check if b occurs in a at offset start, i.e. a[start + i] == b[i] for all i < b.size().
//Returns false, instead of throwing, when start is negative or past the end of a.
//Both strings are taken by reference: highlight() calls this for every keyword
//at every position, so copying the input here would be quadratic.
bool eqRange(const std::string& a, int start, const std::string& b) {
    if (start < 0 || static_cast<size_t>(start) > a.size())
        return false;
    if (a.size() - static_cast<size_t>(start) < b.size())
        return false;
    return a.compare(static_cast<size_t>(start), b.size(), b) == 0;
}
//Escape one character of source code for HTML: the five characters with a
//special meaning in markup become entities, everything else is returned as is.
std::string escape(char c) {
    switch (c) {
    case '&':
        return "&amp;";
    case '\"':
        return "&quot;";
    case '\'':
        return "&#39;";
    case '<':
        return "&lt;";
    case '>':
        return "&gt;";
    default:
        return std::string(1, c);
    }
}
//Escape every character of str for HTML and return the concatenated result.
std::string escape(const std::string& str) {
    std::string ans;
    ans.reserve(str.size());
    for (size_t i = 0; i < str.size(); i++) {
        ans += escape(str[i]);
    }
    return ans;
}
string highlight(string input) {
//The current state
int state = NONE;
//The current position
size_t i = 0;
//Styles
string output =
"<html><body><style type=\"text/css\">.l{list-style-type: decimal; margin-top: 0; margin-bottom: 0;} .li{ display: list-item; word-wrap: break-word;} .l1{background-color: #FFFFFF;} .l2{background-color: #EEEEEE;} .cd{white-space: pre;}</style><code class=\"cd\"><ul class=\"l\">";
output += NEW_LINE[1];
for (; i < input.size();) {
switch (state) {
case NONE: {
if (input[i] == '#') { //Start a preprocessor statement
state = PREPROCESSOR_UNCLOSED;
output += OPEN_PREPROCESSOR + escape(input[i]);
i++;
break;
}
if (input[i] == '"') { //Start a string
state = STRING_UNCLOSED;
output += OPEN_STRING + escape(input[i]);
i++;
break;
}
if (input[i] == '\'') { //Start a character
state = CHAR_UNCLOSED;
output += OPEN_STRING + escape(input[i]);
i++;
break;
}
if (eqRange(input, i, "//")) { //Start a single line comment
state = LINE_COMMENT_UNCLOSED;
output += OPEN_COMMENT + escape(input.substr(i, 2));
i += 2;
break;
}
if (eqRange(input, i, "/*")) { //Start a multi-line comment
state = MULTILINE_COMMENT_UNCLOSED;
output += OPEN_COMMENT + escape(input.substr(i, 2));
i += 2;
break;
}
if (input[i] == '\n') { //New lines are special!
output += getNewLineHTML();
i++;
break;
}
for (size_t j = 0; j < NUMBER_OF_KEYWORDS; j++) //Iterate through keywords
if (eqRange(input, i, keywords[j])
&& !isIdentifierCont(input, i + keywords[j].size())) { // The keyword can't be a prefix of an identifier, so test for that
output += OPEN_KEYWORD + escape(keywords[j])
+ CLOSE_KEYWORD;
i += keywords[j].size();
break;
}
//Treat identifiers separately because we need to separate identifiers from keywords.
if (isIdentifierStart(input, i)) {
state = IDENTIFIER;
output += escape(input[i]);
i++;
//If the next character is not a part of the identifier, go to the NONE state
if (i + 1 == input.size() || !isIdentifierCont(input, i + 1)) {
state = NONE;
}
break;
}
//Other characters
output += escape(input[i]);
i++;
break;
}
case CHAR_UNCLOSED: {
if (eqRange(input, i, "\\")) { //Treat escape sequences inside quotes
output += escape(input.substr(i, 2));
i += 2;
break;
}
if (input[i] == '\'') { //Close quote
state = NONE;
output += escape(input[i]) + CLOSE_STRING;
i++;
break;
}
output += escape(input[i]); //Other characters go into the literal
i++;
break;
}
case STRING_UNCLOSED: {
if (eqRange(input, i, "\\")) { //Treat escape sequences inside quotes
output += escape(input.substr(i, 2));
i += 2;
break;
}
if (input[i] == '"') { //Close quote
state = NONE;
output += escape(input[i]) + CLOSE_STRING;
i++;
break;
}
output += escape(input[i]); //Other characters go into the literal
i++;
break;
}
case LINE_COMMENT_UNCLOSED: {
if (input[i] == '\n') { //Close comment with new line
state = NONE;
output += CLOSE_COMMENT + getNewLineHTML();
i++;
break;
}
output += escape(input[i]); //Comment body
i++;
break;
}
case MULTILINE_COMMENT_UNCLOSED: {
if (eqRange(input, i, "*/")) { //Close multiline comment
state = NONE;
output += escape(input.substr(i, 2)) + CLOSE_COMMENT;
i += 2;
break;
}
if (input[i] == '\n') { //New lines are special!
output += getNewLineHTML();
i++;
break;
}
output += escape(input[i]); //Comment body
i++;
break;
}
case PREPROCESSOR_UNCLOSED: {
if (input[i] == '\n') { //Close preprocessor statement or go to next line
int j = i - 1;
for (; j >= 0 && inRange(input[j], "\n\t "); j--)
//Seek las non-whitespace character
;
if (j < 0 || input[j] != '\\') { //Check if the last non-whitespace character is a backslash
state = NONE; //... If it isn't, close the preprocessor statement
output += CLOSE_PREPROCESSOR + getNewLineHTML();
i++;
break;
}
output += getNewLineHTML(); //... If it is, we need to extend the preprocessor statement to the next line
i++;
break;
}
output += escape(input[i]);
i++;
break;
}
case IDENTIFIER: {
//If the next character is not a part of the identifier, go to the NONE state
if (i + 1 == input.size() || !isIdentifierCont(input, i + 1)) {
state = NONE;
}
output += escape(input[i]);
i++;
break;
}
}
}
output += "</ul></code>";
return output;
}
//Read input.c, highlight it and write the resulting page to output.html.
//Returns 0 on success and 1 when either file cannot be opened or written.
int main() {
    std::ifstream input("input.c");
    if (!input) {
        std::cerr << "Cannot open input.c for reading" << std::endl;
        return 1;
    }
    std::ofstream output("output.html");
    if (!output) {
        std::cerr << "Cannot open output.html for writing" << std::endl;
        return 1;
    }
    //Slurp the whole file; the extra parentheses keep this from parsing as a function declaration
    std::string cCode((std::istreambuf_iterator<char>(input)),
            std::istreambuf_iterator<char>());
    output << highlight(cCode) << '\n';
    output.flush();
    if (!output) {
        std::cerr << "Failed to write output.html" << std::endl;
        return 1;
    }
    return 0;
}