mcc
mcc is a machine code compiler.
Log | Files | << Repositories
tree 7c009d3514b7ab4423ed4ffafc2c463e4c2dde6f
parent dff1203fd896f95a87dff1deea2222c74081953f
author esote <esote.net@gmail.com> 1563058422 -0500
committer esote <esote.net@gmail.com> 1563058422 -0500
gpgsig -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQTXAxYDuIzimYoNSPuhTmRAjzzC8gUCXSpg+gAKCRChTmRAjzzC
 8hjrAP9hNfj1z84uSO1Ll9WhxGlKkO5HITzZiuHF4panKbLFnQEAuRfx0qd2BPuY
 3H3NLrNmwr/VU/al53E+5OTmXWTsXQU=
 =Qlq9
 -----END PGP SIGNATURE-----

Parse hex
README | 9 ++---
mcc.1 | 9 +++--
mcc.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++-------------
3 files changed, 106 insertions(+), 32 deletions(-)
diff --git a/README b/README index f86604b..b0927f1 100644 --- a/README +++ b/README @@ -2,12 +2,13 @@ NAME mcc - machine code compiler SYNOPSIS - mcc [-3s] [-b addr] [-m memsize] [-o file] [-t addr] file + mcc [-3hs] [-b addr] [-m memsize] [-o file] [-t addr] file DESCRIPTION - mcc is a machine code compiler. It reads ASCII text '0' and '1' mapping - to machine code instructions. All other characters are ignored except - the comment token ';'. Comments begin with ';' and continue until EOL. + mcc is a machine code compiler. It reads binary ASCII text '0' and '1' + mapping to machine code instructions. All other characters are ignored + except the comment token ';'. Comments begin with ';' and continue until + EOL. The -h option enables parsing hexadecimal rather than binary text. Use the -o option to specify the output filename. The default is to put the executable in a.out. diff --git a/mcc.1 b/mcc.1 index 3bcd3eb..751b45c 100644 --- a/mcc.1 +++ b/mcc.1 @@ -14,7 +14,7 @@ .\" You should have received a copy of the GNU Affero General Public License .\" along with this program. If not, see <https://www.gnu.org/licenses/>. .\" -.Dd $Mdocdate: June 16 2019 $ +.Dd $Mdocdate: July 13 2019 $ .Dt MCC 1 .Os .Sh NAME @@ -22,7 +22,7 @@ .Nd machine code compiler .Sh SYNOPSIS .Nm mcc -.Op Fl 3s +.Op Fl 3hs .Op Fl b Ar addr .Op Fl m Ar memsize .Op Fl o Ar file @@ -31,9 +31,12 @@ file .Sh DESCRIPTION .Nm mcc is a machine code compiler. -It reads ASCII text '0' and '1' mapping to machine code instructions. +It reads binary ASCII text '0' and '1' mapping to machine code instructions. All other characters are ignored except the comment token ';'. Comments begin with ';' and continue until EOL. +The +.Fl h +option enables parsing hexadecimal rather than binary text. 
.Pp Use the .Fl o diff --git a/mcc.c b/mcc.c index a17f2e0..b17eae8 100644 --- a/mcc.c +++ b/mcc.c @@ -28,8 +28,8 @@ #include "mcc.h" #include "write.h" -uint64_t byte_len(FILE *const); -void write_mcode(FILE *const, FILE *const); +uint64_t byte_len(FILE *const, int const); +void write_mcode(FILE *const, FILE *const, int const); void write_section_names(FILE *const, struct mcc_opts const *const); #define COMMENT ';' @@ -44,6 +44,7 @@ main(int argc, char *argv[]) char const *iname; char const *oname; int ch; + int hex; int use_64; int use_bss_addr; int use_text_addr; @@ -55,13 +56,15 @@ main(int argc, char *argv[]) oname = "a.out"; + hex = 0; + /* default 64-bit */ use_64 = 1; use_bss_addr = 0; use_text_addr = 0; - while ((ch = getopt(argc, argv, "3b:m:o:st:")) != -1) { + while ((ch = getopt(argc, argv, "3b:hm:o:st:")) != -1) { switch (ch) { case '3': use_64 = 0; @@ -77,6 +80,9 @@ main(int argc, char *argv[]) use_bss_addr = 1; break; + case 'h': + hex = 1; + break; case 'm': opts.mem.n64 = strtoull(optarg, &end, 10); @@ -130,7 +136,7 @@ main(int argc, char *argv[]) err(1, "open out"); } - opts.len.n64 = byte_len(in); + opts.len.n64 = byte_len(in, hex); if (!use_64) { if (opts.mem.n64 > UINT32_MAX) { @@ -170,7 +176,7 @@ main(int argc, char *argv[]) } } - write_mcode(in, out); + write_mcode(in, out, hex); write_section_names(out, &opts); if (use_64) { @@ -209,7 +215,7 @@ main(int argc, char *argv[]) } uint64_t -byte_len(FILE *const in) +byte_len(FILE *const in, int const hex) { uint64_t i; uint64_t len; @@ -231,13 +237,42 @@ byte_len(FILE *const in) break; case '0': case '1': - if (++i == 8) { - i = 0; - len++; + if (hex) { + i += 4; + } else { + i++; + } + break; + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + if (hex) { + i += 4; } - break; } + + if (i == 8) { + i = 0; + 
len++; + } } out: @@ -253,39 +288,74 @@ out: } void -write_mcode(FILE *const in, FILE *const out) +write_mcode(FILE *const in, FILE *const out, int const hex) { + size_t const bytesize = hex ? 2 : 8; + int const base = hex ? 16 : 2; + + char buf[bytesize+1]; uint64_t i; - unsigned char buf; + char *end; unsigned char cur; + char rbuf; - cur = 0; + buf[bytesize] = '\0'; - for (i = 8; fread(&buf, 1, 1, in) == 1; ) { - switch (buf) { + for (i = 0; fread(&rbuf, 1, 1, in);) { + switch (rbuf) { case COMMENT: /* gulp until newline or EOF */ while (1) { - if (fread(&buf, 1, 1, in) == 0) { + if (fread(&rbuf, 1, 1, in) == 0) { return; - } else if (buf == '\n') { + } else if (rbuf == '\n') { break; } } break; case '0': case '1': - cur = (unsigned char)(cur | (buf - '0') << --i); + buf[i++] = rbuf; + break; + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + if (hex) { + buf[i++] = rbuf; + } + break; + } - if (i == 0) { - if (fwrite(&cur, 1, 1, out) != 1) { - err(1, "write mcode"); - } + if (i == bytesize) { + cur = (unsigned char)strtol(buf, &end, base); + + if (errno == EINVAL || errno == ERANGE) { + err(1, "mcode byte invalid"); + } else if (buf == end) { + err(1, "mcode no byte read"); + } - i = 8; - cur = 0; + i = 0; + if (fwrite(&cur, 1, 1, out) != 1) { + err(1, "write mcode"); } - break; } } }