mcc

mcc is a machine code compiler.

Log | Files | << Repositories


tree 7c009d3514b7ab4423ed4ffafc2c463e4c2dde6f
parent dff1203fd896f95a87dff1deea2222c74081953f
author esote <esote.net@gmail.com> 1563058422 -0500
committer esote <esote.net@gmail.com> 1563058422 -0500
gpgsig -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQTXAxYDuIzimYoNSPuhTmRAjzzC8gUCXSpg+gAKCRChTmRAjzzC
 8hjrAP9hNfj1z84uSO1Ll9WhxGlKkO5HITzZiuHF4panKbLFnQEAuRfx0qd2BPuY
 3H3NLrNmwr/VU/al53E+5OTmXWTsXQU=
 =Qlq9
 -----END PGP SIGNATURE-----

Parse hex

 README |   9 ++---
 mcc.1  |   9 +++--
 mcc.c  | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++--------------
 3 files changed, 106 insertions(+), 32 deletions(-)

diff --git a/README b/README
index f86604b..b0927f1 100644
--- a/README
+++ b/README
@@ -2,12 +2,13 @@ NAME
 	mcc - machine code compiler
 
 SYNOPSIS
-	mcc [-3s] [-b addr] [-m memsize] [-o file] [-t addr] file
+	mcc [-3hs] [-b addr] [-m memsize] [-o file] [-t addr] file
 
 DESCRIPTION
-	mcc is a machine code compiler. It reads ASCII text '0' and '1' mapping
-	to machine code instructions. All other characters are ignored except
-	the comment token ';'. Comments begin with ';' and continue until EOL.
+	mcc is a machine code compiler. It reads binary ASCII text '0' and '1'
+	mapping to machine code instructions. All other characters are ignored
+	except the comment token ';'. Comments begin with ';' and continue until
+	EOL. The -h option enables parsing hexadecimal rather than binary text.
 
 	Use the -o option to specify the output filename. The default is to put
 	the executable in a.out.
diff --git a/mcc.1 b/mcc.1
index 3bcd3eb..751b45c 100644
--- a/mcc.1
+++ b/mcc.1
@@ -14,7 +14,7 @@
 .\" You should have received a copy of the GNU Affero General Public License
 .\" along with this program.  If not, see <https://www.gnu.org/licenses/>.
 .\"
-.Dd $Mdocdate: June 16 2019 $
+.Dd $Mdocdate: July 13 2019 $
 .Dt MCC 1
 .Os
 .Sh NAME
@@ -22,7 +22,7 @@
 .Nd machine code compiler
 .Sh SYNOPSIS
 .Nm mcc
-.Op Fl 3s
+.Op Fl 3hs
 .Op Fl b Ar addr
 .Op Fl m Ar memsize
 .Op Fl o Ar file
@@ -31,9 +31,12 @@ file
 .Sh DESCRIPTION
 .Nm mcc
 is a machine code compiler.
-It reads ASCII text '0' and '1' mapping to machine code instructions.
+It reads binary ASCII text '0' and '1' mapping to machine code instructions.
 All other characters are ignored except the comment token ';'.
 Comments begin with ';' and continue until EOL.
+The
+.Fl h
+option enables parsing hexadecimal rather than binary text.
 .Pp
 Use the
 .Fl o
diff --git a/mcc.c b/mcc.c
index a17f2e0..b17eae8 100644
--- a/mcc.c
+++ b/mcc.c
@@ -28,8 +28,8 @@
 #include "mcc.h"
 #include "write.h"
 
-uint64_t	byte_len(FILE *const);
-void		write_mcode(FILE *const, FILE *const);
+uint64_t	byte_len(FILE *const, int const);
+void		write_mcode(FILE *const, FILE *const, int const);
 void		write_section_names(FILE *const, struct mcc_opts const *const);
 
 #define COMMENT	';'
@@ -44,6 +44,7 @@ main(int argc, char *argv[])
 	char const *iname;
 	char const *oname;
 	int ch;
+	int hex;
 	int use_64;
 	int use_bss_addr;
 	int use_text_addr;
@@ -55,13 +56,15 @@ main(int argc, char *argv[])
 
 	oname = "a.out";
 
+	hex = 0;
+
 	/* default 64-bit */
 	use_64 = 1;
 
 	use_bss_addr = 0;
 	use_text_addr = 0;
 
-	while ((ch = getopt(argc, argv, "3b:m:o:st:")) != -1) {
+	while ((ch = getopt(argc, argv, "3b:hm:o:st:")) != -1) {
 		switch (ch) {
 		case '3':
 			use_64 = 0;
@@ -77,6 +80,9 @@ main(int argc, char *argv[])
 
 			use_bss_addr = 1;
 			break;
+		case 'h':
+			hex = 1;
+			break;
 		case 'm':
 			opts.mem.n64 = strtoull(optarg, &end, 10);
 
@@ -130,7 +136,7 @@ main(int argc, char *argv[])
 		err(1, "open out");
 	}
 
-	opts.len.n64 = byte_len(in);
+	opts.len.n64 = byte_len(in, hex);
 
 	if (!use_64) {
 		if (opts.mem.n64 > UINT32_MAX) {
@@ -170,7 +176,7 @@ main(int argc, char *argv[])
 		}
 	}
 
-	write_mcode(in, out);
+	write_mcode(in, out, hex);
 	write_section_names(out, &opts);
 
 	if (use_64) {
@@ -209,7 +215,7 @@ main(int argc, char *argv[])
 }
 
 uint64_t
-byte_len(FILE *const in)
+byte_len(FILE *const in, int const hex)
 {
 	uint64_t i;
 	uint64_t len;
@@ -231,13 +237,42 @@ byte_len(FILE *const in)
 			break;
 		case '0':
 		case '1':
-			if (++i == 8) {
-				i = 0;
-				len++;
+			if (hex) {
+				i += 4;
+			} else {
+				i++;
+			}
+			break;
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		case '8':
+		case '9':
+		case 'a':
+		case 'b':
+		case 'c':
+		case 'd':
+		case 'e':
+		case 'f':
+		case 'A':
+		case 'B':
+		case 'C':
+		case 'D':
+		case 'E':
+		case 'F':
+			if (hex) {
+				i += 4;
 			}
-
 			break;
 		}
+
+		if (i == 8) {
+			i = 0;
+			len++;
+		}
 	}
 
 out:
@@ -253,39 +288,74 @@ out:
 }
 
 void
-write_mcode(FILE *const in, FILE *const out)
+write_mcode(FILE *const in, FILE *const out, int const hex)
 {
+	size_t const bytesize = hex ? 2 : 8;
+	int const base = hex ? 16 : 2;
+
+	char buf[bytesize+1];
 	uint64_t i;
-	unsigned char buf;
+	char *end;
 	unsigned char cur;
+	char rbuf;
 
-	cur = 0;
+	buf[bytesize] = '\0';
 
-	for (i = 8; fread(&buf, 1, 1, in) == 1; ) {
-		switch (buf) {
+	for (i = 0; fread(&rbuf, 1, 1, in);) {
+		switch (rbuf) {
 		case COMMENT:
 			/* gulp until newline or EOF */
 			while (1) {
-				if (fread(&buf, 1, 1, in) == 0) {
+				if (fread(&rbuf, 1, 1, in) == 0) {
 					return;
-				} else if (buf == '\n') {
+				} else if (rbuf == '\n') {
 					break;
 				}
 			}
 			break;
 		case '0':
 		case '1':
-			cur = (unsigned char)(cur | (buf - '0') << --i);
+			buf[i++] = rbuf;
+			break;
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		case '8':
+		case '9':
+		case 'a':
+		case 'b':
+		case 'c':
+		case 'd':
+		case 'e':
+		case 'f':
+		case 'A':
+		case 'B':
+		case 'C':
+		case 'D':
+		case 'E':
+		case 'F':
+			if (hex) {
+				buf[i++] = rbuf;
+			}
+			break;
+		}
 
-			if (i == 0) {
-				if (fwrite(&cur, 1, 1, out) != 1) {
-					err(1, "write mcode");
-				}
+		if (i == bytesize) {
+			cur = (unsigned char)strtol(buf, &end, base);
+
+			if (errno == EINVAL || errno == ERANGE) {
+				err(1, "mcode byte invalid");
+			} else if (buf == end) {
+				err(1, "mcode no byte read");
+			}
 
-				i = 8;
-				cur = 0;
+			i = 0;
+			if (fwrite(&cur, 1, 1, out) != 1) {
+				err(1, "write mcode");
 			}
-			break;
 		}
 	}
 }