Create a gist now

Instantly share code, notes, and snippets.

anonymous /SIMPL -Expansion 45
Created Nov 11, 2015

What would you like to do?
A minimal text interpreter - along the lines of Forth
// SIMPL - EXPANSION 45
// Illustrates how to lose bloat by recoding the UART routines
#include <avr/io.h>
#define F_CPU 16000000UL
#define BAUD 115200
#include <util/setbaud.h>
// A Serial Interpreted Minimal Programming Language - Inspired by Txtzyme - by Ward Cunningham
// This is the compressor-expander version of SIMPL - using the low-fat direct UART routines for the ATmega
// By removing Serial.print library and replacing setup() and loop() with a main loop and while(1) statement code is massively reduced - to about 2K bytes
// Build a pre-processor (compressor) onto the front of SIMPL so that more verbose form of colon definitions can be handled.
// Creates a dictionary header (4 bytes) from the first 3 characters of the word plus the word length.
// Additionally an expander- allowing minimal SIMPL code to be expanded out to the more verbose format.
// It provides the mechanisms to expand a single character up to a full keyword, and to take
// a keyword and substitute it for a single character
// Allows for more verbose dictionary words and the means to convert between SIMPL and verbose
// SIMPL provides about a 4:1 compaction of source code over the full verbose text
// These two mechanisms form the basis of a simple assembler-disassembler
// The expansion is the simplest - just a look-up within an array of keywords based on the character and
// print out the keyword found at that location.
// Compression is a little more involved as it has to search the array of keywords - until it finds a match
// and then find the address of that keyword within the array.
// This however may be simplified by just checking the first 3 letters of the keyword - forming a unique number,
// and then doing a switch-case selection based on that number.
// With these two substitution mechanisms in place, it is then possible to write simple cross assemblers & disassemblers
// A full dictionary search based on first three characters and length could be implemented later for the User vocabulary
// Word storage format - compressed into 4 bytes
// Char1, Char2, Char3, Len
// Byte 0 1 2 3
// So a word can be expanded by knowing its length and the dictionary pointer to its 1st character
// The jump address pointer is calculated during the search as a single byte to allow fast look-up from a table of addresses
/*
ASCII Table
32 space
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
48 0
1
2
3
4
5
6
7
8
9
:
;
<
=
>
?
64 @
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
[
\
]
^
_
'
abcdefghijklmnopqrstuvwxyz
{
|
}
~
DEL
*/
// Following table contains the keywords - these will be in the order of the ascii character - so easily addressed
/*
char word_table[26] [8] = {
{"ANALOG "},
{"BREAK "},
{"CHECK "},
{"DIGIT "},
{"ELSE "},
{"FOR "},
{"GET "},
{"HIGH "},
{"IF "},
{"JUMP "},
{"KEY "},
{"LOW "},
{"MILLIS "},
{"NUMBER "},
{"OUTPUT "},
{"PUT "},
{"QUIT "},
{"RTC "},
{"SWITCH "},
{"THEN "},
{"MICROS "},
{"VAR "},
{"WHILE "},
{"TOP "},
{"SECOND "},
{"NAP "}
};
*/
// Built-in keyword headers. Every entry is exactly 4 bytes: the first three
// characters of the keyword (padded with spaces when the keyword is shorter)
// followed by the keyword length as an ASCII digit.
// The table is walked in steps of 4, so the single-character symbol entries
// must be padded with TWO spaces ("!  1"); with only one space every entry
// after POP3 would fall off the 4-byte grid.
// The unused remainder of the 512 bytes is zero-filled by the initializer.
char word_table[512] =
    "ANA6" "BRE5" "CHE5" "DIG5" "ELS4" "FOR3" "GET3" "HIG4" "IF 2" "JUM4"
    "KEY3" "LOW3" "MIL6" "NUM6" "OUT6" "PUT3" "QUI4" "RTC3" "SWI6" "THE4"
    "MIC6" "VAR3" "WHI5" "TOP3" "SEC6" "NAP3" "HAT3" "ADD3" "AND3" "OR 2"
    "XOR3" "LIT3" "SUB3" "MUL3" "DIV3" "NEG3" "INV3" "JPZ3" "JMP3" "CAL4"
    "RET3" "LT 2" "EQ 2" "GT 2" "MOD3" "DEC3" "HEX3" "PUS4" "R@ 3" "POP3"
    "!  1" "#  1" "$  1" "%  1" "'  1" "(  1" ")  1" "*  1" "+  1" "-  1"
    ".  1" "/  1" "[  1" "]  1" "^  1" "_  1" "{  1" "|  1" "}  1" "~  1";
// ADD3AND3OR 2XOR3LIT3SUB3MUL3DIV3NEG3INV3JPZ3JMP3
// JMP3JPZ3CAL4RET3LT 2EQ 2GT 2MOD3DEC3HEX3PUS4R@ 3POP3
// ! 1# 1$ 1% 1& 1' 1( 1) 1* 1+ 1, 1- 1. 1/ 1[ 1\ 1] 1^ 1_ 1' 1{ 1| 1} 1~ 1";
// Text of the user's dictionary words
char dictionary[512];
// Colon-definition headers: 128 entries of 4 bytes each
char headers[512];
// Line input buffer
char buf[128];

// Byte-wide read / write through an arbitrary address
#define bufRead(p) (*(unsigned char *)(p))
#define bufWrite(p, v) (*(unsigned char *)(p) = (v))

// Scratch byte used when copying characters out of the arrays
unsigned char bite;

// Three general purpose variables
unsigned long x = 0, y = 0;
unsigned int z = 0;

int len = 48;
long old_millis = 0, new_millis = 0;
/*
char array[26][4] = { // Define a 26 x 48 array for the colon definitions
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
{""},
};
*/
// Byte offset of the current 4-byte slot inside headers[]. It starts at the
// first slot, i.e. offset 0 (it is an index, not the contents of headers[0]).
int header_pointer = 0;
int header_end = 0;

// Results of the most recent header comparison
int match = 0;
int match0 = 0;
int match1 = 0;
int match2 = 0;
int match3 = 0;
int match_address = 0;

int a = 0; // integer variables a,b,c,d
int b = 0;
int c = 0;
int d = 6; // d is used to denote the digital port pin for I/O operations

char name1; // First character of name
char name2; // Second character of name
char name3; // Third character of name
int word_len; // The length of the word
int word_number = 0;
char colon = 0; // set to 1 when a ':' has been seen in the input line

char num_buf[11]; // long enough to hold a 32 bit long
int decade = 0;
char digit = 0;
long D_num = 0;
long D_val = 0;
long D_decade = 0;
int k = 0;

char *parray;
char *addr;
int dict_pointer; // offset of the next free byte in dictionary[]
//--------------------------------------------------------------------------------------------------------------------
// Program entry point: set up the UART and port D, blank the line buffer,
// then loop forever reading a line, building its header, looking it up in
// the keyword table and listing the dictionary.
int main()
{
    uart_init(); // Enable UART

    // Sets pins 2 to 7 as outputs without changing the value of pins 0 & 1, which are RX & TX.
    // 0xFC is the same bit pattern as binary 11111100.
    DDRD = DDRD | 0xFC;

    // Clear the buffer with spaces. buf has valid indices 0 .. sizeof buf - 1,
    // so the loop must stop before sizeof buf.
    for (unsigned int i = 0; i < sizeof buf; i++)
    {
        buf[i] = 32;
    }
    //--------------------------------------------------------------------------------------------------------------------
    while (1) // Main loop
    {
        txtRead(buf, 100); // read the text into the buffer
        colon_check(buf); // check if it is a colon definition
        if (!colon)
        {
            word_scan(buf); // if it's not a colon definition - scan any way and form header entry
        }
        look_up(); // Check for a match to known words
        dictionary_list();
        // header_list();
        // bufPrint();
        // txt_emit();
        // txtEval(buf);
    }
}
//--------------------------------------------------------------------------------------------------------------------
void txtRead (char *p, byte n) // Read any serial input into a buffer until you see \r or \n
{
byte i = 0;
while (i < (n-1)) {
char ch = u_getchar(); // get the character from the buffer
if (ch == '\r' || ch == '\n') break;
if (ch >= ' ' && ch <= '~') {
*p++ = ch;
i++;
}
}
*p = 0;
}
//--------------------------------------------------------------------------------------------------------------------
// Scan the NUL-terminated line in buf for ':' characters.
// For every colon found, the global flag `colon` is set to 1 and the text
// immediately after the colon is handed to build_buffer().
// The flag is cleared first, so a colon seen on an earlier line does not
// leave it stuck at 1 for every later line.
void colon_check (char *buf) {
    char ch;
    colon = 0;
    while ((ch = *buf++)) {
        if (ch == ':') {
            colon = 1;
            build_buffer(buf); // buf now points just past the ':'
        }
    }
}
//------------------------------------------------------------------------------------------------------------------------
// Build the header entry for a line that is not a colon definition.
// Leading spaces are skipped and build_buffer() is called exactly once, on the
// first character of the word. (Calling it for every character, with the
// pointer already advanced, dropped the first letter and overwrote the same
// header slot with each successive tail of the word.)
// Nothing is done for an empty line or one whose first non-space character is ':'.
void word_scan(char *buf) {
    while (*buf == ' ') {
        buf++;
    }
    if (*buf != '\0' && *buf != ':') {
        build_buffer(buf);
    }
}
//------------------------------------------------------------------------------------------------------------------------
// Copy the word starting at buf into dictionary[] and write its 4-byte header
// (first three characters + length) into headers[] at header_pointer.
//   buf - points at the first character of the word; the word ends at the
//         first space, or at the terminating NUL if no space follows.
// The stored length counts the terminating space as well (word length + 1),
// which is why the echo below adds 47 rather than 48 to print it as a digit.
// Eg ANALOG -> ANA6
// Writes are bounded: nothing is done if no 4-byte header slot is left, and
// characters that do not fit in dictionary[] are dropped.
void build_buffer(char *buf)
{
    char ch;

    // No free header slot: return rather than write past headers[]
    if (header_pointer < 0 || header_pointer + 3 >= (int)sizeof headers) {
        return;
    }

    word_len = 0; // reset word_len

    // Extract the first three characters of the word, never reading beyond
    // the NUL that terminates the input line
    name1 = *buf;
    name2 = name1 ? *(buf + 1) : 0;
    name3 = name2 ? *(buf + 2) : 0;

    ch = *buf;
    while (ch != ' ') // look for terminating space
    {
        ch = *buf;
        if (ch == '\0') {
            ch = ' '; // end of line acts as the terminating space
        } else {
            buf++;
        }
        word_len++; // get the length (+1)
        // Keep the last byte free for the trailing space written below
        if (dict_pointer >= 0 && dict_pointer < (int)sizeof dictionary - 1) {
            dictionary[dict_pointer] = ch; // Write it into the dictionary array
            dict_pointer++; // Update the dictionary pointer to keep track of dictionary
        }
    }

    // Make up the 4 byte dictionary header
    headers[header_pointer] = name1;
    headers[header_pointer + 1] = name2;
    headers[header_pointer + 2] = name3;
    headers[header_pointer + 3] = (char)word_len; // word_len goes in the 4th byte of the header

    // Echo the header that was just stored
    u_putchar(headers[header_pointer]);
    u_putchar(headers[header_pointer + 1]);
    u_putchar(headers[header_pointer + 2]);
    u_putchar(headers[header_pointer + 3] + 47); // binary length (+1) -> ASCII digit
    u_putchar(0x20); //space

    // Write the trailing space after the word (dict_pointer is not advanced)
    if (dict_pointer >= 0 && dict_pointer < (int)sizeof dictionary) {
        dictionary[dict_pointer] = ' ';
    }
    word_len = 0;
}
//--------------------------------------------------------------------------------------------------------------------
//------------------------------------------------------------------------------------------------------------
// Print the header at header_pointer: its three name characters and length
// digit, then the value of header_pointer, then the stored length byte.
// The length byte is stored in binary and includes the terminating space, so
// it is shown as a digit by adding 47, matching the echo in build_buffer().
void bufPrint(void)
{
    u_putchar(headers[header_pointer]); // get the name from the first character
    u_putchar(headers[header_pointer + 1]);
    u_putchar(headers[header_pointer + 2]);
    u_putchar(headers[header_pointer + 3] + 47);
    u_putchar(0x20);
    printlong(header_pointer);
    int stored_len = (int)headers[header_pointer + 3]; // get the word length back
    u_putchar(0x20);
    printlong(stored_len);
} // End of text buf_print
//------------------------------------------------------------------------------------------------------------
// Placeholder for appending the last scanned word to dictionary[].
// The body is empty, so calling it currently has no effect.
void dictionary_add(void) // add the last scanned word onto the end of the dictionary
{
}
//------------------------------------------------------------------------------------------------------------
// Placeholder for appending the last scanned word's header to headers[].
// The body is empty, so calling it currently has no effect.
void header_add(void) // add the last scanned word's header to the end of the headers
{
}
//------------------------------------------------------------------------------------------------------------
// List the dictionary in full: send dictionary[0 .. dict_pointer-1] to the
// serial port, followed by a newline.
// Indexing starts at 0; the earlier "j-1" offset read one byte before the
// start of the array on the first pass.
void dictionary_list(void)
{
    parray = &dictionary[0]; // reset parray to the pointer to the first dictionary element
    for (int j = 0; j < dict_pointer && j < (int)sizeof dictionary; j++)
    {
        bite = bufRead(parray + j); // read the array
        u_putchar(bite); // print the character to the serial port
    }
    crlf(); // newline
}
//------------------------------------------------------------------------------------------------------------
// List the headers: send every byte of headers[] from offset 0 up to and
// including the last byte of the slot at header_pointer, then a space,
// then the value of header_pointer. Also records header_end as the offset
// just past the current slot.
void header_list(void)
{
    const int last = header_pointer + 3; // final byte of the current 4-byte slot
    int idx = 0;

    parray = &headers[0];
    while (idx <= last)
    {
        bite = bufRead(parray + idx);
        u_putchar(bite);
        idx++;
    }
    u_putchar(' ');
    header_end = header_pointer + 4;
    printlong(header_pointer);
}
//------------------------------------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------------------------------------
// Compare the header of the newly entered word (headers[header_pointer..+2])
// with each 4-byte entry of word_table.
// Only the three name characters decide a match; the length difference is
// still computed into match3 but is not part of the test.
// On the first match: match = 1, match_address = entry index, and
// "<index> OK" plus a newline is sent to the serial port.
// With no match: match = 0, match_address = 255 and header_pointer advances
// to the next 4-byte slot, so the new word's header is kept.
void look_up(void)
{
    // Reset once, before the search; resetting inside the loop discarded any
    // match found on an earlier entry.
    match = 0;
    match_address = 255; // Signifies no match

    // Nothing to compare if header_pointer does not address a whole slot
    if (header_pointer < 0 || header_pointer + 3 >= (int)sizeof headers)
    {
        return;
    }

    // Step through whole 4-byte entries only, and stop at the zero-filled
    // tail of word_table that follows the last keyword.
    for (int j = 0; j + 3 < (int)sizeof word_table && word_table[j] != '\0'; j = j + 4)
    {
        match0 = headers[header_pointer] - word_table[j];
        match1 = headers[header_pointer + 1] - word_table[j + 1];
        match2 = headers[header_pointer + 2] - word_table[j + 2];
        match3 = headers[header_pointer + 3] - word_table[j + 3];
        if (match0 == 0 && match1 == 0 && match2 == 0)
        {
            match = 1; // we have a match on the keyword!
            match_address = j / 4;
            printnum(match_address);
            u_putchar(0x20);
            u_putchar(79); //OK
            u_putchar(75);
            u_putchar(10);
            u_putchar(13);
            break;
        }
    }
    if (!match)
    {
        // No match means a new word: keep its header by moving on to the next slot
        header_pointer = header_pointer + 4;
    }
}
//--------------------------------------------------------------------------------------------------------------------
// Send the dictionary text to the serial port, from its first character up to
// and including the first ';', then a space and the value of dict_pointer.
// Nothing is sent from the dictionary if its first character is already ';'.
// The scan stops at the end of dictionary[] when no ';' is present, instead
// of running on through whatever memory follows the array.
void txt_emit(void)
{
    char *const end = dictionary + sizeof dictionary;

    parray = &dictionary[0]; // reset parray to the pointer to the first dictionary element
    char chr = bufRead(parray);
    while (chr != ';' && parray < end) // look for terminating semicolon
    {
        bite = bufRead(parray++); // read the array
        chr = bite;
        u_putchar(bite); // print the character to the serial port
    }
    u_putchar(0x20);
    printlong(dict_pointer);
} // end of txt_emit
//--------------------------------------------------------------------------------------------------------------------
/*
void txtEval (char *buf) {
unsigned int k = 0;
char *loop;
char *start;
char ch;
while ((ch = *buf++)) {
switch (ch) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
x = ch - '0';
while (*buf >= '0' && *buf <= '9') {
x = x*10 + (*buf++ - '0');
}
break;
case 'p':
Serial.println(x);
break;
*/
/*
case 'a':
a = x;
break;
*/
/*
case 'b':
Serial.println(millis());
break;
case 'c':
Serial.println(micros());
break;
case 'd':
d = x;
break;
case 'A': // Point the interpreter to the array containing the words
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
case 'H':
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case 'Q':
case 'R':
case 'S':
case 'T':
case 'U':
case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
name1 = ch - 65;
addr = parray + (len*name1);
txtEval(addr);
break;
case 'a':
analogWrite(d,x);
break;
case '!': // store
y = x;
break;
case '@':
x = y;
break;
case '+':
x = x+y;
break;
case '-':
x = x-y;
break;
case '*':
x = x*y;
break;
case '/':
x = x/y;
break;
case '<':
if(x<y){x=1;} // If x<y x= 1 - can be combined with jump j
else x=0;
break;
case '>':
if(x>y){x=1;} // If x>y x= 1 - can be combined with jump j
else x=0;
break;
case 'j': // test if x = 1 and jump next instruction
if(x==1){*buf++;}
break;
case 'l':
y = y + 1;
break;
case 'x':
x = x + 1;
break;
case 'y':
y = y + 1;
break;
case 'n': // Output an 8 bit value on I/O Dig 2 - Dig 9
// Can be extended to 12 bits on Dig 2 - Dig 13
if(x>=128){digitalWrite(9,HIGH); x = x- 128;} else {digitalWrite(9,LOW);}
if(x>=64){digitalWrite(8,HIGH); x = x- 64;} else {digitalWrite(8,LOW);}
if(x>=32){digitalWrite(7,HIGH); x = x- 32;} else {digitalWrite(7,LOW);}
if(x>=16){digitalWrite(6,HIGH); x = x- 16;} else {digitalWrite(6,LOW);}
if(x>=8){digitalWrite(5,HIGH); x = x- 8;} else {digitalWrite(5,LOW);}
if(x>=4){digitalWrite(4,HIGH); x = x- 4;} else {digitalWrite(4,LOW);}
if(x>=2){digitalWrite(3,HIGH); x = x- 2;} else {digitalWrite(3,LOW);}
if(x>=1){digitalWrite(2,HIGH); x = x- 1;} else {digitalWrite(2,LOW);}
break;
*/
/*
case '?': // Print out all the RAM
// parray = &array[0][0]; // reset parray to the pointer to the first element
parray = &word_table[0][0];
for (int j = 0; j<26; j++) {
// Serial.write(j+65); // print the caps word name
// Serial.write(20); // space
for (int i=0; i<=7; i++) {
bite = bufRead( parray + (j * 8 )+i); // read the array
Serial.write(bite); // print the character to the serial port
if(bite==32){break;} // break out if its a space character
}
// Serial.println();
}
for(int i = 0; i <11; i++) {
Serial.println();
}
break;
*/
/*
//--------------------------------------------------------------------------------------------------------------
case '?':
{
// Print out all the RAM
parray = &dictionary[0]; // reset parray to the pointer to the first dictionary element
// parray = &word_table[0][0];
for (int j = 0; j<dict_pointer; j++) {
bite = bufRead( parray + j ); // read the array
Serial.write(bite); // print the character to the serial port
}
// Serial.println();
}
//-----------------------------------------------------------------------------------------------------------------------------------------------
// Added 15-2-2015 - all appears to be working
case '(': // The start of a condition test
k = x;
start = buf; // remember the start position of the test
while ((ch = *buf++) && ch != ')') { // get the next character into ch and increment the buffer pointer *buf - evaluate the code
}
case ')':
if (x) { // if x is positive - go around again
buf = start;
}
break;
case '.':
while ((ch = *buf++) && ch != '.') {
// Serial.print(ch);
name1 = ch - 65;
addr = parray + (len*name1);
while ((ch = *addr++) && ch != '.') {
Serial.print(ch);
}
}
Serial.println();
break;
// txtEval(addr);
// break;
case ' ': // Transfer x into second variable y
k=y; // Transfer loop counter into k
y= x;
break;
case '$': // Load x with the ASCII value of the next character i.e. 5 = 35H or 53 decimal
x=*(buf-2);
break;
//-----------------------------------------------------------------------------------------------------------------------------------------------
case 'i':
x = digitalRead(d);
break;
case 'o':
digitalWrite(d, x%2);
break;
case 'm':
delay(x);
break;
case 'u':
delayMicroseconds(x);
break;
case '{':
k = x;
loop = buf;
while ((ch = *buf++) && ch != '}') {
}
case '}':
if (k) {
k--;
buf = loop;
}
break;
case 'k':
x = k;
break;
case '_':
while ((ch = *buf++) && ch != '_') {
Serial.print(ch);
}
Serial.println();
break;
case 's':
x = analogRead(x);
break;
case 't':
Serial.println(micros());
break;
case 'z': // z is a non-blocking pause or nap - measured in "zeconds" which allows UART characters,
old_millis = millis(); // get the millisecond count when you enter the switch-case
while (!Serial.available()||millis()-old_millis<=(x*1000))
{ }
Serial.println("waiting for escape");
// Serial.println("Got a char");
ch = Serial.read();
Serial.println("Got a £");
// Put the idle loop and escape code here
if(ch=='£')
{
Serial.println("Escape");
break;
}
// interrupts or digital Inputs to break the pause
// Serial.println(millis());
// break;
}
}
}
*/
//--------------------------------------------------------------------------------------
// UART Routines
//--------------------------------------------------------------------------------------
// Configure USART0: baud-rate registers from the UBRRH_VALUE / UBRRL_VALUE
// macros, double-speed bit according to USE_2X, 8 data bits, receiver and
// transmitter enabled.
void uart_init(void)
{
    UBRR0H = UBRRH_VALUE;
    UBRR0L = UBRRL_VALUE;
#if USE_2X
    UCSR0A |= (1 << U2X0);
#else
    UCSR0A &= ~(1 << U2X0);
#endif
    UCSR0C = (1 << UCSZ00) | (1 << UCSZ01); /* 8-bit data */
    UCSR0B = (1 << TXEN0) | (1 << RXEN0); /* Enable RX and TX */
}
// Send one character: busy-wait until the UDRE0 flag in UCSR0A is set,
// then write c to the data register.
void u_putchar(char c) {
    while ((UCSR0A & (1 << UDRE0)) == 0) {
        /* spin until the data register is empty */
    }
    UDR0 = c;
}
// Receive one character: busy-wait until the RXC0 flag in UCSR0A is set,
// then return the contents of the data register.
char u_getchar(void) {
    while ((UCSR0A & (1 << RXC0)) == 0) {
        /* spin until data exists */
    }
    return UDR0;
}
//-----------------------------------------------------------------------------------------
// Print a 16 bit int number
// Print an int in decimal on the serial port, without leading zeroes and
// without a trailing newline.
//   num - value to print; negative values are printed with a leading '-'
// Digits are produced least-significant first into a local buffer and sent in
// reverse. Only locals are used, so the interpreter variable z and the other
// global scratch variables are left untouched.
void printnum(int num)
{
    char digits[sizeof(int) * 3]; // more than enough decimal digits for any int
    unsigned int magnitude;
    unsigned char count = 0;

    if (num < 0)
    {
        u_putchar('-');
        magnitude = 0u - (unsigned int)num; // well defined even for the most negative int
    }
    else
    {
        magnitude = (unsigned int)num;
    }

    // do/while so that zero still produces the single digit '0'
    do
    {
        digits[count++] = (char)('0' + (magnitude % 10u));
        magnitude /= 10u;
    } while (magnitude != 0u);

    while (count > 0)
    {
        u_putchar(digits[--count]); // most significant digit first
    }
}
//----------------------------------------------------------------------------------------------------------
// Print a string
// Placeholder for printing the string at buf.
// The body is empty, so calling it currently sends nothing.
void printstring(char *buf)
{
}
//----------------------------------------------------------------------------------------------------------
// Print a CR-LF
// Send a line ending to the serial port.
// The bytes go out as line feed (10) first, then carriage return (13).
void crlf(void)
{
    u_putchar('\n'); // 10
    u_putchar('\r'); // 13
}
//---------------------------------------------------------------------------------------------------------
// Print a 32 bit integer
// Print a long in decimal on the serial port, without leading zeroes,
// followed by a line feed (10) and carriage return (13).
//   D_num - value to print; negative values are printed with a leading '-'
// Digits are produced least-significant first into a local buffer and sent in
// reverse. Only locals are used, so the interpreter variable z and the other
// global scratch variables are left untouched.
void printlong(long D_num)
{
    char digits[sizeof(long) * 3]; // more than enough decimal digits for any long
    unsigned long magnitude;
    unsigned char count = 0;

    if (D_num < 0)
    {
        u_putchar('-');
        magnitude = 0ul - (unsigned long)D_num; // well defined even for the most negative long
    }
    else
    {
        magnitude = (unsigned long)D_num;
    }

    // do/while so that zero still produces the single digit '0'
    do
    {
        digits[count++] = (char)('0' + (magnitude % 10ul));
        magnitude /= 10ul;
    } while (magnitude != 0ul);

    while (count > 0)
    {
        u_putchar(digits[--count]); // most significant digit first
    }

    u_putchar(10);
    u_putchar(13);
}
//-----------------------------------------------------------------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment