WIP - Work in progress

The following post is a work in progress, but I thought I'd keep uploading it as a way to push myself to finish it sooner rather than later.

Lambda Calculus: Building little languages

In my work I have had several opportunities to build tooling for other developers. Specifically these have been tools for assembling apps, programs and infrastructure using a high level language.

These have mostly been experimental, either exploring the benefits of different compiler architectures, approaches to optimisation or programming paradigms.

So, I've been stuck with a question:

How do we build a programming language from the smallest number of the simplest parts possible?

Many approaches have been taken to building programming languages and compilers, and there are almost as many approaches to doing so.

A small family tree of programming language models

digraph {
    conf [label="configuration languages"]
    clike [label="C-like languages"]
    Cpp [label="C++"]
    incomplete [label="Turing Incomplete\n(finite time execution)"]
    complete [label="Turing Complete\n(unknown time execution)"]
    HighLevelLangs [label="High level languages"]
    Fsharp [label="F#"]
    Csharp [label="C#"]

    incomplete -> conf
    incomplete -> data
    incomplete -> build

    data -> JSON
    data -> INI

    conf -> YAML
    conf -> INI
    conf -> JSON
    conf -> Toml
    conf -> Starlark

    build -> Starlark
    build -> Meson

    complete -> HighLevelLangs
    complete -> incomplete

    HighLevelLangs -> functional
    HighLevelLangs -> procedural
    HighLevelLangs -> "object oriented"

    procedural -> Fortran
    procedural -> clike
    procedural -> Fsharp

    "object oriented" -> Java
    "object oriented" -> Python
    "object oriented" -> Cpp
    "object oriented" -> Csharp
    "object oriented" -> OCaml
    "object oriented" -> erlang

    clike -> C
    clike -> Csharp
    clike -> Cpp
    clike -> Rust
    clike -> Zig
    clike -> Go
    clike -> JavaScript
    clike -> TypeScript
    clike -> Kotlin

    procedural -> Python
    procedural -> Starlark
    procedural -> commandline

    commandline -> sh
    commandline -> zsh
    commandline -> fsh
    commandline -> bash
    commandline -> perl

    functional -> Haskell
    functional -> Lisp
    functional -> Fsharp
    functional -> OCaml
}
digraph {
    Fsharp [label="F#"]
    Cpp [label="C++"]
    Csharp [label="C#"]
    dotNET [label=".NET (MSIL/CIL)"]
    LLVMIR [label="LLVM IR"]
    browser [label="Browser IR"]
    run [color=red]

    browser -> run [label="browser", color=red]
    JVM -> run [label="java", color=red]
    dotNET -> run [label="CLR", color=red]
    dotNET -> binary [label="coreRT"]
    LLVMIR -> run [label="llvm jit", color=red]
    LLVMIR -> assembly [label="llc"]
    assembly -> binary [label="assembler&linker"]
    binary -> run [label="exec", color=red]
    LLVMIR -> WASM [label="llvm"]

    Java -> JVM [label="javac"]
    Kotlin -> JVM [label="kotlinc"]
    OCaml -> JVM [label="ocamlc"]

    Csharp -> dotNET [label="visual studio!?"]
    Fsharp -> dotNET [label="fsc"]

    C -> assembly [label="gcc"]
    erlang -> binary [label="erlc"]
    Cpp -> assembly [label="g++"]
    Go -> binary [label="go"]
    Fortran -> binary [label="gfortran\nifort\npgf77"]
    Kotlin -> LLVMIR [label="kotlinc", color=blue]
    C -> Zig [label="zig", color=blue]
    Cpp -> Zig [label="zig", color=blue]
    Zig -> binary [label="zig"]
    Zig -> LLVMIR [label="zig", color=blue]
    Cpp -> LLVMIR [label="clang"]
    C -> LLVMIR [label="clang"]
    Rust -> LLVMIR [label="rustc"]
    Haskell -> STG -> Cmm [label="ghc"]
    Cmm -> binary [label="ghc", color=blue]
    Cmm -> LLVMIR [label="ghc", color=blue]
    Cmm -> C [label="ghc"]
    JavaScript -> browser [label="v8 and co"]
    JavaScript -> C [label="shermes", color=blue]
    JavaScript -> TypeScript [label="shermes", color=blue]
    TypeScript -> JavaScript [label="tsc"]
    Kotlin -> JavaScript [label="kotlinc", color=blue]
    WASM -> browser [label="v8 and co"]
    Lisp -> run [label="lisp", color=red]
    sh -> run [label="sh", color=red]
    zsh -> run [label="zsh", color=red]
    fsh -> run [label="fsh", color=red]
    bash -> run [label="bash", color=red]
    perl -> run [label="perl", color=red]
    Python -> run [label="cpython", color=red]
}

My favourite approach: Lambda Calculus

My favourite class of approaches is to build a small core language and build everything on top of that. This has a few benefits that I hope to detail here.

TODO: Finish

#![allow(unused)]
fn main() {
#[derive(Debug, Clone, Eq, Hash, Ord, PartialOrd, PartialEq)]
enum Term {
    Var(usize),
    App(Box<Term>, Box<Term>),
    Abs(Box<Term>),
}

fn abs(t: Term) -> Term {
    Term::Abs(Box::new(t))
}

fn app(t: Term, a: Term) -> Term {
    Term::App(Box::new(t), Box::new(a))
}

fn var(i: usize) -> Term {
    Term::Var(i)
}

let a_b_gives_a = abs(abs(var(0)));
let a_b_gives_b = abs(abs(var(1)));
println!("A simple term {:?}", a_b_gives_a);
println!("Another simple term {:?}", a_b_gives_b);

fn render(term: &Term) -> String {
    format!("{:?}", term)
}

}
#![allow(unused)]
fn main() {
#[derive(Debug, Clone, Eq, Hash, Ord, PartialOrd, PartialEq)]
enum Term {
    Var(usize),
    App(Box<Term>, Box<Term>),
    Abs(Box<Term>),
}

fn abs(t: Term) -> Term {
    Term::Abs(Box::new(t))
}

fn app(t: Term, a: Term) -> Term {
    Term::App(Box::new(t), Box::new(a))
}

fn var(i: usize) -> Term {
    Term::Var(i)
}

let a_b_gives_a = abs(abs(var(0)));
let a_b_gives_b = abs(abs(var(1)));
println!("A simple term {:?}", a_b_gives_a);
println!("Another simple term {:?}", a_b_gives_b);

fn render(term: &Term) -> String {
    format!("{:?}", term)
}

}
#![allow(unused)]
fn main() {
#[derive(Debug, Clone, Eq, Hash, Ord, PartialOrd, PartialEq)]
enum Term {
    Var(usize),
    App(Box<Term>, Box<Term>),
    Abs(Box<Term>),
}

fn abs(t: Term) -> Term {
    Term::Abs(Box::new(t))
}

fn app(t: Term, a: Term) -> Term {
    Term::App(Box::new(t), Box::new(a))
}

fn var(i: usize) -> Term {
    Term::Var(i)
}

let a_b_gives_a = abs(abs(var(0)));
let a_b_gives_b = abs(abs(var(1)));
println!("A simple term {:?}", a_b_gives_a);
println!("Another simple term {:?}", a_b_gives_b);

fn render(term: &Term) -> String {
    format!("{:?}", term)
}

}

Fin


Here's the full code for you to experiment with:

#[derive(Debug, Clone, Eq, Hash, Ord, PartialOrd, PartialEq)]
enum Term {
    Var(usize),
    App(Box<Term>, Box<Term>),
    Abs(Box<Term>),
}

fn abs(t: Term) -> Term {
    Term::Abs(Box::new(t))
}

fn app(t: Term, a: Term) -> Term {
    Term::App(Box::new(t), Box::new(a))
}

fn var(i: usize) -> Term {
    Term::Var(i)
}

let a_b_gives_a = abs(abs(var(0)));
let a_b_gives_b = abs(abs(var(1)));
println!("A simple term {:?}", a_b_gives_a);
println!("Another simple term {:?}", a_b_gives_b);

fn render(term: &Term) -> String {
    format!("{:?}", term)
}