In this section we try to describe a few of the many transformations that are applied to a C program to convert it to CIL. The module that implements this conversion is about 5000 lines of OCaml code. In contrast, a simple program transformation that instruments all functions to keep a shadow stack of the true return address (thus preventing stack smashing) is only 70 lines of code. This example shows that the analysis is so much simpler because it has to handle only a few simple C constructs and also because it can leverage CIL infrastructure such as visitors and pretty-printers.
In no particular order, these are a few of the most significant ways in which C programs are compiled into CIL:
int long signed x;
signed long extern x;
long static int long y;
// Some code that uses these declarations, so that CIL does not remove them
int main() { return x + y; }
See the CIL output for this code fragment
struct { int x; } s;
See the CIL output for this code fragment
struct foo {
struct bar {
union baz {
int x1;
double x2;
} u1;
int y;
} s1;
int z;
} f;
See the CIL output for this code fragment
int main() {
struct foo {
int x; } foo;
{
struct foo {
double d;
};
return foo.x;
}
}
See the CIL output for this code fragment
int f(); // Prototype without arguments
int f(double x) {
return g(x);
}
int g(double x) {
return x;
}
See the CIL output for this code fragment
int a1[] = {1,2,3};
int a2[sizeof(int) >= 4 ? 8 : 16];
See the CIL output for this code fragment
int main() {
enum {
FIVE = 5,
SIX, SEVEN,
FOUR = FIVE - 1,
EIGHT = sizeof(double)
} x = FIVE;
return x;
}
See the CIL output for this code fragment
int a1[5] = {1,2,3};
struct foo { int x, y; } s1 = { 4 };
See the CIL output for this code fragment
struct foo {
int x, y;
int a[5];
struct inner {
int z;
} inner;
} s = { 0, .inner.z = 3, .a[1 ... 2] = 5, 4, y : 8 };
See the CIL output for this code fragment
char foo[] = "foo plus bar";
See the CIL output for this code fragment
char *foo = "foo " " plus " " bar ";
See the CIL output for this code fragment
int x = 5;
struct foo { int f1, f2; } a [] = {1, 2, 3, 4, 5 };
See the CIL output for this code fragment
int x = 5;
int main() {
int x = 6;
{
int x = 7;
return x;
}
return x;
}
See the CIL output for this code fragment
int x = 5;
int main() {
int x = 6;
{
static int x = 7;
return x;
}
return x;
}
See the CIL output for this code fragment
int foo() {
int x = 5;
}
See the CIL output for this code fragment
int x, f(int);
return (x ++ + f(x));
See the CIL output for this code fragment
Internally, the x ++ statement is turned into an assignment which the pretty-printer prints like the original. CIL has only three forms of basic statements: assignments, function calls and inline assembly.
int x;
int y = x ? 2 : 4;
int z = x || y;
// Here we duplicate the return statement
if(x && y) { return 0; } else { return 1; }
// To avoid excessive duplication, CIL uses goto's for
// statements that have more than 5 instructions
if(x && y || z) { x ++; y ++; z ++; x ++; y ++; return z; }
See the CIL output for this code fragment
int f();;
return f() ? : 4;
See the CIL output for this code fragment
int x, y;
for(int i = 0; i<5; i++) {
if(i == 5) continue;
if(i == 4) break;
i += 2;
}
while(x < 5) {
if(x == 3) continue;
x ++;
}
See the CIL output for this code fragment
int x = 5, y = x;
int z = ({ x++; L: y -= x; y;});
return ({ goto L; 0; });
See the CIL output for this code fragment
int x, y, z;
return &(x ? y : z) - & (x ++, x);
See the CIL output for this code fragment
#include <stdio.h>
typedef int unused_type;
static char unused_static (void) { return 0; }
int main() {
int unused_local;
printf("Hello world\n"); // Only printf will be kept from stdio.h
}
See the CIL output for this code fragment