Commits

Anonymous committed 5eb6fe4 Draft

cmd/6l: loop alignment, disabled

Saving the code in case we improve things enough that
it matters later, but at least right now it is not worth doing.

R=ken2
CC=golang-dev
http://codereview.appspot.com/6248071

Comments (0)

Files changed (3)

 {
 	thechar = '6',
 	PtrSize = 8,
+	
+	// Loop alignment constants:
+	// want to align loop entry to LoopAlign-byte boundary,
+	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
+	// We define a loop entry as the target of a backward jump.
+	//
+	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
+	// and it aligns all jump targets, not just backward jump targets.
+	//
+	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
+	// is very slight but negative, so the alignment is disabled by
+	// setting MaxLoopPad = 0. The code is here for reference and
+	// for future experiments.
+	// 
+	LoopAlign = 16,
+	MaxLoopPad = 0,
+
 	FuncAlign = 16
 };
 

src/cmd/6l/span.c

 static int	asmode;
 static vlong	vaddr(Adr*, Reloc*);
 
+// single-instruction no-ops of various lengths.
+// constructed by hand and disassembled with gdb to verify.
+// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
+static uchar nop[][16] = {
+	{0x90},
+	{0x66, 0x90},
+	{0x0F, 0x1F, 0x00},
+	{0x0F, 0x1F, 0x40, 0x00},
+	{0x0F, 0x1F, 0x44, 0x00, 0x00},
+	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
+	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
+	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+	{0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+};
+
+static void
+fillnop(uchar *p, int n)
+{
+	int m;
+
+	while(n > 0) {
+		m = n;
+		if(m > nelem(nop))
+			m = nelem(nop);
+		memmove(p, nop[m-1], m);
+		p += m;
+		n -= m;
+	}
+}
+
 void
 span1(Sym *s)
 {
 
 	for(p = s->text; p != P; p = p->link) {
 		p->back = 2;	// use short branches first time through
-		if((q = p->pcond) != P && (q->back & 2))
+		if((q = p->pcond) != P && (q->back & 2)) {
 			p->back |= 1;	// backward jump
+			q->back |= 4;   // loop head
+		}
 
 		if(p->as == AADJSP) {
 			p->to.type = D_SP;
 		s->np = 0;
 		c = 0;
 		for(p = s->text; p != P; p = p->link) {
+			if((p->back & 4) && (c&(LoopAlign-1)) != 0) {
+				// pad with NOPs
+				v = -c&(LoopAlign-1);
+				if(v <= MaxLoopPad) {
+					symgrow(s, c+v);
+					fillnop(s->p+c, v);
+					c += v;
+				}
+			}
+
 			p->pc = c;
 
 			// process forward jumps to p

src/libmach/8db.c

 [0x15] =	{ RM,0,		"UNPCKH%s	%x,%X" },
 [0x16] =	{ RM,0,		"MOV[L]H%s	%x,%X" },	/* TO DO: L if source is XMM */
 [0x17] =	{ RM,0,		"MOVH%s	%X,%x" },
+[0x1F] =	{ RM,0,		"NOP%S	%e" },
 [0x20] =	{ RMR,0,		"MOVL	%C,%e" },
 [0x21] =	{ RMR,0,		"MOVL	%D,%e" },
 [0x22] =	{ RMR,0,		"MOVL	%e,%C" },