Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Armv7-M: Allow register overlap in ldm + ldrd #153

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions examples/naive/armv7m/armv7m_simple0.s
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,7 @@ smlabt r3,r2, r2, r1
asrs r3, r3,#1
str r3, [r0,#4] // @slothy:writes=a

ldrd r2, r3, [r1, #8]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does not actually overlap. Please change to

Suggested change
ldrd r2, r3, [r1, #8]
ldrd r1, r2, [r1, #8]

str r2,[r0]
ldm r0 ,{r0-r3}
end:
134 changes: 70 additions & 64 deletions examples/opt/armv7m/armv7m_simple0_opt_m7.s
Original file line number Diff line number Diff line change
@@ -1,69 +1,75 @@

start:
// Instructions: 24
// Expected cycles: 26
// Expected IPC: 0.92
//
// Cycle bound: 26.0
// IPC bound: 0.92
//
// Wall time: 0.20s
// User time: 0.20s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
ldr r6, [r0, #4] // *............................. // @slothy:reads=a
add r10, r2, r6 // .*............................
eor.w r1, r10, r3 // ..*...........................
smlabt r7, r2, r2, r1 // ..*...........................
asrs r5, r7, #1 // ....*.........................
str r5, [r0, #4] // ....*......................... // @slothy:writes=a
ldm r0, {r7,r9,r11} // .....*........................ // @slothy:reads=a
add r8, r9, r7 // ........*.....................
eor.w r2, r8, r11 // .........*....................
smlabt r12, r9, r9, r2 // .........*....................
asrs r11, r12, #1 // ...........*..................
str r11, [r0, #4] // ...........*.................. // @slothy:writes=a
ldm r0, {r7,r8,r10} // ............*................. // @slothy:reads=a
add r6, r8, r7 // ...............*..............
eor.w r5, r6, r10 // ................*.............
smlabt r12, r8, r8, r5 // ................*.............
asrs r9, r12, #1 // ..................*...........
str r9, [r0, #4] // ..................*........... // @slothy:writes=a
ldm r0, {r1,r2,r8} // ...................*.......... // @slothy:reads=a
add r14, r2, r1 // ......................*.......
eor.w r5, r14, r8 // .......................*......
smlabt r10, r2, r2, r5 // .......................*......
asrs r3, r10, #1 // .........................*....
str r3, [r0, #4] // .........................*.... // @slothy:writes=a
// Instructions: 27
// Expected cycles: 27
// Expected IPC: 1.00
//
// Cycle bound: 27.0
// IPC bound: 1.00
//
// Wall time: 0.22s
// User time: 0.22s
//
// ----- cycle (expected) ------>
// 0 25
// |------------------------|----
ldr r4, [r0, #4] // *............................. // @slothy:reads=a
add r4, r2, r4 // .*............................
eor.w r4, r4, r3 // ..*...........................
smlabt r2, r2, r2, r4 // ..*...........................
asrs r2, r2, #1 // ....*.........................
str r2, [r0, #4] // ....*......................... // @slothy:writes=a
ldm r0, {r11,r12,r14} // .....*........................ // @slothy:reads=a
add r2, r12, r11 // ........*.....................
eor.w r2, r2, r14 // .........*....................
smlabt r2, r12, r12, r2 // .........*....................
asrs r2, r2, #1 // ...........*..................
str r2, [r0, #4] // ...........*.................. // @slothy:writes=a
ldm r0, {r11,r12,r14} // ............*................. // @slothy:reads=a
add r2, r12, r11 // ...............*..............
eor.w r2, r2, r14 // ................*.............
smlabt r2, r12, r12, r2 // ................*.............
asrs r2, r2, #1 // ..................*...........
str r2, [r0, #4] // ..................*........... // @slothy:writes=a
ldm r0, {r11,r12,r14} // ...................*.......... // @slothy:reads=a
add r2, r12, r11 // ......................*.......
eor.w r14, r2, r14 // .......................*......
smlabt r6, r12, r12, r14 // .......................*......
ldrd r4, r2, [r14, #8] // ........................*.....
str r4, [r0] // ........................*.....
asrs r2, r6, #1 // .........................*....
str r2, [r0, #4] // .........................*.... // @slothy:writes=a
ldm r0, {r0,r1,r2,r3} // ..........................*...
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your output shows that you have not turned on fusion, so the code you have committed isn't actually used.
You need to add a call to fusion_region to the corresponding example in example.py.


// ------ cycle (expected) ------>
// 0 25
// |------------------------|-----
// ldr r1, [r0, #4] // *..............................
// add r1, r2,r1 // .*.............................
// eor.w r1,r1, r3 // ..*............................
// smlabt r3,r2, r2, r1 // ..*............................
// asrs r3, r3,#1 // ....*..........................
// str r3, [r0,#4] // ....*..........................
// ldm r0, {r1-r2,r14} // .....*.........................
// add r1, r2,r1 // ........*......................
// eor.w r1,r1, r14 // .........*.....................
// smlabt r3,r2, r2, r1 // .........*.....................
// asrs r3, r3,#1 // ...........*...................
// str r3, [r0,#4] // ...........*...................
// ldm r0, {r1-r3} // ............*..................
// add r1, r2,r1 // ...............*...............
// eor.w r1,r1, r3 // ................*..............
// smlabt r3,r2, r2, r1 // ................*..............
// asrs r3, r3,#1 // ..................*............
// str r3, [r0,#4] // ..................*............
// ldm r0, {r1,r2,r3} // ...................*...........
// add r1, r2,r1 // ......................*........
// eor.w r1,r1, r3 // .......................*.......
// smlabt r3,r2, r2, r1 // .......................*.......
// asrs r3, r3,#1 // .........................*.....
// str r3, [r0,#4] // .........................*.....
// ------ cycle (expected) ------>
// 0 25
// |------------------------|-----
// ldr r1, [r0, #4] // *..............................
// add r1, r2,r1 // .*.............................
// eor.w r1,r1, r3 // ..*............................
// smlabt r3,r2, r2, r1 // ..*............................
// asrs r3, r3,#1 // ....*..........................
// str r3, [r0,#4] // ....*..........................
// ldm r0, {r1-r2,r14} // .....*.........................
// add r1, r2,r1 // ........*......................
// eor.w r1,r1, r14 // .........*.....................
// smlabt r3,r2, r2, r1 // .........*.....................
// asrs r3, r3,#1 // ...........*...................
// str r3, [r0,#4] // ...........*...................
// ldm r0, {r1-r3} // ............*..................
// add r1, r2,r1 // ...............*...............
// eor.w r1,r1, r3 // ................*..............
// smlabt r3,r2, r2, r1 // ................*..............
// asrs r3, r3,#1 // ..................*............
// str r3, [r0,#4] // ..................*............
// ldm r0, {r1,r2,r3} // ...................*...........
// add r1, r2,r1 // ......................*........
// eor.w r1,r1, r3 // .......................*.......
// smlabt r3,r2, r2, r1 // .......................*.......
// asrs r3, r3,#1 // .........................*.....
// str r3, [r0,#4] // .........................*.....
// ldrd r2, r3, [r1, #8] // ........................*......
// str r2,[r0] // ........................*......
// ldm r0 ,{r0-r3} // ..........................*....

end:
26 changes: 26 additions & 0 deletions slothy/targets/arm_v7m/arch_v7m.py
Original file line number Diff line number Diff line change
Expand Up @@ -1940,6 +1940,19 @@ def core(inst,t,log=None):
add_comments(inst.source_line.comments)
ldr.source_line = ldr_src

# If the address register also appears in the register list, the ldr
# that overwrites it must come last: move it to the end so that the
# preceding ldrs are emitted before the address register is overwritten.
ldrs_reordered = []
mkannwischer marked this conversation as resolved.
Show resolved Hide resolved
for ldr, reg in zip(ldrs, regs):
if reg != ptr:
ldrs_reordered.append(ldr)

for ldr, reg in zip(ldrs, regs):
if reg == ptr:
ldrs_reordered.append(ldr)
ldrs = ldrs_reordered

if log is not None:
log(f"ldm splitting: {t.inst}; {[ldr for ldr in ldrs]}")

Expand Down Expand Up @@ -2128,6 +2141,19 @@ def core(inst,t,log=None):
add_comments(inst.source_line.comments)
ldr.source_line = ldr_src

# If the address register is also one of the loaded registers, the ldr
# that overwrites it must come last: move it to the end so that the
# preceding ldrs are emitted before the address register is overwritten.
ldrs_reordered = []
mkannwischer marked this conversation as resolved.
Show resolved Hide resolved
for ldr, reg in zip(ldrs, regs):
if reg != ptr:
ldrs_reordered.append(ldr)

for ldr, reg in zip(ldrs, regs):
if reg == ptr:
ldrs_reordered.append(ldr)
ldrs = ldrs_reordered

if log is not None:
log(f"ldrd splitting: {t.inst}; {[ldr for ldr in ldrs]}")

Expand Down