下篇就是将 IR 转成汇编语言了……
import
llvmlite.ir as ir
import
llvmlite.binding as llvm
# Initialise llvmlite's binding layer, then the native target and the native
# assembly printer.
llvm.initialize()
llvm.initialize_native_target()
llvm.initialize_native_asmprinter()

# Shared code-generation state used by the rest of the file.
# `module` is the IR module that generated functions are added to.
module = ir.Module(name="my_module")
# NOTE(review): `context` is not referenced by any other visible code.
context = ir.Context()
# Placeholders; Parser.parse() rebinds both through a `global` statement.
function, builder = None, None
class Token:
    """A lexical token: a category tag plus an optional payload.

    Attributes:
        type: Category string supplied by the creator, e.g. ``"NUMBER"``.
        value: Payload carried by the token; ``None`` when not supplied.
    """

    def __init__(self, type, value=None):
        # `type` shadows the builtin inside this method only. The name is kept
        # because it is part of the constructor's keyword interface.
        self.type = type
        self.value = value

    def __repr__(self):
        """Return a constructor-style representation for debugging output."""
        return f"{self.__class__.__name__}({self.type!r}, {self.value!r})"
class Lexer:
    """Turn an expression string into ``Token`` objects, one per call.

    Recognised tokens:
        IDENTIFIER  a letter followed by letters/digits (value: the name)
        NUMBER      a run of decimal digits (value: the ``int``)
        OPERATOR    one of ``+ - * /`` (value: the character)
        ASSIGN, SEMICOLON, LPAREN, RPAREN  the characters ``= ; ( )``
        EOF         returned once the input is exhausted (value: ``None``)

    Attributes:
        text: The source string being scanned.
        pos: Index of the next unread character in ``text``.
    """

    # Characters that are skipped between tokens.
    _WHITESPACE = " \t\r\n"
    # Characters that each form an OPERATOR token.
    _OPERATORS = "+-*/"
    # Single-character tokens with a fixed token type.
    _PUNCTUATION = {
        "=": "ASSIGN",
        ";": "SEMICOLON",
        "(": "LPAREN",
        ")": "RPAREN",
    }

    def __init__(self, text):
        self.text = text
        self.pos = 0

    def get_next_token(self):
        """Consume and return the next token from ``text``.

        Returns:
            The next ``Token``; a ``Token("EOF")`` once the input is used up
            (repeated calls keep returning EOF).

        Raises:
            ValueError: If the next non-blank character starts no known token.
                ``pos`` is left pointing at the offending character.
        """
        text = self.text
        end = len(text)

        # Skip blanks with a loop rather than recursion so that long runs of
        # whitespace cannot exhaust the call stack. Line breaks are skipped
        # too, which lets multi-line programs be tokenised.
        while self.pos < end and text[self.pos] in self._WHITESPACE:
            self.pos += 1

        if self.pos >= end:
            return Token("EOF")

        start = self.pos
        current_char = text[start]

        if current_char.isalpha():
            while self.pos < end and text[self.pos].isalnum():
                self.pos += 1
            return Token("IDENTIFIER", text[start:self.pos])

        # isdecimal() (not isdigit()) so that every accepted character is one
        # int() can actually convert. The whole digit run becomes one NUMBER,
        # so "12" is twelve rather than the two tokens 1 and 2.
        if current_char.isdecimal():
            while self.pos < end and text[self.pos].isdecimal():
                self.pos += 1
            return Token("NUMBER", int(text[start:self.pos]))

        if current_char in self._OPERATORS:
            self.pos += 1
            return Token("OPERATOR", current_char)

        token_type = self._PUNCTUATION.get(current_char)
        if token_type is not None:
            self.pos += 1
            return Token(token_type, current_char)

        raise ValueError("Invalid character")
class Parser:
    """Precedence-climbing parser that emits LLVM IR while it parses.

    Grammar handled::

        program    := { statement [";"] } EOF
        statement  := IDENTIFIER "=" expression ";"
                    | expression
        expression := atom { OPERATOR expression }   (by operator precedence)
        atom       := NUMBER | IDENTIFIER | "(" expression ")"

    Instructions are emitted through the module-level ``builder``, which
    ``parse()`` creates; the expression methods therefore only work after
    ``parse()`` has set it up.

    Attributes:
        lexer: Object providing ``get_next_token()``.
        current_token: The one-token lookahead.
        variables: Maps each assigned variable name to the IR value of the
            expression last assigned to it.
    """

    # Binding strength of each binary operator; a larger number binds tighter.
    _PRECEDENCE = {"+": 1, "-": 1, "*": 2, "/": 2}

    # Operator -> (IRBuilder method name, name given to the result value).
    _IR_BINARY_OPS = {
        "+": ("add", "addtmp"),
        "-": ("sub", "subtmp"),
        "*": ("mul", "multmp"),
        "/": ("sdiv", "divtmp"),
    }

    def __init__(self, lexer):
        self.lexer = lexer
        self.current_token = self.lexer.get_next_token()
        self.variables = {}

    def parse(self):
        """Parse the whole input into a new ``main`` function.

        Side effects: adds an ``i32 main()`` function with an ``entry`` block
        to the module-level ``module`` and rebinds the module-level
        ``function`` and ``builder`` names.

        Returns:
            A list with the IR value of every statement, in source order.
            ``main`` returns the last of them, or the constant 0 when the
            input contains no statements.

        Raises:
            ValueError: On a syntax error or a reference to an unassigned
                variable.
        """
        global builder, function

        int32 = ir.IntType(32)
        function = ir.Function(module, ir.FunctionType(int32, ()), name="main")
        block = function.append_basic_block(name="entry")
        builder = ir.IRBuilder(block)

        results = []
        while self.current_token.type != "EOF":
            results.append(self.parse_statement())
            # A separator after a statement is optional at this level.
            if self.current_token.type == "SEMICOLON":
                self.eat("SEMICOLON")

        # An empty program has no last value to return; give main a defined
        # result instead of failing with IndexError on results[-1].
        builder.ret(results[-1] if results else ir.Constant(int32, 0))
        return results

    def parse_expression(self, min_precedence=0):
        """Parse a binary expression and return its IR value.

        Args:
            min_precedence: Operators binding less tightly than this are left
                for the caller to consume.
        """
        return self._parse_binary_tail(self.parse_atom(), min_precedence)

    def _parse_binary_tail(self, left, min_precedence):
        """Fold ``OPERATOR operand`` pairs onto an already parsed ``left``."""
        while (
            self.current_token.type == "OPERATOR"
            and self.precedence(self.current_token.value) >= min_precedence
        ):
            operator = self.current_token.value
            self.eat("OPERATOR")
            # Asking for strictly higher precedence on the right-hand side
            # makes operators of equal precedence group to the left.
            right = self.parse_expression(self.precedence(operator) + 1)
            method_name, value_name = self._IR_BINARY_OPS[operator]
            left = getattr(builder, method_name)(left, right, name=value_name)
        return left

    def parse_atom(self):
        """Parse a number, a variable reference or a parenthesised expression.

        Raises:
            ValueError: ``"Invalid syntax"`` if no atom starts here, or
                ``"Undefined variable: ..."`` for an unassigned name.
        """
        kind = self.current_token.type

        if kind == "NUMBER":
            value = self.current_token.value
            self.eat("NUMBER")
            return ir.Constant(ir.IntType(32), value)

        if kind == "IDENTIFIER":
            variable_name = self.current_token.value
            self.eat("IDENTIFIER")
            return self._lookup_variable(variable_name)

        if kind == "LPAREN":
            self.eat("LPAREN")
            expression = self.parse_expression()
            self.eat("RPAREN")
            return expression

        raise ValueError("Invalid syntax")

    def _lookup_variable(self, variable_name):
        """Return the value bound to ``variable_name`` or raise ValueError."""
        try:
            return self.variables[variable_name]
        except KeyError:
            raise ValueError(f"Undefined variable: {variable_name}") from None

    def parse_statement(self):
        """Parse one assignment or expression statement and return its value.

        An assignment (``name = expression ;``) also records the value in
        ``variables``. A statement that starts with an identifier but is not
        followed by ``=`` is treated as an expression beginning with that
        variable.
        """
        if self.current_token.type != "IDENTIFIER":
            return self.parse_expression()

        variable_name = self.current_token.value
        self.eat("IDENTIFIER")

        if self.current_token.type == "ASSIGN":
            self.eat("ASSIGN")
            expression_value = self.parse_expression()
            self.eat("SEMICOLON")
            self.variables[variable_name] = expression_value
            return expression_value

        # Not an assignment: the identifier already consumed is the first
        # operand of an ordinary expression such as `x + 1`.
        return self._parse_binary_tail(self._lookup_variable(variable_name), 0)

    def eat(self, token_type):
        """Advance past the current token if it has type ``token_type``.

        Raises:
            ValueError: ``"Unexpected token"`` if the types differ.
        """
        if self.current_token.type != token_type:
            raise ValueError("Unexpected token")
        self.current_token = self.lexer.get_next_token()

    def precedence(self, operator):
        """Return the precedence of ``operator``; 0 if it is not known."""
        return self._PRECEDENCE.get(operator, 0)

    def apply_operator(self, left, operator, right):
        """Apply ``operator`` to two Python values and return the result.

        Not called by any other visible code. ``/`` is Python's true
        division here. Returns ``None`` for an operator other than
        ``+ - * /``.
        """
        if operator == "+":
            return left + right
        elif operator == "-":
            return left - right
        elif operator == "*":
            return left * right
        elif operator == "/":
            return left / right
def calculate(expression):
    """Parse ``expression`` and return ``(results, formatted_ir)``.

    ``results`` is the list returned by ``Parser.parse()`` for the input.
    ``formatted_ir`` is ``str(module)``, taken after parsing has finished,
    i.e. the textual form of the module-level ``module`` object.
    """
    results = Parser(Lexer(expression)).parse()
    return results, str(module)
# Demo: run a two-statement program through calculate() and print both parts
# of the returned pair.
expression = "x = 3 * (4-1); y = 2*x + 2;"
llvm_ir = calculate(expression)

# First element: the per-statement values returned by the parser.
print("\n------------------------------------------")
print("LLVM IR的内部表示:\n", llvm_ir[0])

# Second element: the module rendered as text.
print("\n------------------------------------------")
print("LLVM IR的内部表示格式化和排版的LLVM IR汇编代码:\n", llvm_ir[1])