scripts: Enhance glibcpp to do basic macro processing

Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
This commit is contained in:
Florian Weimer 2022-09-22 12:10:41 +02:00
parent f40c7887d3
commit e6e6184bed
3 changed files with 542 additions and 2 deletions

View File

@ -33,7 +33,9 @@ Accepts non-ASCII characters only within comments and strings.
"""
import collections
import operator
import re
import sys
# Caution: The order of the outermost alternation matters.
# STRING must be before BAD_STRING, CHARCONST before BAD_CHARCONST,
@ -210,3 +212,318 @@ def tokenize_c(file_contents, reporter):
yield tok
pos = mo.end()
class MacroDefinition(collections.namedtuple('MacroDefinition',
'name_token args body error')):
"""A preprocessor macro definition.
name_token is the Token_ for the name.
args is None for a macro that is not function-like. Otherwise, it
is a tuple that contains the macro argument name tokens.
body is a tuple that contains the tokens that constitue the body
of the macro definition (excluding whitespace).
error is None if no error was detected, or otherwise a problem
description associated with this macro definition.
"""
@property
def function(self):
"""Return true if the macro is function-like."""
return self.args is not None
@property
def name(self):
"""Return the name of the macro being defined."""
return self.name_token.text
@property
def line(self):
"""Return the line number of the macro defintion."""
return self.name_token.line
@property
def args_lowered(self):
"""Return the macro argument list as a list of strings"""
if self.function:
return [token.text for token in self.args]
else:
return None
@property
def body_lowered(self):
"""Return the macro body as a list of strings."""
return [token.text for token in self.body]
def macro_definitions(tokens):
"""A generator for C macro definitions among tokens.
The generator yields MacroDefinition objects.
tokens must be iterable, yielding Token_ objects.
"""
macro_name = None
macro_start = False # Set to false after macro name and one otken.
macro_args = None # Set to a list during the macro argument sequence.
in_macro_args = False # True while processing macro identifier-list.
error = None
body = []
for token in tokens:
if token.context == 'define' and macro_name is None \
and token.kind == 'IDENT':
# Starting up macro processing.
if macro_start:
# First identifier is the macro name.
macro_name = token
else:
# Next token is the name.
macro_start = True
continue
if macro_name is None:
# Drop tokens not in macro definitions.
continue
if token.context != 'define':
# End of the macro definition.
if in_macro_args and error is None:
error = 'macro definition ends in macro argument list'
yield MacroDefinition(macro_name, macro_args, tuple(body), error)
# No longer in a macro definition.
macro_name = None
macro_start = False
macro_args = None
in_macro_args = False
error = None
body.clear()
continue
if macro_start:
# First token after the macro name.
macro_start = False
if token.kind == 'PUNCTUATOR' and token.text == '(':
macro_args = []
in_macro_args = True
continue
if in_macro_args:
if token.kind == 'IDENT' \
or (token.kind == 'PUNCTUATOR' and token.text == '...'):
# Macro argument or ... placeholder.
macro_args.append(token)
if token.kind == 'PUNCTUATOR':
if token.text == ')':
macro_args = tuple(macro_args)
in_macro_args = False
elif token.text == ',':
pass # Skip. Not a full syntax check.
elif error is None:
error = 'invalid punctuator in macro argument list: ' \
+ repr(token.text)
elif error is None:
error = 'invalid {} token in macro argument list'.format(
token.kind)
continue
if token.kind not in ('WHITESPACE', 'BLOCK_COMMENT'):
body.append(token)
# Emit the macro in case the last line does not end with a newline.
if macro_name is not None:
if in_macro_args and error is None:
error = 'macro definition ends in macro argument list'
yield MacroDefinition(macro_name, macro_args, tuple(body), error)
# Used to split UL etc. suffixes from numbers such as 123UL.
RE_SPLIT_INTEGER_SUFFIX = re.compile(r'([^ullULL]+)([ullULL]*)')
BINARY_OPERATORS = {
'+': operator.add,
'<<': operator.lshift,
}
# Use the general-purpose dict type if it is order-preserving.
if (sys.version_info[0], sys.version_info[1]) <= (3, 6):
OrderedDict = collections.OrderedDict
else:
OrderedDict = dict
def macro_eval(macro_defs, reporter):
"""Compute macro values
macro_defs is the output from macro_definitions. reporter is an
object that accepts reporter.error(line_number, message) and
reporter.note(line_number, message) calls to report errors
and error context invocations.
The returned dict contains the values of macros which are not
function-like, pairing their names with their computed values.
The current implementation is incomplete. It is deliberately not
entirely faithful to C, even in the implemented parts. It checks
that macro replacements follow certain syntactic rules even if
they are never evaluated.
"""
# Unevaluated macro definitions by name.
definitions = OrderedDict()
for md in macro_defs:
if md.name in definitions:
reporter.error(md.line, 'macro {} redefined'.format(md.name))
reporter.note(definitions[md.name].line,
'location of previous definition')
else:
definitions[md.name] = md
# String to value mappings for fully evaluated macros.
evaluated = OrderedDict()
# String to macro definitions during evaluation. Nice error
# reporting relies on determinstic iteration order.
stack = OrderedDict()
def eval_token(current, token):
"""Evaluate one macro token.
Integers and strings are returned as such (the latter still
quoted). Identifiers are expanded.
None indicates an empty expansion or an error.
"""
if token.kind == 'PP_NUMBER':
value = None
m = RE_SPLIT_INTEGER_SUFFIX.match(token.text)
if m:
try:
value = int(m.group(1), 0)
except ValueError:
pass
if value is None:
reporter.error(token.line,
'invalid number {!r} in definition of {}'.format(
token.text, current.name))
return value
if token.kind == 'STRING':
return token.text
if token.kind == 'CHARCONST' and len(token.text) == 3:
return ord(token.text[1])
if token.kind == 'IDENT':
name = token.text
result = eval1(current, name)
if name not in evaluated:
evaluated[name] = result
return result
reporter.error(token.line,
'unrecognized {!r} in definition of {}'.format(
token.text, current.name))
return None
def eval1(current, name):
"""Evaluate one name.
The name is looked up and the macro definition evaluated
recursively if necessary. The current argument is the macro
definition being evaluated.
None as a return value indicates an error.
"""
# Fast path if the value has already been evaluated.
if name in evaluated:
return evaluated[name]
try:
md = definitions[name]
except KeyError:
reporter.error(current.line,
'reference to undefined identifier {} in definition of {}'
.format(name, current.name))
return None
if md.name in stack:
# Recursive macro definition.
md = stack[name]
reporter.error(md.line,
'macro definition {} refers to itself'.format(md.name))
for md1 in reversed(list(stack.values())):
if md1 is md:
break
reporter.note(md1.line,
'evaluated from {}'.format(md1.name))
return None
stack[md.name] = md
if md.function:
reporter.error(current.line,
'attempt to evaluate function-like macro {}'.format(name))
reporter.note(md.line, 'definition of {}'.format(md.name))
return None
try:
body = md.body
if len(body) == 0:
# Empty expansion.
return None
# Remove surrounding ().
if body[0].text == '(' and body[-1].text == ')':
body = body[1:-1]
had_parens = True
else:
had_parens = False
if len(body) == 1:
return eval_token(md, body[0])
# Minimal expression evaluator for binary operators.
op = body[1].text
if len(body) == 3 and op in BINARY_OPERATORS:
if not had_parens:
reporter.error(body[1].line,
'missing parentheses around {} expression'.format(op))
reporter.note(md.line,
'in definition of macro {}'.format(md.name))
left = eval_token(md, body[0])
right = eval_token(md, body[2])
if type(left) != type(1):
reporter.error(left.line,
'left operand of {} is not an integer'.format(op))
reporter.note(md.line,
'in definition of macro {}'.format(md.name))
if type(right) != type(1):
reporter.error(left.line,
'right operand of {} is not an integer'.format(op))
reporter.note(md.line,
'in definition of macro {}'.format(md.name))
return BINARY_OPERATORS[op](left, right)
reporter.error(md.line,
'uninterpretable macro token sequence: {}'.format(
' '.join(md.body_lowered)))
return None
finally:
del stack[md.name]
# Start of main body of macro_eval.
for md in definitions.values():
name = md.name
if name not in evaluated and not md.function:
evaluated[name] = eval1(md, name)
return evaluated

View File

@ -274,12 +274,12 @@ $(objpfx)test-run-command : $(libsupport) $(common-objpfx)elf/static-stubs.o
tests = \
README-testing \
tst-support-namespace \
tst-support-open-dev-null-range \
tst-support-process_state \
tst-support_blob_repeat \
tst-support_capture_subprocess \
tst-support_descriptors \
tst-support_format_dns_packet \
tst-support-open-dev-null-range \
tst-support-process_state \
tst-support_quote_blob \
tst-support_quote_blob_wide \
tst-support_quote_string \
@ -304,6 +304,12 @@ $(objpfx)tst-support_record_failure-2.out: tst-support_record_failure-2.sh \
$(evaluate-test)
endif
tests-special += $(objpfx)tst-glibcpp.out
$(objpfx)tst-glibcpp.out: tst-glibcpp.py $(..)scripts/glibcpp.py
PYTHONPATH=$(..)scripts $(PYTHON) tst-glibcpp.py > $@ 2>&1; \
$(evaluate-test)
$(objpfx)tst-support_format_dns_packet: $(common-objpfx)resolv/libresolv.so
tst-support_capture_subprocess-ARGS = -- $(host-test-program-cmd)

217
support/tst-glibcpp.py Normal file
View File

@ -0,0 +1,217 @@
#! /usr/bin/python3
# Tests for scripts/glibcpp.py
# Copyright (C) 2022 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
import inspect
import sys
import glibcpp
# Error counter.
errors = 0
class TokenizerErrors:
"""Used as the error reporter during tokenization."""
def __init__(self):
self.errors = []
def error(self, token, message):
self.errors.append((token, message))
def check_macro_definitions(source, expected):
reporter = TokenizerErrors()
tokens = glibcpp.tokenize_c(source, reporter)
actual = []
for md in glibcpp.macro_definitions(tokens):
if md.function:
md_name = '{}({})'.format(md.name, ','.join(md.args_lowered))
else:
md_name = md.name
actual.append((md_name, md.body_lowered))
if actual != expected or reporter.errors:
global errors
errors += 1
# Obtain python source line information.
frame = inspect.stack(2)[1]
print('{}:{}: error: macro definition mismatch, actual definitions:'
.format(frame[1], frame[2]))
for md in actual:
print('note: {} {!r}'.format(md[0], md[1]))
if reporter.errors:
for err in reporter.errors:
print('note: tokenizer error: {}: {}'.format(
err[0].line, err[1]))
def check_macro_eval(source, expected, expected_errors=''):
reporter = TokenizerErrors()
tokens = list(glibcpp.tokenize_c(source, reporter))
if reporter.errors:
# Obtain python source line information.
frame = inspect.stack(2)[1]
for err in reporter.errors:
print('{}:{}: tokenizer error: {}: {}'.format(
frame[1], frame[2], err[0].line, err[1]))
return
class EvalReporter:
"""Used as the error reporter during evaluation."""
def __init__(self):
self.lines = []
def error(self, line, message):
self.lines.append('{}: error: {}\n'.format(line, message))
def note(self, line, message):
self.lines.append('{}: note: {}\n'.format(line, message))
reporter = EvalReporter()
actual = glibcpp.macro_eval(glibcpp.macro_definitions(tokens), reporter)
actual_errors = ''.join(reporter.lines)
if actual != expected or actual_errors != expected_errors:
global errors
errors += 1
# Obtain python source line information.
frame = inspect.stack(2)[1]
print('{}:{}: error: macro evaluation mismatch, actual results:'
.format(frame[1], frame[2]))
for k, v in actual.items():
print(' {}: {!r}'.format(k, v))
for msg in reporter.lines:
sys.stdout.write(' | ' + msg)
# Individual test cases follow.
check_macro_definitions('', [])
check_macro_definitions('int main()\n{\n{\n', [])
check_macro_definitions("""
#define A 1
#define B 2 /* ignored */
#define C 3 // also ignored
#define D \
4
#define STRING "string"
#define FUNCLIKE(a, b) (a + b)
#define FUNCLIKE2(a, b) (a + \
b)
""", [('A', ['1']),
('B', ['2']),
('C', ['3']),
('D', ['4']),
('STRING', ['"string"']),
('FUNCLIKE(a,b)', list('(a+b)')),
('FUNCLIKE2(a,b)', list('(a+b)')),
])
check_macro_definitions('#define MACRO', [('MACRO', [])])
check_macro_definitions('#define MACRO\n', [('MACRO', [])])
check_macro_definitions('#define MACRO()', [('MACRO()', [])])
check_macro_definitions('#define MACRO()\n', [('MACRO()', [])])
check_macro_eval('#define A 1', {'A': 1})
check_macro_eval('#define A (1)', {'A': 1})
check_macro_eval('#define A (1 + 1)', {'A': 2})
check_macro_eval('#define A (1U << 31)', {'A': 1 << 31})
check_macro_eval('''\
#define A (B + 1)
#define B 10
#define F(x) ignored
#define C "not ignored"
''', {
'A': 11,
'B': 10,
'C': '"not ignored"',
})
# Checking for evaluation errors.
check_macro_eval('''\
#define A 1
#define A 2
''', {
'A': 1,
}, '''\
2: error: macro A redefined
1: note: location of previous definition
''')
check_macro_eval('''\
#define A A
#define B 1
''', {
'A': None,
'B': 1,
}, '''\
1: error: macro definition A refers to itself
''')
check_macro_eval('''\
#define A B
#define B A
''', {
'A': None,
'B': None,
}, '''\
1: error: macro definition A refers to itself
2: note: evaluated from B
''')
check_macro_eval('''\
#define A B
#define B C
#define C A
''', {
'A': None,
'B': None,
'C': None,
}, '''\
1: error: macro definition A refers to itself
3: note: evaluated from C
2: note: evaluated from B
''')
check_macro_eval('''\
#define A 1 +
''', {
'A': None,
}, '''\
1: error: uninterpretable macro token sequence: 1 +
''')
check_macro_eval('''\
#define A 3*5
''', {
'A': None,
}, '''\
1: error: uninterpretable macro token sequence: 3 * 5
''')
check_macro_eval('''\
#define A 3 + 5
''', {
'A': 8,
}, '''\
1: error: missing parentheses around + expression
1: note: in definition of macro A
''')
if errors:
sys.exit(1)