OpenQASM 3.0 Grammar
OpenQASM 3.0 grammar specification based on the ANTLR parser generator.
The ANTLR grammar is intended to serve as the official reference grammar for OpenQASM3 and defines the set of syntactically valid statements in the language. ANTLR is used because it provides a human-readable EBNF format that is computer-validatable. It provides an auto-generated parser that is used to validate the example files (see openqasm/examples/). As an extension, it can be used to check whether a source file is parseable OpenQASM3 (note this does not ensure that the OpenQASM3 is semantically correct).
The ANTLR parser does not attempt to be performant or construct an AST. Furthermore, it does not conduct any semantic analysis.
The ANTLR grammar may serve as a guide when developing compiler tooling for OpenQASM3.
1parser grammar qasm3Parser;
2
// Token types are taken from the separate lexer grammar `qasm3Lexer`.
3options {
4 tokenVocab = qasm3Lexer;
5}
6
// Entry rule: an optional version line, then any number of statements or
// braced scopes, up to end of input.
7program: version? statementOrScope* EOF;
// `VersionSpecifier` is only produced in the lexer's VERSION_IDENTIFIER mode,
// which the OPENQASM token pushes.
8version: OPENQASM VersionSpecifier SEMICOLON;
9
10// A statement is any valid single statement of an OpenQASM 3 program, with the
11// exception of the version-definition statement (which must be unique, and the
12// first statement of the file if present). This file just defines rules for
13// parsing; we leave semantic analysis and rejection of invalid scopes for
14// compiler implementations.
15statement:
16 pragma
17 // All the actual statements of the language.
 // Any number of annotations may precede one of these; the `pragma`
 // alternative above takes none.
18 | annotation* (
19 aliasDeclarationStatement
20 | assignmentStatement
21 | barrierStatement
22 | boxStatement
23 | breakStatement
24 | calStatement
25 | calibrationGrammarStatement
26 | classicalDeclarationStatement
27 | constDeclarationStatement
28 | continueStatement
29 | defStatement
30 | defcalStatement
31 | delayStatement
32 | endStatement
33 | expressionStatement
34 | externStatement
35 | forStatement
36 | gateCallStatement
37 | gateStatement
38 | ifStatement
39 | includeStatement
40 | ioDeclarationStatement
41 | measureArrowAssignmentStatement
42 | nopStatement
43 | oldStyleDeclarationStatement
44 | quantumDeclarationStatement
45 | resetStatement
46 | returnStatement
47 | switchStatement
48 | whileStatement
49 )
50;
// An annotation keyword, optionally followed by the rest of its line.
51annotation: AnnotationKeyword RemainingLineContent?;
// A brace-delimited sequence of statements and nested scopes; may be empty.
52scope: LBRACE statementOrScope* RBRACE;
// Unlike `annotation`, a pragma requires trailing line content.
53pragma: PRAGMA RemainingLineContent;
54
// Used wherever either a single statement or a braced scope is accepted.
55statementOrScope: statement | scope;
56
57
58/* Start top-level statement definitions. */
59
60// Inclusion statements.
// Both take one `StringLiteral`; the lexer emits that token only in its
// ARBITRARY_STRING mode, which the INCLUDE and DEFCALGRAMMAR tokens push.
61calibrationGrammarStatement: DEFCALGRAMMAR StringLiteral SEMICOLON;
62includeStatement: INCLUDE StringLiteral SEMICOLON;
63
64// Control-flow statements.
65breakStatement: BREAK SEMICOLON;
66continueStatement: CONTINUE SEMICOLON;
67endStatement: END SEMICOLON;
// The loop variable is declared with a scalar type. The iteration source is a
// brace-delimited set, a bracketed range, or any expression.
68forStatement: FOR scalarType Identifier IN (setExpression | LBRACKET rangeExpression RBRACKET | expression) body=statementOrScope;
// The `else` branch is optional; each branch is a single statement or a scope.
69ifStatement: IF LPAREN expression RPAREN if_body=statementOrScope (ELSE else_body=statementOrScope)?;
// The returned value is optional and may be a measurement instead of an
// expression.
70returnStatement: RETURN (expression | measureExpression)? SEMICOLON;
71whileStatement: WHILE LPAREN expression RPAREN body=statementOrScope;
// A switch body holds zero or more items. Each item's body must be a braced
// scope, and a `case` may list several comma-separated expressions.
72switchStatement: SWITCH LPAREN expression RPAREN LBRACE switchCaseItem* RBRACE;
73switchCaseItem:
74 CASE expressionList scope
75 | DEFAULT scope
76;
77
78// Quantum directive statements.
// `barrier` and `nop` may omit their operand list; `delay` requires a
// designator and may omit its operands; `box` takes an optional designator
// followed by a braced scope.
79barrierStatement: BARRIER gateOperandList? SEMICOLON;
80boxStatement: BOX designator? scope;
81delayStatement: DELAY designator gateOperandList? SEMICOLON;
82nopStatement: NOP gateOperandList? SEMICOLON;
83/* `gateCallStatement` is split in two to avoid a potential ambiguity with an
84 * `expressionStatement` that consists of a single function call. The only
85 * "gate" that can have no operands is `gphase` with no control modifiers, and
86 * `gphase(pi);` looks grammatically identical to `fn(pi);`. We disambiguate by
87 * having `gphase` be its own token, and requiring that all other gate calls
88 * grammatically have at least one qubit. Strictly, as long as `gphase` is a
89 * separate token, ANTLR can disambiguate the statements by the definition
90 * order, but this is more robust. */
91gateCallStatement:
 // First alternative: named gate, operand list mandatory.
92 gateModifier* Identifier (LPAREN expressionList? RPAREN)? designator? gateOperandList SEMICOLON
 // Second alternative: `gphase`, operand list optional.
93 | gateModifier* GPHASE (LPAREN expressionList? RPAREN)? designator? gateOperandList? SEMICOLON
94;
95// measureArrowAssignmentStatement also permits the case of not assigning the
96// result to any classical value too.
97measureArrowAssignmentStatement: measureExpression (ARROW indexedIdentifier)? SEMICOLON;
// `reset` takes exactly one operand rather than an operand list.
98resetStatement: RESET gateOperand SEMICOLON;
99
100// Primitive declaration statements.
// An alias binds a name to an `aliasExpression` (see that rule for the `++`
// form).
101aliasDeclarationStatement: LET Identifier EQUALS aliasExpression SEMICOLON;
// Scalar or array type; the initialiser is optional.
102classicalDeclarationStatement: (scalarType | arrayType) Identifier (EQUALS declarationExpression)? SEMICOLON;
// `const` accepts only scalar types and requires an initialiser.
103constDeclarationStatement: CONST scalarType Identifier EQUALS declarationExpression SEMICOLON;
// IO declarations carry no initialiser.
104ioDeclarationStatement: (INPUT | OUTPUT) (scalarType | arrayType) Identifier SEMICOLON;
// `creg`/`qreg` form: the optional designator follows the name.
105oldStyleDeclarationStatement: (CREG | QREG) Identifier designator? SEMICOLON;
// Here the optional designator is part of `qubitType`, before the name.
106quantumDeclarationStatement: qubitType Identifier SEMICOLON;
107
108// Declarations and definitions of higher-order objects.
// `def` has a braced body; `extern` has the same header shape but ends in a
// semicolon and uses `externArgumentList` (types without names).
109defStatement: DEF Identifier LPAREN argumentDefinitionList? RPAREN returnSignature? scope;
110externStatement: EXTERN Identifier LPAREN externArgumentList? RPAREN returnSignature? SEMICOLON;
// The parenthesised parameter list is optional (and may be empty when
// present); the qubit identifier list is mandatory.
111gateStatement: GATE Identifier (LPAREN params=identifierList? RPAREN)? qubits=identifierList scope;
112
113// Non-declaration assignments and calculations.
// The target is an (optionally indexed) identifier; `op` labels the operator
// token, and the right-hand side may be a measurement.
114assignmentStatement: indexedIdentifier op=(EQUALS | CompoundAssignmentOperator) (expression | measureExpression) SEMICOLON;
115expressionStatement: expression SEMICOLON;
116
117// Statements where the bulk is in the calibration language.
// `CalibrationBlock` is a single token produced in the lexer's CAL_BLOCK mode.
// It is optional in both rules, so an empty `{}` body is accepted.
118calStatement: CAL LBRACE CalibrationBlock? RBRACE;
// The parenthesised argument list is optional; at least one operand is
// required.
119defcalStatement: DEFCAL defcalTarget (LPAREN defcalArgumentDefinitionList? RPAREN)? defcalOperandList returnSignature? LBRACE CalibrationBlock? RBRACE;
120
121
122/* End top-level statement definitions. */
123/* Start expression definitions. */
124
125
126// ANTLR4 can handle direct left-recursive rules, and ambiguities are guaranteed
127// to resolve in the order of definition. This means that the order of rules
128// here defines the precedence table, from most tightly binding to least.
// Each alternative carries a `# label`. `**` is marked right-associative, and
// the unary operators are listed after it, so by the ordering rule above they
// bind less tightly than exponentiation. Do not reorder alternatives.
129expression:
130 LPAREN expression RPAREN # parenthesisExpression
131 | expression indexOperator # indexExpression
132 | <assoc=right> expression op=DOUBLE_ASTERISK expression # powerExpression
133 | op=(TILDE | EXCLAMATION_POINT | MINUS) expression # unaryExpression
134 | expression op=(ASTERISK | SLASH | PERCENT) expression # multiplicativeExpression
135 | expression op=(PLUS | MINUS) expression # additiveExpression
136 | expression op=BitshiftOperator expression # bitshiftExpression
137 | expression op=ComparisonOperator expression # comparisonExpression
138 | expression op=EqualityOperator expression # equalityExpression
139 | expression op=AMPERSAND expression # bitwiseAndExpression
140 | expression op=CARET expression # bitwiseXorExpression
141 | expression op=PIPE expression # bitwiseOrExpression
142 | expression op=DOUBLE_AMPERSAND expression # logicalAndExpression
143 | expression op=DOUBLE_PIPE expression # logicalOrExpression
144 | (scalarType | arrayType) LPAREN expression RPAREN # castExpression
145 | DURATIONOF LPAREN scope RPAREN # durationofExpression
146 | Identifier LPAREN expressionList? RPAREN # callExpression
 // Despite its label, this last alternative also covers plain identifiers
 // and hardware qubits, not just literals.
147 | (
148 Identifier
149 | BinaryIntegerLiteral
150 | OctalIntegerLiteral
151 | DecimalIntegerLiteral
152 | HexIntegerLiteral
153 | FloatLiteral
154 | ImaginaryLiteral
155 | BooleanLiteral
156 | BitstringLiteral
157 | TimingLiteral
158 | HardwareQubit
159 ) # literalExpression
160;
161
162// Special-case expressions that are only valid in certain contexts. These are
163// not in the expression tree, but can contain elements that are within it.
// One or more expressions joined by `++`.
164aliasExpression: expression (DOUBLE_PLUS expression)*;
165declarationExpression: arrayLiteral | expression | measureExpression;
166measureExpression: MEASURE gateOperand;
// Only the first colon is mandatory: `:`, `a:`, `:b`, `a:b` and the
// three-part forms all match. The final part, if its colon is present,
// requires an expression.
167rangeExpression: expression? COLON expression? (COLON expression)?;
// At least one element is required; a trailing comma is allowed.
168setExpression: LBRACE expression (COMMA expression)* COMMA? RBRACE;
// May be empty (`{}`) and may nest; a trailing comma is allowed.
169arrayLiteral: LBRACE ((expression | arrayLiteral) (COMMA (expression | arrayLiteral))* COMMA?)? RBRACE;
170
171// The general form is a comma-separated list of indexing entities.
172// `setExpression` is only valid when being used as a single index: registers
173// can support it for creating aliases, but arrays cannot.
// The list form mixes expressions and ranges freely and allows a trailing
// comma; at least one entry is required.
174indexOperator:
175 LBRACKET
176 (
177 setExpression
178 | (expression | rangeExpression) (COMMA (expression | rangeExpression))* COMMA?
179 )
180 RBRACKET;
181// Alternative form to `indexExpression` for cases where an obvious l-value is
182// better grammatically than a generic expression. Some current uses of this
183// rule may be better as `expression`, leaving the semantic analysis to later
184// (for example in gate calls).
// Zero or more index operators may follow the identifier.
185indexedIdentifier: Identifier indexOperator*;
186
187/* End expression definitions. */
188/* Start type definitions. */
189
// Return types are restricted to scalar types.
190returnSignature: ARROW scalarType;
// Every modifier ends with `@`. `inv` takes no argument, `pow` requires one,
// and the argument to `ctrl`/`negctrl` is optional.
191gateModifier: (
192 INV
193 | POW LPAREN expression RPAREN
194 | (CTRL | NEGCTRL) (LPAREN expression RPAREN)?
195) AT;
196
// A designator is optional on bit/int/uint/float/angle; bool, duration and
// stretch take none. `complex` optionally takes a bracketed scalar type.
197scalarType:
198 BIT designator?
199 | INT designator?
200 | UINT designator?
201 | FLOAT designator?
202 | ANGLE designator?
203 | BOOL
204 | DURATION
205 | STRETCH
206 | COMPLEX (LBRACKET scalarType RBRACKET)?
207;
208qubitType: QUBIT designator?;
// Element type first, then one or more comma-separated expressions.
209arrayType: ARRAY LBRACKET scalarType COMMA expressionList RBRACKET;
// Like `arrayType` but qualified `readonly`/`mutable`; after the element type
// it accepts either an expression list or the `#dim = <expression>` form.
210arrayReferenceType: (READONLY | MUTABLE) ARRAY LBRACKET scalarType COMMA (expressionList | DIM EQUALS expression) RBRACKET;
211
// A single bracketed expression.
212designator: LBRACKET expression RBRACKET;
213
// A defcal may target the `measure`, `reset` or `delay` keywords as well as a
// named identifier.
214defcalTarget: MEASURE | RESET | DELAY | Identifier;
// A defcal argument is either an expression or a typed argument definition.
215defcalArgumentDefinition: expression | argumentDefinition;
216defcalOperand: HardwareQubit | Identifier;
// Unlike `defcalOperand`, a gate operand may be an indexed identifier.
217gateOperand: indexedIdentifier | HardwareQubit;
// Extern arguments are types only; no parameter names appear.
218externArgument: scalarType | arrayReferenceType | CREG designator?;
// Each alternative pairs a type with a name. For `creg`/`qreg` the optional
// designator follows the name.
219argumentDefinition:
220 scalarType Identifier
221 | qubitType Identifier
222 | (CREG | QREG) Identifier designator?
223 | arrayReferenceType Identifier
224;
225
// Comma-separated lists. Each requires at least one element and permits a
// single trailing comma.
226argumentDefinitionList: argumentDefinition (COMMA argumentDefinition)* COMMA?;
227defcalArgumentDefinitionList: defcalArgumentDefinition (COMMA defcalArgumentDefinition)* COMMA?;
228defcalOperandList: defcalOperand (COMMA defcalOperand)* COMMA?;
229expressionList: expression (COMMA expression)* COMMA?;
230identifierList: Identifier (COMMA Identifier)* COMMA?;
231gateOperandList: gateOperand (COMMA gateOperand)* COMMA?;
232externArgumentList: externArgument (COMMA externArgument)* COMMA?;
1lexer grammar qasm3Lexer;
2
3/* Naming conventions in this lexer grammar
4 *
5 * - Keywords and exact symbols that have only one possible value are written in
6 * all caps. There is no more information in the parsed text than in the name
7 * of the lexeme. For example, `INCLUDE` is only ever the string `'include'`.
8 *
9 * - Lexemes with information in the string form are in PascalCase. This
10 * indicates there is more information in the token than just the name. For
11 * example, `Identifier` has a payload containing the name of the identifier.
12 */
13
14/* Language keywords. */
15
// These three push a dedicated mode for their single argument; that mode pops
// itself once the argument token (VersionSpecifier / StringLiteral) is matched.
16OPENQASM: 'OPENQASM' -> pushMode(VERSION_IDENTIFIER);
17INCLUDE: 'include' -> pushMode(ARBITRARY_STRING);
18DEFCALGRAMMAR: 'defcalgrammar' -> pushMode(ARBITRARY_STRING);
19DEF: 'def';
// `cal` and `defcal` switch mode with `mode(...)` rather than `pushMode(...)`;
// the closing brace rule in CAL_BLOCK switches back to DEFAULT_MODE.
20CAL: 'cal' -> mode(CAL_PRELUDE);
21DEFCAL: 'defcal' -> mode(DEFCAL_PRELUDE);
22GATE: 'gate';
23EXTERN: 'extern';
24BOX: 'box';
25LET: 'let';
26
27BREAK: 'break';
28CONTINUE: 'continue';
29IF: 'if';
30ELSE: 'else';
31END: 'end';
32RETURN: 'return';
33FOR: 'for';
34WHILE: 'while';
35IN: 'in';
36SWITCH: 'switch';
37CASE: 'case';
38DEFAULT: 'default';
39NOP: 'nop';
40
// The leading `#` on `pragma` is optional. An annotation keyword is `@`
// followed by one or more dot-separated identifiers. Both rules push
// EAT_TO_LINE_END so the rest of the line is lexed as one token.
41PRAGMA: '#'? 'pragma' -> pushMode(EAT_TO_LINE_END);
42AnnotationKeyword: '@' Identifier ('.' Identifier)* -> pushMode(EAT_TO_LINE_END);
43
44
45/* Types. */
46
// Declaration qualifiers.
47INPUT: 'input';
48OUTPUT: 'output';
49CONST: 'const';
50READONLY: 'readonly';
51MUTABLE: 'mutable';
52
// Quantum types.
53QREG: 'qreg';
54QUBIT: 'qubit';
55
// Classical types.
56CREG: 'creg';
57BOOL: 'bool';
58BIT: 'bit';
59INT: 'int';
60UINT: 'uint';
61FLOAT: 'float';
62ANGLE: 'angle';
63COMPLEX: 'complex';
64ARRAY: 'array';
// NOTE(review): VOID is not referenced by any rule of the `qasm3Parser`
// grammar shown in this file; it still reserves the word `void`.
65VOID: 'void';
66
// Timing types.
67DURATION: 'duration';
68STRETCH: 'stretch';
69
70
71/* Builtin identifiers and operations */
72
// `gphase` is its own token so the parser's `gateCallStatement` can tell it
// apart from an ordinary function call.
73GPHASE: 'gphase';
// Gate modifier keywords (see `gateModifier` in the parser).
74INV: 'inv';
75POW: 'pow';
76CTRL: 'ctrl';
77NEGCTRL: 'negctrl';
78
// The token text includes the leading `#`; used by `arrayReferenceType`.
79DIM: '#dim';
80
81DURATIONOF: 'durationof';
82
83DELAY: 'delay';
84RESET: 'reset';
85MEASURE: 'measure';
86BARRIER: 'barrier';
87
// One token type for both values; the matched text says which.
88BooleanLiteral: 'true' | 'false';
89
90
91/* Symbols */
92
93LBRACKET: '[';
94RBRACKET: ']';
95LBRACE: '{';
96RBRACE: '}';
97LPAREN: '(';
98RPAREN: ')';
99
100COLON: ':';
101SEMICOLON: ';';
102
// DOT is also referenced from the `FloatLiteral` rule.
103DOT: '.';
104COMMA: ',';
105
106EQUALS: '=';
107ARROW: '->';
108PLUS: '+';
109DOUBLE_PLUS: '++';
110MINUS: '-';
111ASTERISK: '*';
112DOUBLE_ASTERISK: '**';
113SLASH: '/';
114PERCENT: '%';
115PIPE: '|';
116DOUBLE_PIPE: '||';
117AMPERSAND: '&';
118DOUBLE_AMPERSAND: '&&';
119CARET: '^';
// A bare `@` terminates a gate modifier in the parser; `@name` is matched by
// `AnnotationKeyword` instead.
120AT: '@';
121TILDE: '~';
122EXCLAMATION_POINT: '!';
123
// The operator families below are grouped into one token type each (PascalCase
// per the naming convention); the matched text identifies the operator.
124EqualityOperator: '==' | '!=';
125CompoundAssignmentOperator: '+=' | '-=' | '*=' | '/=' | '&=' | '|=' | '~=' | '^=' | '<<=' | '>>=' | '%=' | '**=';
126ComparisonOperator: '>' | '<' | '>=' | '<=';
127BitshiftOperator: '>>' | '<<';
128
// NOTE(review): IMAG is a standalone token rather than a fragment, and is
// declared before `Identifier` -- presumably a bare `im` lexes as IMAG, not as
// an identifier; confirm against ANTLR's tie-breaking rules.
129IMAG: 'im';
// Spaces or tabs (but not newlines) may separate the number from `im`.
130ImaginaryLiteral: (DecimalIntegerLiteral | FloatLiteral) [ \t]* IMAG;
131
// In each integer form a single `_` may follow any digit except the last.
// Binary and hex accept an upper- or lower-case prefix letter; octal accepts
// only lowercase `0o`.
132BinaryIntegerLiteral: ('0b' | '0B') ([01] '_'?)* [01];
133OctalIntegerLiteral: '0o' ([0-7] '_'?)* [0-7];
134DecimalIntegerLiteral: ([0-9] '_'?)* [0-9];
135HexIntegerLiteral: ('0x' | '0X') ([0-9a-fA-F] '_'?)* [0-9a-fA-F];
136
137fragment ValidUnicode: [\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]; // valid unicode chars
138fragment Letter: [A-Za-z];
// An identifier starts with `_`, an ASCII letter or a character from the
// Unicode categories above; digits are allowed only after the first character.
139fragment FirstIdCharacter: '_' | ValidUnicode | Letter;
140fragment GeneralIdCharacter: FirstIdCharacter | [0-9];
141
142Identifier: FirstIdCharacter GeneralIdCharacter*;
// `$` followed by one or more decimal digits (no underscores).
143HardwareQubit: '$' [0-9]+;
144
// Exponent marker, optional sign, then a decimal integer (underscores allowed
// via DecimalIntegerLiteral).
145fragment FloatLiteralExponent: [eE] (PLUS | MINUS)? DecimalIntegerLiteral;
146FloatLiteral:
147 // 1_123e-3, 123e+4 or 123E5 (needs the exponent or it's just an integer)
148 DecimalIntegerLiteral FloatLiteralExponent
149 // .1234_5678 or .1e3 (no digits before the dot)
150 | DOT DecimalIntegerLiteral FloatLiteralExponent?
151 // 123.456, 123. or 145.32e+1_00
152 | DecimalIntegerLiteral DOT DecimalIntegerLiteral? FloatLiteralExponent?;
153
154fragment TimeUnit: 'dt' | 'ns' | 'us' | 'µs' | 'ms' | 's';
155// represents explicit time value in SI or backend units
// Spaces or tabs (but not newlines) may separate the number from the unit.
156TimingLiteral: (DecimalIntegerLiteral | FloatLiteral) [ \t]* TimeUnit;
157
// A double-quoted, non-empty run of 0/1 digits; a single `_` may follow any
// digit except the last.
158BitstringLiteral: '"' ([01] '_'?)* [01] '"';
159
160// Ignore whitespace between tokens, and define C++-style comments.
// Every rule here ends in `-> skip`, so none of these reach the parser.
// LineComment and BlockComment are also referenced by the CAL_PRELUDE and
// DEFCAL_PRELUDE comment rules.
161Whitespace: [ \t]+ -> skip ;
162Newline: [\r\n]+ -> skip ;
163LineComment : '//' ~[\r\n]* -> skip;
164BlockComment : '/*' .*? '*/' -> skip;
165
166
167// The version identifier token would be ambiguous between itself and
168// integer/floating-point literals, so we use a special mode to ensure it's
169// lexed correctly.
// Entered via pushMode from the OPENQASM token; leading whitespace (including
// newlines) is skipped, and matching the specifier pops back out.
170mode VERSION_IDENTIFIER;
171 VERSION_IDENTIFER_WHITESPACE: [ \t\r\n]+ -> skip;
// Digits, optionally followed by a single `.` and more digits.
172 VersionSpecifier: [0-9]+ ('.' [0-9]+)? -> popMode;
173
174// An include statement's path or defcalgrammar target is potentially ambiguous
175// with `BitstringLiteral`.
// Entered via pushMode from INCLUDE and DEFCALGRAMMAR; matching the string
// pops back out.
176mode ARBITRARY_STRING;
177 ARBITRARY_STRING_WHITESPACE: [ \t\r\n]+ -> skip;
178 // allow ``"str"`` and ``'str'``;
// The content must be non-empty and may not contain a tab, CR, LF, or the
// delimiting quote character; there is no escape syntax.
179 StringLiteral: ('"' ~["\r\t\n]+? '"' | '\'' ~['\r\t\n]+? '\'') -> popMode;
180
181
182// A different lexer mode to swap to when we need to handle tokens on a line basis
183// rather than the default arbitrary-whitespace-based tokenisation. This is
184// used by the annotation and pragma rules.
185mode EAT_TO_LINE_END;
// Leading spaces/tabs are dropped; a single CR or LF character ends the mode.
186 EAT_INITIAL_SPACE: [ \t]+ -> skip;
187 EAT_LINE_END: [\r\n] -> popMode, skip;
188
189 // The line content must be a non-empty token to satisfy ANTLR (otherwise it
190 // would be able to produce an infinite number of tokens). We could include
191 // the line ending to guarantee that this is always non-empty, but that just
192 // puts an annoying burden on consumers to remove it again.
// Starts at the first non-whitespace character and runs to (not including)
// the line ending. This rule does not pop the mode itself; EAT_LINE_END does.
193 RemainingLineContent: ~[ \t\r\n] ~[\r\n]*;
194
195
196// We need to do a little context-aware lexing when we hit a `cal` or `defcal`
197// token. In both cases, there's a small interlude before the pulse grammar
198// block starts, and we need to be able to lex our way through that. We don't
199// want to tie this grammar to one host language by injecting host code to
200// manage the state of the lexer, so instead we need to do a little duplication
201// of the tokens, because ANTLR doesn't allow us to inherit rules directly.
// After `cal`, only whitespace and comments are recognised before the opening
// brace. That brace is emitted as an ordinary LBRACE and switches to CAL_BLOCK.
202mode CAL_PRELUDE;
203 CAL_PRELUDE_WHITESPACE: [ \t\r\n]+ -> skip;
204 CAL_PRELUDE_COMMENT: (LineComment | BlockComment) -> skip;
205 CAL_PRELUDE_LBRACE: LBRACE -> type(LBRACE), mode(CAL_BLOCK);
206
// Lexes the header of a `defcal` (target, arguments, operands, return
// signature) up to the opening brace. Each rule re-emits its match under the
// corresponding default-mode token type via `type(...)`.
// NOTE(review): default-mode keywords with no duplicate here (for example
// `stretch`, `readonly`, `mutable`) presumably match DEFCAL_PRELUDE_Identifier
// in this mode -- confirm that is intended.
207mode DEFCAL_PRELUDE;
208 DEFCAL_PRELUDE_WHITESPACE: [ \t\r\n]+ -> skip;
209 DEFCAL_PRELUDE_COMMENT: (LineComment | BlockComment) -> skip;
210 DEFCAL_PRELUDE_LBRACE: LBRACE -> type(LBRACE), mode(CAL_BLOCK);
211
212 // Duplications of valid constant expression tokens that may appear in the
213 // argument list. This is an unfortunately large number of duplications.
214
215 // Types.
216 DEFCAL_PRELUDE_QREG: QREG -> type(QREG);
217 DEFCAL_PRELUDE_QUBIT: QUBIT -> type(QUBIT);
218 DEFCAL_PRELUDE_CREG: CREG -> type(CREG);
219 DEFCAL_PRELUDE_BOOL: BOOL -> type(BOOL);
220 DEFCAL_PRELUDE_BIT: BIT -> type(BIT);
221 DEFCAL_PRELUDE_INT: INT -> type(INT);
222 DEFCAL_PRELUDE_UINT: UINT -> type(UINT);
223 DEFCAL_PRELUDE_ANGLE: ANGLE -> type(ANGLE);
224 DEFCAL_PRELUDE_FLOAT: FLOAT -> type(FLOAT);
225 DEFCAL_PRELUDE_COMPLEX: COMPLEX -> type(COMPLEX);
226 DEFCAL_PRELUDE_ARRAY: ARRAY -> type(ARRAY);
227 DEFCAL_PRELUDE_DURATION: DURATION -> type(DURATION);
228 // Punctuation.
229 DEFCAL_PRELUDE_LBRACKET: LBRACKET -> type(LBRACKET);
230 DEFCAL_PRELUDE_RBRACKET: RBRACKET -> type(RBRACKET);
231 DEFCAL_PRELUDE_LPAREN: LPAREN -> type(LPAREN);
232 DEFCAL_PRELUDE_RPAREN: RPAREN -> type(RPAREN);
233 DEFCAL_PRELUDE_ARROW: ARROW -> type(ARROW);
234 DEFCAL_PRELUDE_COMMA: COMMA -> type(COMMA);
235 DEFCAL_PRELUDE_PLUS: PLUS -> type(PLUS);
236 DEFCAL_PRELUDE_MINUS: MINUS -> type(MINUS);
237 DEFCAL_PRELUDE_ASTERISK: ASTERISK -> type(ASTERISK);
238 DEFCAL_PRELUDE_SLASH: SLASH -> type(SLASH);
239 DEFCAL_PRELUDE_BitshiftOperator: BitshiftOperator -> type(BitshiftOperator);
240 // Literals and names.
241 DEFCAL_PRELUDE_BitstringLiteral: BitstringLiteral -> type(BitstringLiteral);
242 DEFCAL_PRELUDE_BinaryIntegerLiteral: BinaryIntegerLiteral -> type(BinaryIntegerLiteral);
243 DEFCAL_PRELUDE_OctalIntegerLiteral: OctalIntegerLiteral -> type(OctalIntegerLiteral);
244 DEFCAL_PRELUDE_DecimalIntegerLiteral: DecimalIntegerLiteral -> type(DecimalIntegerLiteral);
245 DEFCAL_PRELUDE_HexIntegerLiteral: HexIntegerLiteral -> type(HexIntegerLiteral);
246 DEFCAL_PRELUDE_FloatLiteral: FloatLiteral -> type(FloatLiteral);
 // The keyword targets are listed before the Identifier duplicate.
247 DEFCAL_PRELUDE_MEASURE: MEASURE -> type(MEASURE);
248 DEFCAL_PRELUDE_DELAY: DELAY -> type(DELAY);
249 DEFCAL_PRELUDE_RESET: RESET -> type(RESET);
250 DEFCAL_PRELUDE_Identifier: Identifier -> type(Identifier);
251 DEFCAL_PRELUDE_HardwareQubit: HardwareQubit -> type(HardwareQubit);
252
253
254// The meat-and-potatoes of matching a calibration block with balanced inner
255// braces. We enter `CAL_BLOCK` with the opening brace already tokenised
256// (that's how the lexer knew to swap modes to us), and with the token left open
257// to continue to accumulate. We want to tokenise until we hit the balancing
258// brace. Since we have _no_ knowledge of what the inner language is doing,
259// things like unbalanced braces in comments will cause a failure, but there's
260// not much we can do about that without greater spec restrictions.
261mode CAL_BLOCK;
// Recursive fragment: a `{ ... }` group whose contents are non-brace
// characters or further balanced groups.
262 fragment NestedCalibrationBlock: LBRACE (NestedCalibrationBlock | ~[{}])* RBRACE;
// The whole body as one non-empty token; an empty body produces no
// CalibrationBlock token, which is why the parser marks it optional.
263 CalibrationBlock: (NestedCalibrationBlock | ~[{}])+;
// The balancing brace is emitted as an ordinary RBRACE and returns the lexer
// to DEFAULT_MODE.
264 CAL_BLOCK_RBRACE: RBRACE -> type(RBRACE), mode(DEFAULT_MODE);