OpenQASM 3.0 Grammar
OpenQASM 3.0 grammar specification based on the ANTLR parser generator.
The ANTLR grammar is intended to serve as the official reference grammar for OpenQASM3 and defines the set of syntactically valid statements in the language. ANTLR is used because it provides a human-readable EBNF format that is computer-validatable. It provides an auto-generated parser that is used to validate the example files (see openqasm/examples/). As an extension, it can be used to check whether a source file is parseable OpenQASM3 (note this does not ensure that the OpenQASM3 is semantically correct).
The ANTLR parser does not attempt to be performant or construct an AST. Furthermore, it does not conduct any semantic analysis.
The ANTLR grammar may serve as a guide when developing compiler tooling for OpenQASM3.
// Parser half of the OpenQASM 3 grammar.  Token types are taken from the
// companion lexer grammar named by `tokenVocab`.
parser grammar qasm3Parser;

options {
    tokenVocab = qasm3Lexer;
}
6
// Entry rule: an optional version line, then any number of statements or
// braced scopes, and finally end-of-input.
program:
    version?
    statementOrScope*
    EOF
;

// The version line, `OPENQASM <VersionSpecifier>;`.
version:
    OPENQASM VersionSpecifier SEMICOLON
;
9
// `statement` matches any single statement of an OpenQASM 3 program other than
// the version-definition statement (which must be unique and, when present,
// the first statement of the file).  Only parsing rules are given here;
// semantic analysis, including rejecting statements that are invalid in a
// given scope, is left to compiler implementations.
statement:
    pragma
    // Every other statement kind may carry any number of leading annotations.
    | annotation* (
        aliasDeclarationStatement
        | assignmentStatement
        | barrierStatement
        | boxStatement
        | breakStatement
        | calStatement
        | calibrationGrammarStatement
        | classicalDeclarationStatement
        | constDeclarationStatement
        | continueStatement
        | defStatement
        | defcalStatement
        | delayStatement
        | endStatement
        | expressionStatement
        | externStatement
        | forStatement
        | gateCallStatement
        | gateStatement
        | ifStatement
        | includeStatement
        | ioDeclarationStatement
        | measureArrowAssignmentStatement
        | oldStyleDeclarationStatement
        | quantumDeclarationStatement
        | resetStatement
        | returnStatement
        | switchStatement
        | whileStatement
    )
;
// An annotation keyword, optionally followed by the rest of its line.
annotation:
    AnnotationKeyword RemainingLineContent?
;

// A brace-delimited sequence of statements and nested scopes.
scope:
    LBRACE statementOrScope* RBRACE
;

// A pragma keyword followed by the (mandatory) rest of its line.
pragma:
    PRAGMA RemainingLineContent
;

// Anything that can appear where a statement body is expected.
statementOrScope:
    statement
    | scope
;
55
56
57/* Start top-level statement definitions. */
58
// Inclusion statements.  Both take a single string literal and a semicolon.
calibrationGrammarStatement:
    DEFCALGRAMMAR StringLiteral SEMICOLON
;
includeStatement:
    INCLUDE StringLiteral SEMICOLON
;
62
// Control-flow statements.

// Bare keyword statements.
breakStatement:    BREAK SEMICOLON;
continueStatement: CONTINUE SEMICOLON;
endStatement:      END SEMICOLON;

// `for <scalarType> <name> in <iterable> <body>`, where the iterable is a set
// literal, a bracketed range, or an arbitrary expression.
forStatement:
    FOR scalarType Identifier IN
    (
        setExpression
        | LBRACKET rangeExpression RBRACKET
        | expression
    )
    body=statementOrScope
;

// `if (<cond>) <if_body>` with an optional `else <else_body>`.
ifStatement:
    IF LPAREN expression RPAREN
    if_body=statementOrScope
    (ELSE else_body=statementOrScope)?
;

// `return;`, or `return` followed by an expression or a measurement.
returnStatement:
    RETURN (expression | measureExpression)? SEMICOLON
;

// `while (<cond>) <body>`.
whileStatement:
    WHILE LPAREN expression RPAREN
    body=statementOrScope
;

// `switch (<target>) { <items> }` with zero or more case items.
switchStatement:
    SWITCH LPAREN expression RPAREN
    LBRACE switchCaseItem* RBRACE
;

// A case item is either `case <values> { ... }` or `default { ... }`.
switchCaseItem:
    CASE expressionList scope
    | DEFAULT scope
;
76
// Quantum directive statements.
barrierStatement:
    BARRIER gateOperandList? SEMICOLON
;
boxStatement:
    BOX designator? scope
;
delayStatement:
    DELAY designator gateOperandList? SEMICOLON
;

/* `gateCallStatement` has two alternatives in order to avoid a potential
 * ambiguity with an `expressionStatement` that is just one function call.
 * `gphase` without control modifiers is the only "gate" that may take no
 * operands, and `gphase(pi);` has the same grammatical shape as `fn(pi);`.
 * The ambiguity is removed by lexing `gphase` as its own token and by
 * requiring every other gate call to name at least one qubit.  Strictly
 * speaking, once `gphase` is a separate token ANTLR could already tell the
 * statements apart by definition order, but this form is more robust. */
gateCallStatement:
    gateModifier* Identifier
        (LPAREN expressionList? RPAREN)? designator? gateOperandList SEMICOLON
    | gateModifier* GPHASE
        (LPAREN expressionList? RPAREN)? designator? gateOperandList? SEMICOLON
;

// The arrow and its target are optional, so this rule also matches a bare
// measurement whose result is not assigned to any classical value.
measureArrowAssignmentStatement:
    measureExpression (ARROW indexedIdentifier)? SEMICOLON
;

resetStatement:
    RESET gateOperand SEMICOLON
;
97
// Primitive declaration statements.

// `let <name> = <aliasExpression>;`
aliasDeclarationStatement:
    LET Identifier EQUALS aliasExpression SEMICOLON
;

// A scalar or array variable, with an optional initialiser.
classicalDeclarationStatement:
    (scalarType | arrayType) Identifier
    (EQUALS declarationExpression)?
    SEMICOLON
;

// A `const` scalar; the initialiser is mandatory.
constDeclarationStatement:
    CONST scalarType Identifier
    EQUALS declarationExpression
    SEMICOLON
;

// `input` / `output` declarations; no initialiser is accepted.
ioDeclarationStatement:
    (INPUT | OUTPUT) (scalarType | arrayType) Identifier SEMICOLON
;

// `creg` / `qreg` declarations, with the designator after the name.
oldStyleDeclarationStatement:
    (CREG | QREG) Identifier designator? SEMICOLON
;

// `qubit` declarations, with the optional designator as part of `qubitType`.
quantumDeclarationStatement:
    qubitType Identifier SEMICOLON
;
105
// Declarations and definitions of higher-order objects.

// Subroutine definition: parenthesised arguments, optional return signature,
// and a braced body.
defStatement:
    DEF Identifier
    LPAREN argumentDefinitionList? RPAREN
    returnSignature?
    scope
;

// Extern declaration: same shape as `def` but with type-only arguments and a
// semicolon in place of the body.
externStatement:
    EXTERN Identifier
    LPAREN externArgumentList? RPAREN
    returnSignature?
    SEMICOLON
;

// Gate definition: an optional parenthesised parameter list (`params`), a
// mandatory list of qubit names (`qubits`), and a braced body.
gateStatement:
    GATE Identifier
    (LPAREN params=identifierList? RPAREN)?
    qubits=identifierList
    scope
;
110
// Non-declaration assignments and calculations.

// The left-hand side is an (optionally indexed) identifier; the right-hand
// side is an expression or a measurement.  `op` records which operator was
// used.
assignmentStatement:
    indexedIdentifier
    op=(EQUALS | CompoundAssignmentOperator)
    (expression | measureExpression)
    SEMICOLON
;

// A bare expression terminated by a semicolon.
expressionStatement:
    expression SEMICOLON
;
114
// Statements where the bulk is in the calibration language.  The text between
// the braces arrives as at most one opaque `CalibrationBlock` token.
calStatement:
    CAL
    LBRACE CalibrationBlock? RBRACE
;

defcalStatement:
    DEFCAL defcalTarget
    (LPAREN defcalArgumentDefinitionList? RPAREN)?
    defcalOperandList
    returnSignature?
    LBRACE CalibrationBlock? RBRACE
;
118
119
120/* End top-level statement definitions. */
121/* Start expression definitions. */
122
123
// ANTLR4 accepts directly left-recursive rules and resolves ambiguities between
// alternatives in definition order.  The order of the alternatives below is
// therefore the operator-precedence table, listed from the most tightly
// binding form to the least.  Do not reorder them.
expression:
      LPAREN expression RPAREN                                    # parenthesisExpression
    | expression indexOperator                                    # indexExpression
    | <assoc=right> expression op=DOUBLE_ASTERISK expression      # powerExpression
    | op=(TILDE | EXCLAMATION_POINT | MINUS) expression           # unaryExpression
    | expression op=(ASTERISK | SLASH | PERCENT) expression       # multiplicativeExpression
    | expression op=(PLUS | MINUS) expression                     # additiveExpression
    | expression op=BitshiftOperator expression                   # bitshiftExpression
    | expression op=ComparisonOperator expression                 # comparisonExpression
    | expression op=EqualityOperator expression                   # equalityExpression
    | expression op=AMPERSAND expression                          # bitwiseAndExpression
    | expression op=CARET expression                              # bitwiseXorExpression
    | expression op=PIPE expression                               # bitwiseOrExpression
    | expression op=DOUBLE_AMPERSAND expression                   # logicalAndExpression
    | expression op=DOUBLE_PIPE expression                        # logicalOrExpression
    | (scalarType | arrayType) LPAREN expression RPAREN           # castExpression
    | DURATIONOF LPAREN scope RPAREN                              # durationofExpression
    | Identifier LPAREN expressionList? RPAREN                    # callExpression
    // Single-token operands: a name, any literal form, or a hardware qubit.
    | (
        Identifier
        | BinaryIntegerLiteral
        | OctalIntegerLiteral
        | DecimalIntegerLiteral
        | HexIntegerLiteral
        | FloatLiteral
        | ImaginaryLiteral
        | BooleanLiteral
        | BitstringLiteral
        | TimingLiteral
        | HardwareQubit
      )                                                           # literalExpression
;
159
// Special-case expressions that are only valid in certain contexts.  They are
// not alternatives of `expression`, but they may contain ordinary expressions.

// One or more expressions joined with `++`.
aliasExpression:
    expression (DOUBLE_PLUS expression)*
;

// What may appear to the right of `=` in a declaration.
declarationExpression:
    arrayLiteral
    | expression
    | measureExpression
;

measureExpression:
    MEASURE gateOperand
;

// `a:b` or `a:b:c`; the first two operands are optional, the third (and its
// colon) may be left out entirely.
rangeExpression:
    expression? COLON expression? (COLON expression)?
;

// `{a, b, c}`: at least one element, trailing comma permitted.
setExpression:
    LBRACE
    expression (COMMA expression)* COMMA?
    RBRACE
;

// Like `setExpression`, but each element may itself be a nested array literal.
arrayLiteral:
    LBRACE
    (expression | arrayLiteral) (COMMA (expression | arrayLiteral))* COMMA?
    RBRACE
;
168
// An index is, in general, a comma-separated list of indexing entities
// (expressions or ranges, trailing comma permitted).  A `setExpression` is
// accepted only as the sole index: registers can support it for creating
// aliases, but arrays cannot.
indexOperator:
    LBRACKET
    (
        setExpression
        | (expression | rangeExpression)
          (COMMA (expression | rangeExpression))*
          COMMA?
    )
    RBRACKET
;

// Alternative to `indexExpression` for places where an obvious l-value reads
// better grammatically than a generic expression.  Some current uses of this
// rule (gate calls, for example) might be better written with `expression`,
// deferring the check to later semantic analysis.
indexedIdentifier:
    Identifier indexOperator*
;
184
185/* End expression definitions. */
186/* Start type definitions. */
187
// `-> <scalarType>` after a def / extern / defcal signature.
returnSignature:
    ARROW scalarType
;

// One gate modifier, always terminated by `@`:
//   inv @
//   pow(<expr>) @
//   ctrl @, negctrl @, ctrl(<expr>) @, negctrl(<expr>) @
gateModifier:
    (
        INV
        | POW LPAREN expression RPAREN
        | (CTRL | NEGCTRL) (LPAREN expression RPAREN)?
    )
    AT
;
194
// Scalar types.  The first five accept an optional bracketed designator;
// `complex` instead accepts an optional bracketed scalar type.
scalarType:
      BIT designator?
    | INT designator?
    | UINT designator?
    | FLOAT designator?
    | ANGLE designator?
    | BOOL
    | DURATION
    | STRETCH
    | COMPLEX (LBRACKET scalarType RBRACKET)?
;

qubitType:
    QUBIT designator?
;

// `array[<scalarType>, <expressions>]`
arrayType:
    ARRAY LBRACKET scalarType COMMA expressionList RBRACKET
;

// Array reference used in argument lists.  After the element type comes either
// an expression list or the form `#dim = <expr>`.
arrayReferenceType:
    (READONLY | MUTABLE)
    ARRAY LBRACKET scalarType COMMA (expressionList | DIM EQUALS expression) RBRACKET
;

// A single bracketed expression, e.g. the `[8]` in `int[8]`.
designator:
    LBRACKET expression RBRACKET
;
211
// What a `defcal` may be attached to: one of three keywords or a plain name.
defcalTarget:
    MEASURE
    | RESET
    | DELAY
    | Identifier
;

// A defcal argument is either an expression or a typed argument definition.
defcalArgumentDefinition:
    expression
    | argumentDefinition
;

defcalOperand:
    HardwareQubit
    | Identifier
;

gateOperand:
    indexedIdentifier
    | HardwareQubit
;

// Extern arguments are types only; no parameter name is given.
externArgument:
    scalarType
    | arrayReferenceType
    | CREG designator?
;

// A typed, named parameter of a subroutine (or of a defcal).
argumentDefinition:
      scalarType Identifier
    | qubitType Identifier
    | (CREG | QREG) Identifier designator?
    | arrayReferenceType Identifier
;
223
// Comma-separated lists.  Every list needs at least one element and permits a
// single trailing comma.
argumentDefinitionList:
    argumentDefinition (COMMA argumentDefinition)* COMMA?
;
defcalArgumentDefinitionList:
    defcalArgumentDefinition (COMMA defcalArgumentDefinition)* COMMA?
;
defcalOperandList:
    defcalOperand (COMMA defcalOperand)* COMMA?
;
expressionList:
    expression (COMMA expression)* COMMA?
;
identifierList:
    Identifier (COMMA Identifier)* COMMA?
;
gateOperandList:
    gateOperand (COMMA gateOperand)* COMMA?
;
externArgumentList:
    externArgument (COMMA externArgument)* COMMA?
;
lexer grammar qasm3Lexer;

/* Naming conventions used in this lexer grammar
 *
 * - ALL_CAPS names are keywords and exact symbols that have only one possible
 *   spelling.  The matched text carries no information beyond the token name
 *   itself.  For example, `INCLUDE` is only ever the string `'include'`.
 *
 * - PascalCase names are lexemes whose text carries information in addition
 *   to the token name.  For example, `Identifier` has a payload containing the
 *   name of the identifier.
 */
13
/* Language keywords. */

// Keywords that switch lexer mode, so that the text following them is
// tokenised by the rules of the named mode.
OPENQASM:      'OPENQASM'      -> pushMode(VERSION_IDENTIFIER);
INCLUDE:       'include'       -> pushMode(ARBITRARY_STRING);
DEFCALGRAMMAR: 'defcalgrammar' -> pushMode(ARBITRARY_STRING);
DEF:           'def';
CAL:           'cal'           -> mode(CAL_PRELUDE);
DEFCAL:        'defcal'        -> mode(DEFCAL_PRELUDE);
GATE:          'gate';
EXTERN:        'extern';
BOX:           'box';
LET:           'let';

// Control flow.
BREAK:    'break';
CONTINUE: 'continue';
IF:       'if';
ELSE:     'else';
END:      'end';
RETURN:   'return';
FOR:      'for';
WHILE:    'while';
IN:       'in';
SWITCH:   'switch';
CASE:     'case';
DEFAULT:  'default';

// Line-oriented constructs: after either token the lexer enters
// `EAT_TO_LINE_END`.  `pragma` is accepted with or without a leading `#`.
PRAGMA:            '#'? 'pragma'  -> pushMode(EAT_TO_LINE_END);
AnnotationKeyword: '@' Identifier -> pushMode(EAT_TO_LINE_END);
42
43
/* Types. */

// Declaration qualifiers.
INPUT:    'input';
OUTPUT:   'output';
CONST:    'const';
READONLY: 'readonly';
MUTABLE:  'mutable';

// Quantum types.
QREG:  'qreg';
QUBIT: 'qubit';

// Classical types.
CREG:    'creg';
BOOL:    'bool';
BIT:     'bit';
INT:     'int';
UINT:    'uint';
FLOAT:   'float';
ANGLE:   'angle';
COMPLEX: 'complex';
ARRAY:   'array';
VOID:    'void';

// Timing types.
DURATION: 'duration';
STRETCH:  'stretch';
68
69
/* Builtin identifiers and operations */

// `gphase` and the gate-modifier keywords.
GPHASE:  'gphase';
INV:     'inv';
POW:     'pow';
CTRL:    'ctrl';
NEGCTRL: 'negctrl';

// Used inside array reference types.
DIM: '#dim';

DURATIONOF: 'durationof';

// Builtin quantum operations.
DELAY:   'delay';
RESET:   'reset';
MEASURE: 'measure';
BARRIER: 'barrier';

BooleanLiteral:
    'true'
    | 'false'
;
88
89
/* Symbols */

// Brackets.
LBRACKET: '[';
RBRACKET: ']';
LBRACE:   '{';
RBRACE:   '}';
LPAREN:   '(';
RPAREN:   ')';

// Separators.
COLON:     ':';
SEMICOLON: ';';

DOT:   '.';
COMMA: ',';

// Single-spelling operators.
EQUALS:            '=';
ARROW:             '->';
PLUS:              '+';
DOUBLE_PLUS:       '++';
MINUS:             '-';
ASTERISK:          '*';
DOUBLE_ASTERISK:   '**';
SLASH:             '/';
PERCENT:           '%';
PIPE:              '|';
DOUBLE_PIPE:       '||';
AMPERSAND:         '&';
DOUBLE_AMPERSAND:  '&&';
CARET:             '^';
AT:                '@';
TILDE:             '~';
EXCLAMATION_POINT: '!';

// Operator families: the token text tells the parser's consumer which member
// of the family was written.
EqualityOperator:   [=!] '=';                    // '==' or '!='
CompoundAssignmentOperator:
      '+='  | '-='  | '*=' | '/='
    | '&='  | '|='  | '~=' | '^='
    | '<<=' | '>>=' | '%=' | '**='
;
ComparisonOperator: [><] '='?;                   // '>', '<', '>=' or '<='
BitshiftOperator:   '>>' | '<<';
127
// Imaginary literals: a decimal integer or float, optional spaces/tabs, then
// the `im` suffix.
IMAG: 'im';
ImaginaryLiteral:
    (DecimalIntegerLiteral | FloatLiteral) [ \t]* IMAG
;

// Integer literals.  In every base a single `_` may follow any digit except
// the last one, so a literal always ends in a digit.
BinaryIntegerLiteral:  '0' [bB] ([01] '_'?)* [01];
OctalIntegerLiteral:   '0o' ([0-7] '_'?)* [0-7];
DecimalIntegerLiteral: ([0-9] '_'?)* [0-9];
HexIntegerLiteral:     '0' [xX] ([0-9a-fA-F] '_'?)* [0-9a-fA-F];
135
// Characters in the Unicode letter categories (Lu, Ll, Lt, Lm, Lo) and the
// letter-number category (Nl).
fragment ValidUnicode: [\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}];
fragment Letter: [A-Za-z];
// An identifier starts with `_` or a letter; digits are allowed afterwards.
fragment FirstIdCharacter:
    '_'
    | ValidUnicode
    | Letter
;
fragment GeneralIdCharacter:
    FirstIdCharacter
    | [0-9]
;

Identifier: FirstIdCharacter GeneralIdCharacter*;

// `$` followed by one or more decimal digits, e.g. `$0`.
HardwareQubit: '$' [0-9]+;
143
// Exponent part of a float: `e` or `E`, optional sign, decimal digits.
fragment FloatLiteralExponent:
    [eE] (PLUS | MINUS)? DecimalIntegerLiteral
;

FloatLiteral:
      // Digits with an exponent and no dot: 1_123e-3, 123e+4 or 123E5.  The
      // exponent is required here, otherwise the text is just an integer.
      DecimalIntegerLiteral FloatLiteralExponent
      // No digits before the dot: .1234_5678 or .1e3
    | DOT DecimalIntegerLiteral FloatLiteralExponent?
      // Digits before the dot: 123.456, 123. or 145.32e+1_00
    | DecimalIntegerLiteral DOT DecimalIntegerLiteral? FloatLiteralExponent?
;

fragment TimeUnit:
    'dt' | 'ns' | 'us' | 'µs' | 'ms' | 's'
;
// An explicit time value in SI or backend units: a decimal integer or float,
// optional spaces/tabs, then a time unit.
TimingLiteral:
    (DecimalIntegerLiteral | FloatLiteral) [ \t]* TimeUnit
;

// A double-quoted run of 0s and 1s; a single `_` may follow any digit except
// the last.
BitstringLiteral:
    '"' ([01] '_'?)* [01] '"'
;
158
// Whitespace between tokens is discarded, as are C++-style line and block
// comments.
Whitespace:   [ \t]+        -> skip;
Newline:      [\r\n]+       -> skip;
LineComment:  '//' ~[\r\n]* -> skip;
BlockComment: '/*' .*? '*/' -> skip;
164
165
// A version number would be ambiguous with the integer and floating-point
// literal tokens, so it is lexed in a dedicated mode, entered right after the
// `OPENQASM` keyword, to make sure it comes out as one `VersionSpecifier`.
mode VERSION_IDENTIFIER;
    // NOTE: the token name below is spelled "IDENTIFER"; it is left unchanged
    // because token names are part of the generated lexer vocabulary.
    VERSION_IDENTIFER_WHITESPACE: [ \t\r\n]+ -> skip;
    // Digits, optionally followed by a dot and more digits; matching it
    // returns the lexer to the previous mode.
    VersionSpecifier:
        [0-9]+ ('.' [0-9]+)? -> popMode;
172
// The path of an `include` statement, or the target of a `defcalgrammar`
// statement, is potentially ambiguous with `BitstringLiteral`, so it is lexed
// in its own mode.
mode ARBITRARY_STRING;
    ARBITRARY_STRING_WHITESPACE: [ \t\r\n]+ -> skip;
    // Both ``"str"`` and ``'str'`` are accepted.  The content must be
    // non-empty and may not contain the delimiting quote, a tab, or a line
    // break.  Matching the literal returns the lexer to the previous mode.
    StringLiteral:
        (
            '"' ~["\r\t\n]+? '"'
            | '\'' ~['\r\t\n]+? '\''
        ) -> popMode;
179
180
// A separate lexer mode for when tokens need to be handled on a per-line basis
// instead of with the default arbitrary-whitespace-based tokenisation.  The
// annotation and pragma rules use it.
mode EAT_TO_LINE_END;
    // Leading spaces and tabs are dropped.
    EAT_INITIAL_SPACE: [ \t]+ -> skip;
    // A line-break character ends the mode and is itself discarded.
    EAT_LINE_END: [\r\n] -> popMode, skip;

    // ANTLR requires the line-content token to be non-empty (an empty token
    // could be produced infinitely many times).  Including the line ending
    // would guarantee that, but would leave every consumer with the annoying
    // job of stripping it again, so instead the token must start with a
    // non-whitespace character and stops before the line ending.
    RemainingLineContent:
        ~[ \t\r\n] ~[\r\n]*;
193
194
// Hitting a `cal` or `defcal` token requires a little context-aware lexing.
// In both cases there is a short interlude before the pulse-grammar block
// begins, and the lexer has to be able to get through it.  Injecting host code
// to manage lexer state would tie this grammar to one host language, so the
// needed tokens are duplicated instead, because ANTLR does not let one mode
// inherit rules from another directly.
mode CAL_PRELUDE;
    // Only whitespace and comments may appear between `cal` and its brace.
    CAL_PRELUDE_WHITESPACE: [ \t\r\n]+                   -> skip;
    CAL_PRELUDE_COMMENT:    (LineComment | BlockComment) -> skip;
    // The opening brace is emitted as an ordinary LBRACE and starts the block.
    CAL_PRELUDE_LBRACE:     LBRACE -> type(LBRACE), mode(CAL_BLOCK);
205
// Lexer mode for the text between `defcal` and the opening brace of its body.
mode DEFCAL_PRELUDE;
    DEFCAL_PRELUDE_WHITESPACE: [ \t\r\n]+                   -> skip;
    DEFCAL_PRELUDE_COMMENT:    (LineComment | BlockComment) -> skip;
    // The opening brace is emitted as an ordinary LBRACE and starts the block.
    DEFCAL_PRELUDE_LBRACE:     LBRACE -> type(LBRACE), mode(CAL_BLOCK);

    // Duplications of valid constant-expression tokens that may appear in the
    // argument list.  This is an unfortunately large number of duplications.
    // Each rule re-emits its match under the default-mode token type.
    //
    // NOTE(review): default-mode tokens with no duplicate here (for example
    // `stretch`) fall through to `DEFCAL_PRELUDE_Identifier` or are not
    // matched at all in this mode -- confirm that the omissions are intended.

    // Types.
    DEFCAL_PRELUDE_QREG:     QREG     -> type(QREG);
    DEFCAL_PRELUDE_QUBIT:    QUBIT    -> type(QUBIT);
    DEFCAL_PRELUDE_CREG:     CREG     -> type(CREG);
    DEFCAL_PRELUDE_BOOL:     BOOL     -> type(BOOL);
    DEFCAL_PRELUDE_BIT:      BIT      -> type(BIT);
    DEFCAL_PRELUDE_INT:      INT      -> type(INT);
    DEFCAL_PRELUDE_UINT:     UINT     -> type(UINT);
    DEFCAL_PRELUDE_ANGLE:    ANGLE    -> type(ANGLE);
    DEFCAL_PRELUDE_FLOAT:    FLOAT    -> type(FLOAT);
    DEFCAL_PRELUDE_COMPLEX:  COMPLEX  -> type(COMPLEX);
    DEFCAL_PRELUDE_ARRAY:    ARRAY    -> type(ARRAY);
    DEFCAL_PRELUDE_DURATION: DURATION -> type(DURATION);

    // Punctuation.
    DEFCAL_PRELUDE_LBRACKET: LBRACKET -> type(LBRACKET);
    DEFCAL_PRELUDE_RBRACKET: RBRACKET -> type(RBRACKET);
    DEFCAL_PRELUDE_LPAREN:   LPAREN   -> type(LPAREN);
    DEFCAL_PRELUDE_RPAREN:   RPAREN   -> type(RPAREN);
    DEFCAL_PRELUDE_ARROW:    ARROW    -> type(ARROW);
    DEFCAL_PRELUDE_COMMA:    COMMA    -> type(COMMA);
    DEFCAL_PRELUDE_PLUS:     PLUS     -> type(PLUS);
    DEFCAL_PRELUDE_MINUS:    MINUS    -> type(MINUS);
    DEFCAL_PRELUDE_ASTERISK: ASTERISK -> type(ASTERISK);
    DEFCAL_PRELUDE_SLASH:    SLASH    -> type(SLASH);
    DEFCAL_PRELUDE_BitshiftOperator: BitshiftOperator -> type(BitshiftOperator);

    // Literals and names.  The keyword rules stay ahead of the identifier
    // rule so that `measure`, `delay` and `reset` keep their keyword types.
    DEFCAL_PRELUDE_BitstringLiteral:      BitstringLiteral      -> type(BitstringLiteral);
    DEFCAL_PRELUDE_BinaryIntegerLiteral:  BinaryIntegerLiteral  -> type(BinaryIntegerLiteral);
    DEFCAL_PRELUDE_OctalIntegerLiteral:   OctalIntegerLiteral   -> type(OctalIntegerLiteral);
    DEFCAL_PRELUDE_DecimalIntegerLiteral: DecimalIntegerLiteral -> type(DecimalIntegerLiteral);
    DEFCAL_PRELUDE_HexIntegerLiteral:     HexIntegerLiteral     -> type(HexIntegerLiteral);
    DEFCAL_PRELUDE_FloatLiteral:          FloatLiteral          -> type(FloatLiteral);
    DEFCAL_PRELUDE_MEASURE:               MEASURE               -> type(MEASURE);
    DEFCAL_PRELUDE_DELAY:                 DELAY                 -> type(DELAY);
    DEFCAL_PRELUDE_RESET:                 RESET                 -> type(RESET);
    DEFCAL_PRELUDE_Identifier:            Identifier            -> type(Identifier);
    DEFCAL_PRELUDE_HardwareQubit:         HardwareQubit         -> type(HardwareQubit);
251
252
// The meat-and-potatoes of matching a calibration block with balanced inner
// braces.  `CAL_BLOCK` is entered once the opening brace has been tokenised
// (that brace is what made the lexer switch to this mode), and everything up
// to the balancing closing brace is then gathered into one token.  Since the
// lexer has _no_ knowledge of what the inner language is doing, things like
// unbalanced braces inside comments will cause a failure, but there is not
// much to be done about that without greater spec restrictions.
mode CAL_BLOCK;
    // A brace pair together with everything inside it, nested pairs included.
    fragment NestedCalibrationBlock:
        LBRACE (NestedCalibrationBlock | ~[{}])* RBRACE;
    // The block body: a non-empty run of non-brace characters and balanced
    // brace pairs.
    CalibrationBlock:
        (NestedCalibrationBlock | ~[{}])+;
    // The balancing brace is emitted as an ordinary RBRACE and returns the
    // lexer to the default mode.
    CAL_BLOCK_RBRACE:
        RBRACE -> type(RBRACE), mode(DEFAULT_MODE);