OpenQASM 3.0 Grammar
OpenQASM 3.0 grammar specification, based on the ANTLR parser generator.
The ANTLR grammar is intended to serve as the official reference grammar for OpenQASM3 and defines the set of syntactically valid statements in the language. ANTLR is used because it provides a human-readable EBNF format that is computer-validatable. It provides an auto-generated parser that is used to validate the example files (see openqasm/examples/). As an extension, it can be used to check whether a source file is parseable OpenQASM3 (note this does not ensure that the OpenQASM3 is semantically correct).
The ANTLR parser does not attempt to be performant or construct an AST. Furthermore, it does not conduct any semantic analysis.
The ANTLR grammar may serve as a guide when developing compiler tooling for OpenQASM3.
parser grammar qasm3Parser;

// Token types are taken from the companion lexer grammar.
options {
    tokenVocab = qasm3Lexer;
}

// Entry rule: an optional version declaration followed by any number of
// statements, consuming the input through end-of-file.
program: version? statement* EOF;
// The version declaration: the OPENQASM keyword, a version number, and a
// terminating semicolon.
version: OPENQASM VersionSpecifier SEMICOLON;

// A statement is any valid single statement of an OpenQASM 3 program, with the
// exception of the version-definition statement (which must be unique, and the
// first statement of the file if present). This file just defines rules for
// parsing; we leave semantic analysis and rejection of invalid scopes for
// compiler implementations.
statement:
    pragma
    // All the actual statements of the language. Any of them may be preceded
    // by zero or more annotations.
    | annotation* (
        aliasDeclarationStatement
        | assignmentStatement
        | barrierStatement
        | boxStatement
        | breakStatement
        | calStatement
        | calibrationGrammarStatement
        | classicalDeclarationStatement
        | constDeclarationStatement
        | continueStatement
        | defStatement
        | defcalStatement
        | delayStatement
        | endStatement
        | expressionStatement
        | externStatement
        | forStatement
        | gateCallStatement
        | gateStatement
        | ifStatement
        | includeStatement
        | ioDeclarationStatement
        | measureArrowAssignmentStatement
        | oldStyleDeclarationStatement
        | quantumDeclarationStatement
        | resetStatement
        | returnStatement
        | whileStatement
    )
;
// An annotation keyword, optionally followed by the remainder of its line.
annotation: AnnotationKeyword RemainingLineContent?;
// A brace-delimited sequence of zero or more statements.
scope: LBRACE statement* RBRACE;
// A pragma keyword followed by the (mandatory, non-empty) rest of its line.
pragma: PRAGMA RemainingLineContent;

// Either a single statement or a braced scope; this is the body form used by
// the `for`, `if`/`else` and `while` rules.
statementOrScope: statement | scope;


/* Start top-level statement definitions. */

// Inclusion statements.
calibrationGrammarStatement: DEFCALGRAMMAR StringLiteral SEMICOLON;
includeStatement: INCLUDE StringLiteral SEMICOLON;

// Control-flow statements.
breakStatement: BREAK SEMICOLON;
continueStatement: CONTINUE SEMICOLON;
endStatement: END SEMICOLON;
// The loop variable is declared with a scalar type; the iteration domain is a
// set literal, a bracketed range, or a general expression.
forStatement:
    FOR scalarType Identifier IN
    (setExpression | LBRACKET rangeExpression RBRACKET | expression)
    body=statementOrScope
;
// The `else` branch is optional.
ifStatement:
    IF LPAREN expression RPAREN
    if_body=statementOrScope
    (ELSE else_body=statementOrScope)?
;
// A return may carry no value, an expression, or a measurement.
returnStatement: RETURN (expression | measureExpression)? SEMICOLON;
whileStatement: WHILE LPAREN expression RPAREN body=statementOrScope;

// Quantum directive statements.
barrierStatement: BARRIER gateOperandList? SEMICOLON;
boxStatement: BOX designator? scope;
delayStatement: DELAY designator gateOperandList? SEMICOLON;
/* `gateCallStatement` is split in two to avoid a potential ambiguity with an
 * `expressionStatement` that consists of a single function call. The only
 * "gate" that can have no operands is `gphase` with no control modifiers, and
 * `gphase(pi);` looks grammatically identical to `fn(pi);`. We disambiguate by
 * having `gphase` be its own token, and requiring that all other gate calls
 * grammatically have at least one qubit. Strictly, as long as `gphase` is a
 * separate token, ANTLR can disambiguate the statements by the definition
 * order, but this is more robust. */
gateCallStatement:
    // Named gate: the operand list is mandatory.
    gateModifier* Identifier (LPAREN expressionList? RPAREN)? designator? gateOperandList SEMICOLON
    // `gphase`: the operand list is optional.
    | gateModifier* GPHASE (LPAREN expressionList? RPAREN)? designator? gateOperandList? SEMICOLON
;
// measureArrowAssignmentStatement also permits the case of not assigning the
// result to any classical value too.
measureArrowAssignmentStatement: measureExpression (ARROW indexedIdentifier)? SEMICOLON;
resetStatement: RESET gateOperand SEMICOLON;

// Primitive declaration statements.
aliasDeclarationStatement: LET Identifier EQUALS aliasExpression SEMICOLON;
// The initialiser is optional for ordinary classical declarations...
classicalDeclarationStatement: (scalarType | arrayType) Identifier (EQUALS declarationExpression)? SEMICOLON;
// ...but mandatory for `const` declarations, which also accept scalar types only.
constDeclarationStatement: CONST scalarType Identifier EQUALS declarationExpression SEMICOLON;
ioDeclarationStatement: (INPUT | OUTPUT) (scalarType | arrayType) Identifier SEMICOLON;
oldStyleDeclarationStatement: (CREG | QREG) Identifier designator? SEMICOLON;
quantumDeclarationStatement: qubitType Identifier SEMICOLON;

// Declarations and definitions of higher-order objects.
defStatement: DEF Identifier LPAREN argumentDefinitionList? RPAREN returnSignature? scope;
externStatement: EXTERN Identifier LPAREN externArgumentList? RPAREN returnSignature? SEMICOLON;
// The parenthesised parameter list is optional; the qubit list is not.
gateStatement: GATE Identifier (LPAREN params=identifierList? RPAREN)? qubits=identifierList scope;

// Non-declaration assignments and calculations.
assignmentStatement: indexedIdentifier op=(EQUALS | CompoundAssignmentOperator) (expression | measureExpression) SEMICOLON;
expressionStatement: expression SEMICOLON;

// Statements where the bulk is in the calibration language. The body between
// the braces is a single opaque `CalibrationBlock` token (or absent if empty).
calStatement: CAL LBRACE CalibrationBlock? RBRACE;
defcalStatement:
    DEFCAL defcalTarget
    (LPAREN defcalArgumentDefinitionList? RPAREN)?
    defcalOperandList returnSignature?
    LBRACE CalibrationBlock? RBRACE
;


/* End top-level statement definitions. */
/* Start expression definitions. */


// ANTLR4 can handle direct left-recursive rules, and ambiguities are guaranteed
// to resolve in the order of definition. This means that the order of rules
// here defines the precedence table, from most tightly binding to least.
// Do not reorder the alternatives below without intending to change precedence.
expression:
    LPAREN expression RPAREN                                  # parenthesisExpression
    | expression indexOperator                                # indexExpression
    // Exponentiation is the only right-associative binary operator.
    | <assoc=right> expression op=DOUBLE_ASTERISK expression  # powerExpression
    | op=(TILDE | EXCLAMATION_POINT | MINUS) expression       # unaryExpression
    | expression op=(ASTERISK | SLASH | PERCENT) expression   # multiplicativeExpression
    | expression op=(PLUS | MINUS) expression                 # additiveExpression
    | expression op=BitshiftOperator expression               # bitshiftExpression
    | expression op=ComparisonOperator expression             # comparisonExpression
    | expression op=EqualityOperator expression               # equalityExpression
    | expression op=AMPERSAND expression                      # bitwiseAndExpression
    | expression op=CARET expression                          # bitwiseXorExpression
    | expression op=PIPE expression                           # bitwiseOrExpression
    | expression op=DOUBLE_AMPERSAND expression               # logicalAndExpression
    | expression op=DOUBLE_PIPE expression                    # logicalOrExpression
    | (scalarType | arrayType) LPAREN expression RPAREN       # castExpression
    | DURATIONOF LPAREN scope RPAREN                          # durationofExpression
    | Identifier LPAREN expressionList? RPAREN                # callExpression
    // Single-token atoms: names, literals and hardware qubits.
    | (
        Identifier
        | BinaryIntegerLiteral
        | OctalIntegerLiteral
        | DecimalIntegerLiteral
        | HexIntegerLiteral
        | FloatLiteral
        | ImaginaryLiteral
        | BooleanLiteral
        | BitstringLiteral
        | TimingLiteral
        | HardwareQubit
    )                                                         # literalExpression
;

// Special-case expressions that are only valid in certain contexts. These are
// not in the expression tree, but can contain elements that are within it.
aliasExpression: expression (DOUBLE_PLUS expression)*;
declarationExpression: arrayLiteral | expression | measureExpression;
measureExpression: MEASURE gateOperand;
// Every component of a range is optional except the first colon; the third
// component, if present, must be introduced by a second colon.
rangeExpression: expression? COLON expression? (COLON expression)?;
// Non-empty, comma-separated, with an optional trailing comma.
setExpression: LBRACE expression (COMMA expression)* COMMA? RBRACE;
// Like `setExpression`, but elements may themselves be nested array literals.
arrayLiteral: LBRACE (expression | arrayLiteral) (COMMA (expression | arrayLiteral))* COMMA? RBRACE;

// The general form is a comma-separated list of indexing entities.
// `setExpression` is only valid when being used as a single index: registers
// can support it for creating aliases, but arrays cannot.
indexOperator:
    LBRACKET
    (
        setExpression
        | (expression | rangeExpression) (COMMA (expression | rangeExpression))* COMMA?
    )
    RBRACKET
;
// Alternative form to `indexExpression` for cases where an obvious l-value is
// better grammatically than a generic expression. Some current uses of this
// rule may be better as `expression`, leaving the semantic analysis to later
// (for example in gate calls).
indexedIdentifier: Identifier indexOperator*;

/* End expression definitions. */
/* Start type definitions. */

// Return types are restricted to scalar types.
returnSignature: ARROW scalarType;
// Every gate modifier is terminated by `@`. `pow` requires an argument;
// `ctrl` and `negctrl` take an optional one; `inv` takes none.
gateModifier: (
    INV
    | POW LPAREN expression RPAREN
    | (CTRL | NEGCTRL) (LPAREN expression RPAREN)?
) AT;

// Only some scalar types accept a bracketed designator; `complex` instead
// takes an optional bracketed scalar type.
scalarType:
    BIT designator?
    | INT designator?
    | UINT designator?
    | FLOAT designator?
    | ANGLE designator?
    | BOOL
    | DURATION
    | STRETCH
    | COMPLEX (LBRACKET scalarType RBRACKET)?
;
qubitType: QUBIT designator?;
arrayType: ARRAY LBRACKET scalarType COMMA expressionList RBRACKET;
// Array references either list their dimensions explicitly or give only the
// number of dimensions via `#dim = <expression>`.
arrayReferenceType:
    (READONLY | MUTABLE) ARRAY
    LBRACKET scalarType COMMA (expressionList | DIM EQUALS expression) RBRACKET
;

designator: LBRACKET expression RBRACKET;

defcalTarget: MEASURE | RESET | DELAY | Identifier;
defcalArgumentDefinition: expression | argumentDefinition;
defcalOperand: HardwareQubit | Identifier;
gateOperand: indexedIdentifier | HardwareQubit;
// Extern arguments are types only; they carry no parameter names.
externArgument: scalarType | arrayReferenceType | CREG designator?;
argumentDefinition:
    scalarType Identifier
    | qubitType Identifier
    | (CREG | QREG) Identifier designator?
    | arrayReferenceType Identifier
;

// All list rules below are non-empty, comma-separated, and accept an optional
// trailing comma.
argumentDefinitionList: argumentDefinition (COMMA argumentDefinition)* COMMA?;
defcalArgumentDefinitionList: defcalArgumentDefinition (COMMA defcalArgumentDefinition)* COMMA?;
defcalOperandList: defcalOperand (COMMA defcalOperand)* COMMA?;
expressionList: expression (COMMA expression)* COMMA?;
identifierList: Identifier (COMMA Identifier)* COMMA?;
gateOperandList: gateOperand (COMMA gateOperand)* COMMA?;
externArgumentList: externArgument (COMMA externArgument)* COMMA?;
lexer grammar qasm3Lexer;

/* Naming conventions in this lexer grammar
 *
 * - Keywords and exact symbols that have only one possible value are written in
 *   all caps. There is no more information in the parsed text than in the name
 *   of the lexeme. For example, `INCLUDE` is only ever the string `'include'`.
 *
 * - Lexemes with information in the string form are in PascalCase. This
 *   indicates there is more information in the token than just the name. For
 *   example, `Identifier` has a payload containing the name of the identifier.
 */

/* Language keywords. */

// The version number is lexed in its own mode; see `VERSION_IDENTIFIER`.
OPENQASM: 'OPENQASM' -> pushMode(VERSION_IDENTIFIER);
INCLUDE: 'include';
DEFCALGRAMMAR: 'defcalgrammar';
DEF: 'def';
// `cal` and `defcal` switch the lexer into the calibration prelude modes.
CAL: 'cal' -> mode(CAL_PRELUDE);
DEFCAL: 'defcal' -> mode(DEFCAL_PRELUDE);
GATE: 'gate';
EXTERN: 'extern';
BOX: 'box';
LET: 'let';

BREAK: 'break';
CONTINUE: 'continue';
IF: 'if';
ELSE: 'else';
END: 'end';
RETURN: 'return';
FOR: 'for';
WHILE: 'while';
IN: 'in';

// Pragmas and annotations are line-oriented: the rest of the line is lexed
// in `EAT_TO_LINE_END` mode. The leading `#` on `pragma` is optional.
PRAGMA: '#'? 'pragma' -> pushMode(EAT_TO_LINE_END);
AnnotationKeyword: '@' Identifier -> pushMode(EAT_TO_LINE_END);


/* Types. */

// Declaration and reference qualifiers.
INPUT: 'input';
OUTPUT: 'output';
CONST: 'const';
READONLY: 'readonly';
MUTABLE: 'mutable';

// Quantum types.
QREG: 'qreg';
QUBIT: 'qubit';

// Classical types.
CREG: 'creg';
BOOL: 'bool';
BIT: 'bit';
INT: 'int';
UINT: 'uint';
FLOAT: 'float';
ANGLE: 'angle';
COMPLEX: 'complex';
ARRAY: 'array';
VOID: 'void';

// Timing types.
DURATION: 'duration';
STRETCH: 'stretch';


/* Builtin identifiers and operations */

GPHASE: 'gphase';
INV: 'inv';
POW: 'pow';
CTRL: 'ctrl';
NEGCTRL: 'negctrl';

DIM: '#dim';

DURATIONOF: 'durationof';

DELAY: 'delay';
RESET: 'reset';
MEASURE: 'measure';
BARRIER: 'barrier';

BooleanLiteral: 'true' | 'false';


/* Symbols */

LBRACKET: '[';
RBRACKET: ']';
LBRACE: '{';
RBRACE: '}';
LPAREN: '(';
RPAREN: ')';

COLON: ':';
SEMICOLON: ';';

DOT: '.';
COMMA: ',';

EQUALS: '=';
ARROW: '->';
PLUS: '+';
DOUBLE_PLUS: '++';
MINUS: '-';
ASTERISK: '*';
DOUBLE_ASTERISK: '**';
SLASH: '/';
PERCENT: '%';
PIPE: '|';
DOUBLE_PIPE: '||';
AMPERSAND: '&';
DOUBLE_AMPERSAND: '&&';
CARET: '^';
AT: '@';
TILDE: '~';
EXCLAMATION_POINT: '!';

// Operator families that share one token type; the matched text carries the
// specific operator.
EqualityOperator: '==' | '!=';
CompoundAssignmentOperator:
    '+=' | '-=' | '*=' | '/=' | '&=' | '|=' | '~=' | '^=' | '<<=' | '>>=' | '%=' | '**='
;
ComparisonOperator: '>' | '<' | '>=' | '<=';
BitshiftOperator: '>>' | '<<';

IMAG: 'im';
// The numeric part and `im` may be separated by any run of spaces or tabs,
// matching what the `Whitespace` rule treats as intra-line whitespace.
ImaginaryLiteral: (DecimalIntegerLiteral | FloatLiteral) [ \t]* IMAG;

// Integer literals. A single `_` may follow any digit except the last, so
// underscores can separate digits but cannot lead, trail, or be doubled.
BinaryIntegerLiteral: ('0b' | '0B') ([01] '_'?)* [01];
OctalIntegerLiteral: '0o' ([0-7] '_'?)* [0-7];
DecimalIntegerLiteral: ([0-9] '_'?)* [0-9];
HexIntegerLiteral: ('0x' | '0X') ([0-9a-fA-F] '_'?)* [0-9a-fA-F];

fragment ValidUnicode: [\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]; // valid unicode chars
fragment Letter: [A-Za-z];
fragment FirstIdCharacter: '_' | ValidUnicode | Letter;
fragment GeneralIdCharacter: FirstIdCharacter | [0-9];

// Identifiers may not start with a digit.
Identifier: FirstIdCharacter GeneralIdCharacter*;
// A physical qubit reference: `$` followed by one or more decimal digits.
HardwareQubit: '$' [0-9]+;

fragment FloatLiteralExponent: [eE] (PLUS | MINUS)? DecimalIntegerLiteral;
FloatLiteral:
    // 1_123e-3, 123e+4 or 123E5 (needs the exponent or it's just an integer)
    DecimalIntegerLiteral FloatLiteralExponent
    // .1234_5678 or .1e3 (no digits before the dot)
    | DOT DecimalIntegerLiteral FloatLiteralExponent?
    // 123.456, 123. or 145.32e+1_00
    | DecimalIntegerLiteral DOT DecimalIntegerLiteral? FloatLiteralExponent?
;

fragment TimeUnit: 'dt' | 'ns' | 'us' | 'µs' | 'ms' | 's';
// represents explicit time value in SI or backend units
TimingLiteral: (DecimalIntegerLiteral | FloatLiteral) TimeUnit;


// A double-quoted, non-empty run of binary digits with optional single
// underscores between digits.
BitstringLiteral: '"' ([01] '_'?)* [01] '"';
// allow ``"str"`` and ``'str'``
// The content must be non-empty and may not contain the delimiting quote,
// a tab, a carriage return or a newline.
StringLiteral
    : '"' ~["\r\t\n]+? '"'
    | '\'' ~['\r\t\n]+? '\''
    ;

// Ignore whitespace between tokens, and define C++-style comments.
Whitespace: [ \t]+ -> skip ;
Newline: [\r\n]+ -> skip ;
LineComment : '//' ~[\r\n]* -> skip;
BlockComment : '/*' .*? '*/' -> skip;


// The version identifier token would be ambiguous between itself and
// integer/floating-point literals, so we use a special mode to ensure it's
// lexed correctly.
mode VERSION_IDENTIFIER;
    // NOTE(review): the rule name is spelled `IDENTIFER`; it is kept as-is
    // because renaming a lexer rule changes the generated token vocabulary.
    VERSION_IDENTIFER_WHITESPACE: [ \t\r\n]+ -> skip;
    // Major version with an optional `.minor` part; returns to the previous mode.
    VersionSpecifier: [0-9]+ ('.' [0-9]+)? -> popMode;


// A different lexer mode to swap to when we need handle tokens on a line basis
// rather than the default arbitrary-whitespace-based tokenisation. This is
// used by the annotation and pragma rules.
mode EAT_TO_LINE_END;
    EAT_INITIAL_SPACE: [ \t]+ -> skip;
    // The first line-ending character returns to the previous mode.
    EAT_LINE_END: [\r\n] -> popMode, skip;

    // The line content must be a non-empty token to satisfy ANTLR (otherwise it
    // would be able to produce an infinite number of tokens). We could include
    // the line ending to guarantee that this is always non-empty, but that just
    // puts an annoying burden on consumers to remove it again.
    RemainingLineContent: ~[ \t\r\n] ~[\r\n]*;


// We need to do a little context-aware lexing when we hit a `cal` or `defcal`
// token. In both cases, there's a small interlude before the pulse grammar
// block starts, and we need to be able to lex our way through that. We don't
// want to tie this grammar to one host language by injecting host code to
// manage the state of the lexer, so instead we need to do a little duplication
// of the tokens, because ANTLR doesn't allow us to inherit rules directly.
mode CAL_PRELUDE;
    CAL_PRELUDE_WHITESPACE: [ \t\r\n]+ -> skip;
    CAL_PRELUDE_COMMENT: (LineComment | BlockComment) -> skip;
    // The opening brace is emitted as an ordinary LBRACE and starts the block.
    CAL_PRELUDE_LBRACE: LBRACE -> type(LBRACE), mode(CAL_BLOCK);

mode DEFCAL_PRELUDE;
    DEFCAL_PRELUDE_WHITESPACE: [ \t\r\n]+ -> skip;
    DEFCAL_PRELUDE_COMMENT: (LineComment | BlockComment) -> skip;
    // The opening brace is emitted as an ordinary LBRACE and starts the block.
    DEFCAL_PRELUDE_LBRACE: LBRACE -> type(LBRACE), mode(CAL_BLOCK);

    // Duplications of valid constant expression tokens that may appear in the
    // argument list. This is an unfortunately large number of duplications.
    // Each rule re-emits the default-mode token type via `type(...)`, so the
    // parser sees the same tokens as it would outside this mode.

    // Types.
    // NOTE(review): `STRETCH` is accepted by the parser's `scalarType` but is
    // not duplicated here, so `stretch` lexes as `Identifier` in this mode --
    // confirm this is intentional.
    DEFCAL_PRELUDE_QREG: QREG -> type(QREG);
    DEFCAL_PRELUDE_QUBIT: QUBIT -> type(QUBIT);
    DEFCAL_PRELUDE_CREG: CREG -> type(CREG);
    DEFCAL_PRELUDE_BOOL: BOOL -> type(BOOL);
    DEFCAL_PRELUDE_BIT: BIT -> type(BIT);
    DEFCAL_PRELUDE_INT: INT -> type(INT);
    DEFCAL_PRELUDE_UINT: UINT -> type(UINT);
    DEFCAL_PRELUDE_ANGLE: ANGLE -> type(ANGLE);
    DEFCAL_PRELUDE_FLOAT: FLOAT -> type(FLOAT);
    DEFCAL_PRELUDE_COMPLEX: COMPLEX -> type(COMPLEX);
    DEFCAL_PRELUDE_ARRAY: ARRAY -> type(ARRAY);
    DEFCAL_PRELUDE_DURATION: DURATION -> type(DURATION);
    // Punctuation.
    DEFCAL_PRELUDE_LBRACKET: LBRACKET -> type(LBRACKET);
    DEFCAL_PRELUDE_RBRACKET: RBRACKET -> type(RBRACKET);
    DEFCAL_PRELUDE_LPAREN: LPAREN -> type(LPAREN);
    DEFCAL_PRELUDE_RPAREN: RPAREN -> type(RPAREN);
    DEFCAL_PRELUDE_ARROW: ARROW -> type(ARROW);
    DEFCAL_PRELUDE_COMMA: COMMA -> type(COMMA);
    DEFCAL_PRELUDE_PLUS: PLUS -> type(PLUS);
    DEFCAL_PRELUDE_MINUS: MINUS -> type(MINUS);
    DEFCAL_PRELUDE_ASTERISK: ASTERISK -> type(ASTERISK);
    DEFCAL_PRELUDE_SLASH: SLASH -> type(SLASH);
    DEFCAL_PRELUDE_BitshiftOperator: BitshiftOperator -> type(BitshiftOperator);
    // Literals and names. The keyword rules precede the `Identifier` rule so
    // that they win when both match text of the same length.
    DEFCAL_PRELUDE_BitstringLiteral: BitstringLiteral -> type(BitstringLiteral);
    DEFCAL_PRELUDE_BinaryIntegerLiteral: BinaryIntegerLiteral -> type(BinaryIntegerLiteral);
    DEFCAL_PRELUDE_OctalIntegerLiteral: OctalIntegerLiteral -> type(OctalIntegerLiteral);
    DEFCAL_PRELUDE_DecimalIntegerLiteral: DecimalIntegerLiteral -> type(DecimalIntegerLiteral);
    DEFCAL_PRELUDE_HexIntegerLiteral: HexIntegerLiteral -> type(HexIntegerLiteral);
    DEFCAL_PRELUDE_FloatLiteral: FloatLiteral -> type(FloatLiteral);
    DEFCAL_PRELUDE_MEASURE: MEASURE -> type(MEASURE);
    DEFCAL_PRELUDE_DELAY: DELAY -> type(DELAY);
    DEFCAL_PRELUDE_RESET: RESET -> type(RESET);
    DEFCAL_PRELUDE_Identifier: Identifier -> type(Identifier);
    DEFCAL_PRELUDE_HardwareQubit: HardwareQubit -> type(HardwareQubit);


// The meat-and-potatoes of matching a calibration block with balanced inner
// braces. We enter `CAL_BLOCK` with the opening brace already tokenised
// (that's how the lexer knew to swap modes to us), and with the token left open
// to continue to accumulate. We want to tokenise until we hit the balancing
// brace. Since we have _no_ knowledge of what the inner language is doing,
// things like unbalanced braces in comments will cause a failure, but there's
// not much we can do about that without greater spec restrictions.
mode CAL_BLOCK;
    // A balanced `{ ... }` group, recursively allowing further nested groups.
    fragment NestedCalibrationBlock: LBRACE (NestedCalibrationBlock | ~[{}])* RBRACE;
    // Everything up to (but not including) the brace that closes the block,
    // emitted as one non-empty token.
    CalibrationBlock: (NestedCalibrationBlock | ~[{}])+;
    // The closing brace is emitted as an ordinary RBRACE and returns the lexer
    // to the default mode.
    CAL_BLOCK_RBRACE: RBRACE -> type(RBRACE), mode(DEFAULT_MODE);