OpenQASM 3.0 Grammar

OpenQASM 3.0 grammar specification, based on the ANTLR parser generator.

The ANTLR grammar is intended to serve as the official reference grammar for OpenQASM3 and defines the set of syntactically valid statements in the language. ANTLR is used because it provides a human-readable EBNF format that is computer-validatable. It provides an auto-generated parser that is used to validate the example files (see openqasm/examples/). As an extension, it can be used to check whether a source file is parseable OpenQASM3 (note this does not ensure that the OpenQASM3 is semantically correct).

The ANTLR parser does not attempt to be performant or construct an AST. Furthermore, it does not conduct any semantic analysis.

The ANTLR grammar may serve as a guide when developing compiler tooling for OpenQASM3.

  1parser grammar qasm3Parser;
  2
  3options {
  4    tokenVocab = qasm3Lexer;  // token types are taken from the companion lexer grammar `qasm3Lexer`
  5}
  6
  7program: version? statementOrScope* EOF;  // top-level rule: optional version line, then statements/scopes, consuming input up to EOF
  8version: OPENQASM VersionSpecifier SEMICOLON;  // e.g. `OPENQASM 3.0;` -- `VersionSpecifier` is lexed in the dedicated VERSION_IDENTIFIER mode
  9
 10// A statement is any valid single statement of an OpenQASM 3 program, with the
 11// exception of the version-definition statement (which must be unique, and the
 12// first statement of the file if present).  This file just defines rules for
 13// parsing; we leave semantic analysis and rejection of invalid scopes for
 14// compiler implementations.
 15statement:
 16    pragma  // a pragma stands alone: it is the only alternative that cannot be prefixed by annotations
 17    // All the actual statements of the language.
 18    | annotation* (  // any number of annotations may precede exactly one of the statement kinds below
 19        aliasDeclarationStatement
 20        | assignmentStatement
 21        | barrierStatement
 22        | boxStatement
 23        | breakStatement
 24        | calStatement
 25        | calibrationGrammarStatement
 26        | classicalDeclarationStatement
 27        | constDeclarationStatement
 28        | continueStatement
 29        | defStatement
 30        | defcalStatement
 31        | delayStatement
 32        | endStatement
 33        | expressionStatement
 34        | externStatement
 35        | forStatement
 36        | gateCallStatement
 37        | gateStatement
 38        | ifStatement
 39        | includeStatement
 40        | ioDeclarationStatement
 41        | measureArrowAssignmentStatement
 42        | oldStyleDeclarationStatement
 43        | quantumDeclarationStatement
 44        | resetStatement
 45        | returnStatement
 46        | switchStatement
 47        | whileStatement
 48    )
 49;
 50annotation: AnnotationKeyword RemainingLineContent?;  // `@name`, optionally followed by free text up to the end of the line
 51scope: LBRACE statementOrScope* RBRACE;  // brace-delimited block; may be empty and may contain nested scopes
 52pragma: PRAGMA RemainingLineContent;  // unlike an annotation, a pragma must have non-empty content after the keyword
 53
 54statementOrScope: statement | scope;  // used wherever either a single statement or a braced block is accepted
 55
 56
 57/* Start top-level statement definitions. */
 58
 59// Inclusion statements.  Both take a quoted `StringLiteral` (single or double quotes).
 60calibrationGrammarStatement: DEFCALGRAMMAR StringLiteral SEMICOLON;
 61includeStatement: INCLUDE StringLiteral SEMICOLON;
 62
 63// Control-flow statements.  Loop and branch bodies are `statementOrScope`, so braces around a single statement are optional.
 64breakStatement: BREAK SEMICOLON;
 65continueStatement: CONTINUE SEMICOLON;
 66endStatement: END SEMICOLON;
 67forStatement: FOR scalarType Identifier IN (setExpression | LBRACKET rangeExpression RBRACKET | expression) body=statementOrScope;  // iterates over a `{...}` set, a bracketed `[...]` range, or a general expression
 68ifStatement: IF LPAREN expression RPAREN if_body=statementOrScope (ELSE else_body=statementOrScope)?;  // `if_body` / `else_body` label the two branches; the else branch is optional
 69returnStatement: RETURN (expression | measureExpression)? SEMICOLON;  // the returned value is optional and may be a measurement
 70whileStatement: WHILE LPAREN expression RPAREN body=statementOrScope;
 71switchStatement: SWITCH LPAREN expression RPAREN LBRACE switchCaseItem* RBRACE;  // zero or more case items inside mandatory braces
 72switchCaseItem:
 73    CASE expressionList scope  // one case may list several comma-separated values; its body must be a braced scope
 74    | DEFAULT scope
 75;
 76
 77// Quantum directive statements.
 78barrierStatement: BARRIER gateOperandList? SEMICOLON;  // operands are optional: a bare `barrier;` parses
 79boxStatement: BOX designator? scope;  // optional `[expression]` designator, then a mandatory braced scope
 80delayStatement: DELAY designator gateOperandList? SEMICOLON;  // the `[expression]` designator is required; operands are optional
 81/* `gateCallStatement` is split in two to avoid a potential ambiguity with an
 82 * `expressionStatement` that consists of a single function call.  The only
 83 * "gate" that can have no operands is `gphase` with no control modifiers, and
 84 * `gphase(pi);` looks grammatically identical to `fn(pi);`.  We disambiguate by
 85 * having `gphase` be its own token, and requiring that all other gate calls
 86 * grammatically have at least one qubit.  Strictly, as long as `gphase` is a
 87 * separate token, ANTLR can disambiguate the statements by the definition
 88 * order, but this is more robust. */
 89gateCallStatement:
 90    gateModifier* Identifier (LPAREN expressionList? RPAREN)? designator? gateOperandList SEMICOLON  // named gate: operand list is mandatory
 91    | gateModifier* GPHASE (LPAREN expressionList? RPAREN)? designator? gateOperandList? SEMICOLON  // gphase: operand list is optional
 92;
 93// measureArrowAssignmentStatement also permits the case of not assigning the
 94// result to any classical value too.
 95measureArrowAssignmentStatement: measureExpression (ARROW indexedIdentifier)? SEMICOLON;
 96resetStatement: RESET gateOperand SEMICOLON;  // takes exactly one operand, not a list
 97
 98// Primitive declaration statements.
 99aliasDeclarationStatement: LET Identifier EQUALS aliasExpression SEMICOLON;
100classicalDeclarationStatement: (scalarType | arrayType) Identifier (EQUALS declarationExpression)? SEMICOLON;  // initialiser is optional
101constDeclarationStatement: CONST scalarType Identifier EQUALS declarationExpression SEMICOLON;  // scalar types only; initialiser is mandatory
102ioDeclarationStatement: (INPUT | OUTPUT) (scalarType | arrayType) Identifier SEMICOLON;  // no initialiser is accepted
103oldStyleDeclarationStatement: (CREG | QREG) Identifier designator? SEMICOLON;  // `creg name[n];` / `qreg name[n];` form, size after the name
104quantumDeclarationStatement: qubitType Identifier SEMICOLON;
105
106// Declarations and definitions of higher-order objects.
107defStatement: DEF Identifier LPAREN argumentDefinitionList? RPAREN returnSignature? scope;  // parentheses are mandatory even with no arguments
108externStatement: EXTERN Identifier LPAREN externArgumentList? RPAREN returnSignature? SEMICOLON;  // signature only: arguments are types without names, and there is no body
109gateStatement: GATE Identifier (LPAREN params=identifierList? RPAREN)? qubits=identifierList scope;  // `params` = optional parenthesised parameter names, `qubits` = mandatory qubit names
110
111// Non-declaration assignments and calculations.
112assignmentStatement: indexedIdentifier op=(EQUALS | CompoundAssignmentOperator) (expression | measureExpression) SEMICOLON;  // `op` labels the assignment operator token
113expressionStatement: expression SEMICOLON;
114
115// Statements where the bulk is in the calibration language.  The body arrives
116// from the lexer as a single optional `CalibrationBlock` token, so empty braces also parse.
117calStatement: CAL LBRACE CalibrationBlock? RBRACE;
118defcalStatement: DEFCAL defcalTarget (LPAREN defcalArgumentDefinitionList? RPAREN)? defcalOperandList returnSignature? LBRACE CalibrationBlock? RBRACE;
118
119
120/* End top-level statement definitions. */
121/* Start expression definitions. */
122
123
124// ANTLR4 can handle direct left-recursive rules, and ambiguities are guaranteed
125// to resolve in the order of definition.  This means that the order of rules
126// here defines the precedence table, from most tightly binding to least.
127expression:  // each `# name` label gives that alternative its own named parse-tree context
128    LPAREN expression RPAREN                                  # parenthesisExpression
129    | expression indexOperator                                # indexExpression
130    | <assoc=right> expression op=DOUBLE_ASTERISK expression  # powerExpression  // the only alternative marked right-associative
131    | op=(TILDE | EXCLAMATION_POINT | MINUS) expression       # unaryExpression  // listed after `**`, so `-a ** b` groups as `-(a ** b)`
132    | expression op=(ASTERISK | SLASH | PERCENT) expression   # multiplicativeExpression
133    | expression op=(PLUS | MINUS) expression                 # additiveExpression
134    | expression op=BitshiftOperator expression               # bitshiftExpression
135    | expression op=ComparisonOperator expression             # comparisonExpression
136    | expression op=EqualityOperator expression               # equalityExpression
137    | expression op=AMPERSAND expression                      # bitwiseAndExpression
138    | expression op=CARET expression                          # bitwiseXorExpression
139    | expression op=PIPE expression                           # bitwiseOrExpression
140    | expression op=DOUBLE_AMPERSAND expression               # logicalAndExpression
141    | expression op=DOUBLE_PIPE expression                    # logicalOrExpression
142    | (scalarType | arrayType) LPAREN expression RPAREN       # castExpression
143    | DURATIONOF LPAREN scope RPAREN                          # durationofExpression
144    | Identifier LPAREN expressionList? RPAREN                # callExpression
145    | (
146        Identifier
147        | BinaryIntegerLiteral
148        | OctalIntegerLiteral
149        | DecimalIntegerLiteral
150        | HexIntegerLiteral
151        | FloatLiteral
152        | ImaginaryLiteral
153        | BooleanLiteral
154        | BitstringLiteral
155        | TimingLiteral
156        | HardwareQubit
157      )                                                       # literalExpression  // also covers bare identifiers and hardware qubits, not just literals
158;
159
160// Special-case expressions that are only valid in certain contexts.  These are
161// not in the expression tree, but can contain elements that are within it.
162aliasExpression: expression (DOUBLE_PLUS expression)*;  // one or more expressions joined by `++`
163declarationExpression: arrayLiteral | expression | measureExpression;  // the forms accepted as a declaration initialiser
164measureExpression: MEASURE gateOperand;
165rangeExpression: expression? COLON expression? (COLON expression)?;  // one or two colons; operands around the first colon are optional, the operand after a second colon is required
166setExpression: LBRACE expression (COMMA expression)* COMMA? RBRACE;  // non-empty; a trailing comma is allowed
167arrayLiteral: LBRACE (expression | arrayLiteral) (COMMA (expression | arrayLiteral))* COMMA? RBRACE;  // non-empty; elements may be nested array literals; a trailing comma is allowed
168
169// The general form is a comma-separated list of indexing entities.
170// `setExpression` is only valid when being used as a single index: registers
171// can support it for creating aliases, but arrays cannot.
172indexOperator:
173    LBRACKET
174    (
175        setExpression  // a set must be the sole content of the brackets
176        | (expression | rangeExpression) (COMMA (expression | rangeExpression))* COMMA?  // otherwise any mix of expressions and ranges, trailing comma allowed
177    )
178    RBRACKET;
179// Alternative form to `indexExpression` for cases where an obvious l-value is
180// better grammatically than a generic expression.  Some current uses of this
181// rule may be better as `expression`, leaving the semantic analysis to later
182// (for example in gate calls).
183indexedIdentifier: Identifier indexOperator*;  // a name followed by zero or more bracketed index groups
184
185/* End expression definitions. */
186/* Start type definitions. */
187
188returnSignature: ARROW scalarType;  // `-> type`; only scalar types are accepted as a return type
189gateModifier: (  // every modifier, with or without an argument, is terminated by `@`
190    INV
191    | POW LPAREN expression RPAREN
192    | (CTRL | NEGCTRL) (LPAREN expression RPAREN)?  // the parenthesised argument is optional for ctrl/negctrl but required for pow
193) AT;
194
195scalarType:  // alternatives with `designator?` accept an optional bracketed `[expression]`; BOOL, DURATION and STRETCH do not
196    BIT designator?
197    | INT designator?
198    | UINT designator?
199    | FLOAT designator?
200    | ANGLE designator?
201    | BOOL
202    | DURATION
203    | STRETCH
204    | COMPLEX (LBRACKET scalarType RBRACKET)?  // optional bracketed component type; the grammar itself accepts any scalarType here
205;
206qubitType: QUBIT designator?;
207arrayType: ARRAY LBRACKET scalarType COMMA expressionList RBRACKET;  // element type followed by one or more dimension expressions
208arrayReferenceType: (READONLY | MUTABLE) ARRAY LBRACKET scalarType COMMA (expressionList | DIM EQUALS expression) RBRACKET;  // dimensions given either as a list or as `#dim = expression`
209
210designator: LBRACKET expression RBRACKET;  // a single bracketed expression
211
212defcalTarget: MEASURE | RESET | DELAY | Identifier;  // the three listed keywords are accepted as targets alongside ordinary names
213defcalArgumentDefinition: expression | argumentDefinition;  // a defcal argument may be a plain expression or a typed declaration
214defcalOperand: HardwareQubit | Identifier;
215gateOperand: indexedIdentifier | HardwareQubit;
216externArgument: scalarType | arrayReferenceType | CREG designator?;  // extern arguments are types only, with no parameter name
217argumentDefinition:
218    scalarType Identifier
219    | qubitType Identifier
220    | (CREG | QREG) Identifier designator?  // old-style register argument: the size designator follows the name
221    | arrayReferenceType Identifier
222;
223// Comma-separated list rules.  Each requires at least one element and accepts an optional trailing comma.
224argumentDefinitionList: argumentDefinition (COMMA argumentDefinition)* COMMA?;
225defcalArgumentDefinitionList: defcalArgumentDefinition (COMMA defcalArgumentDefinition)* COMMA?;
226defcalOperandList: defcalOperand (COMMA defcalOperand)* COMMA?;
227expressionList: expression (COMMA expression)* COMMA?;
228identifierList: Identifier (COMMA Identifier)* COMMA?;
229gateOperandList: gateOperand (COMMA gateOperand)* COMMA?;
230externArgumentList: externArgument (COMMA externArgument)* COMMA?;
  1lexer grammar qasm3Lexer;
  2
  3/* Naming conventions in this lexer grammar
  4 *
  5 * - Keywords and exact symbols that have only one possible value are written in
  6 *   all caps.  There is no more information in the parsed text than in the name
  7 *   of the lexeme.  For example, `INCLUDE` is only ever the string `'include'`.
  8 *
  9 * - Lexemes with information in the string form are in PascalCase.  This
 10 *   indicates there is more information in the token than just the name.  For
 11 *   example, `Identifier` has a payload containing the name of the identifier.
 12 */
 13
 14/* Language keywords.  Several of them also change the lexer mode so that what follows is tokenised specially. */
 15
 16OPENQASM: 'OPENQASM' -> pushMode(VERSION_IDENTIFIER);  // the version number is lexed in its own mode, which pops back afterwards
 17INCLUDE: 'include' -> pushMode(ARBITRARY_STRING);  // the following quoted path is lexed as a `StringLiteral`
 18DEFCALGRAMMAR: 'defcalgrammar' -> pushMode(ARBITRARY_STRING);
 19DEF: 'def';
 20CAL: 'cal' -> mode(CAL_PRELUDE);  // `mode`, not `pushMode`: the CAL_BLOCK mode returns explicitly to DEFAULT_MODE on the closing brace
 21DEFCAL: 'defcal' -> mode(DEFCAL_PRELUDE);
 22GATE: 'gate';
 23EXTERN: 'extern';
 24BOX: 'box';
 25LET: 'let';
 26
 27BREAK: 'break';
 28CONTINUE: 'continue';
 29IF: 'if';
 30ELSE: 'else';
 31END: 'end';
 32RETURN: 'return';
 33FOR: 'for';
 34WHILE: 'while';
 35IN: 'in';
 36SWITCH: 'switch';
 37CASE: 'case';
 38DEFAULT: 'default';
 39
 40PRAGMA: '#'? 'pragma' -> pushMode(EAT_TO_LINE_END);  // accepts both `pragma` and `#pragma`; the rest of the line becomes one token
 41AnnotationKeyword: '@' Identifier ->  pushMode(EAT_TO_LINE_END);  // `@` immediately followed by a name, with no whitespace in between
 42
 43
 44/* Types. */
 45
 46INPUT: 'input';
 47OUTPUT: 'output';
 48CONST: 'const';
 49READONLY: 'readonly';
 50MUTABLE: 'mutable';
 51
 52QREG: 'qreg';
 53QUBIT: 'qubit';
 54
 55CREG: 'creg';
 56BOOL: 'bool';
 57BIT: 'bit';
 58INT: 'int';
 59UINT: 'uint';
 60FLOAT: 'float';
 61ANGLE: 'angle';
 62COMPLEX: 'complex';
 63ARRAY: 'array';
 64VOID: 'void';  // NOTE(review): no parser rule visible in this listing references VOID
 65
 66DURATION:  'duration';
 67STRETCH: 'stretch';
 68
 69
 70/* Builtin identifiers and operations */
 71
 72GPHASE: 'gphase';  // a dedicated token so the parser can tell `gphase(...)` from an ordinary call
 73INV: 'inv';
 74POW: 'pow';
 75CTRL: 'ctrl';
 76NEGCTRL: 'negctrl';
 77
 78DIM: '#dim';  // the leading `#` is part of the token text
 79
 80DURATIONOF: 'durationof';
 81
 82DELAY: 'delay';
 83RESET: 'reset';
 84MEASURE: 'measure';
 85BARRIER: 'barrier';
 86
 87BooleanLiteral: 'true' | 'false';  // PascalCase because the token text carries which value was written
 88
 89
 90/* Symbols */
 91
 92LBRACKET: '[';
 93RBRACKET: ']';
 94LBRACE: '{';
 95RBRACE: '}';
 96LPAREN: '(';
 97RPAREN: ')';
 98
 99COLON: ':';
100SEMICOLON: ';';
101
102DOT: '.';  // within this listing it is referenced only by `FloatLiteral`; no parser rule shown uses it
103COMMA: ',';
104
105EQUALS: '=';
106ARROW: '->';
107PLUS: '+';
108DOUBLE_PLUS: '++';  // used by the parser's `aliasExpression` to join operands
109MINUS: '-';
110ASTERISK: '*';
111DOUBLE_ASTERISK: '**';
112SLASH: '/';
113PERCENT: '%';
114PIPE: '|';
115DOUBLE_PIPE: '||';
116AMPERSAND: '&';
117DOUBLE_AMPERSAND: '&&';
118CARET: '^';
119AT: '@';
120TILDE: '~';
121EXCLAMATION_POINT: '!';
122
123EqualityOperator: '==' | '!=';  // the grouped operators below share one token type each; the token text tells which operator was written
124CompoundAssignmentOperator: '+=' | '-=' | '*=' | '/=' | '&=' | '|=' | '~=' | '^=' | '<<=' | '>>=' | '%=' | '**=';
125ComparisonOperator: '>' | '<' | '>=' | '<=';
126BitshiftOperator: '>>' | '<<';
127
128IMAG: 'im';  // NOTE(review): IMAG is a full token declared before `Identifier`, so the bare text `im` would presumably lex as IMAG rather than Identifier -- confirm this is intended
129ImaginaryLiteral: (DecimalIntegerLiteral | FloatLiteral) [ \t]* IMAG;  // spaces/tabs are allowed between the number and `im`
130
131BinaryIntegerLiteral: ('0b' | '0B') ([01] '_'?)* [01];  // in all integer forms each digit may be followed by one `_`, and the literal must end in a digit
132OctalIntegerLiteral: '0o' ([0-7] '_'?)* [0-7];  // only the lowercase `0o` prefix is accepted, unlike binary and hex
133DecimalIntegerLiteral: ([0-9] '_'?)* [0-9];
134HexIntegerLiteral: ('0x' | '0X') ([0-9a-fA-F] '_'?)* [0-9a-fA-F];
135
136fragment ValidUnicode: [\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]; // valid unicode chars
137fragment Letter: [A-Za-z];
138fragment FirstIdCharacter: '_' | ValidUnicode | Letter;
139fragment GeneralIdCharacter: FirstIdCharacter | [0-9];
140
141Identifier: FirstIdCharacter GeneralIdCharacter*;  // digits are allowed anywhere except the first character
142HardwareQubit: '$' [0-9]+;  // `$` followed by one or more digits, e.g. `$0`
143
144fragment FloatLiteralExponent: [eE] (PLUS | MINUS)? DecimalIntegerLiteral;
145FloatLiteral:
146    // 1_123e-3, 123e+4 or 123E5 (needs the exponent or it's just an integer)
147    DecimalIntegerLiteral FloatLiteralExponent
148    // .1234_5678 or .1e3 (no digits before the dot)
149    | DOT DecimalIntegerLiteral FloatLiteralExponent?
150    // 123.456, 123. or 145.32e+1_00
151    | DecimalIntegerLiteral DOT DecimalIntegerLiteral? FloatLiteralExponent?;
152
153fragment TimeUnit: 'dt' | 'ns' | 'us' | 'µs' | 'ms' | 's';
154// represents explicit time value in SI or backend units
155TimingLiteral: (DecimalIntegerLiteral | FloatLiteral) [ \t]* TimeUnit;  // spaces/tabs are allowed between the number and the unit
156
157BitstringLiteral: '"' ([01] '_'?)* [01] '"';  // double-quoted, non-empty run of 0/1 with the same `_` separator rule as integers
158
159// Ignore whitespace between tokens, and define C++-style comments.
160Whitespace: [ \t]+ -> skip ;
161Newline: [\r\n]+ -> skip ;
162LineComment : '//' ~[\r\n]* -> skip;  // runs to the end of the line; the line ending itself is not consumed
163BlockComment : '/*' .*? '*/' -> skip;  // non-greedy: ends at the first `*/`, so block comments do not nest
164
165
166// The version identifier token would be ambiguous between itself and
167// integer/floating-point literals, so we use a special mode to ensure it's
168// lexed correctly.
169mode VERSION_IDENTIFIER;
170    VERSION_IDENTIFER_WHITESPACE: [ \t\r\n]+ -> skip;  // NOTE(review): the rule name is spelled IDENTIFER; left unchanged because renaming alters the generated token name
171    VersionSpecifier: [0-9]+ ('.' [0-9]+)? -> popMode;  // `major` or `major.minor`; returns to the previous mode once matched
172
173// An include statement's path or defcalgrammar target is potentially ambiguous
174// with `BitstringLiteral`.
175mode ARBITRARY_STRING;
176    ARBITRARY_STRING_WHITESPACE: [ \t\r\n]+ -> skip;
177    // allow ``"str"`` and ``'str'``;
178    StringLiteral: ('"' ~["\r\t\n]+? '"' | '\'' ~['\r\t\n]+? '\'') -> popMode;  // non-empty; may not contain its own quote character, tabs, or line breaks
179
180
181// A different lexer mode to swap to when we need handle tokens on a line basis
182// rather than the default arbitrary-whitespace-based tokenisation.  This is
183// used by the annotation and pragma rules.
184mode EAT_TO_LINE_END;
185    EAT_INITIAL_SPACE: [ \t]+ -> skip;
186    EAT_LINE_END: [\r\n] -> popMode, skip;  // a single line-ending character ends the mode and is itself discarded
187
188    // The line content must be a non-empty token to satisfy ANTLR (otherwise it
189    // would be able to produce an infinite number of tokens).  We could include
190    // the line ending to guarantee that this is always non-empty, but that just
191    // puts an annoying burden on consumers to remove it again.
192    RemainingLineContent: ~[ \t\r\n] ~[\r\n]*;  // starts at the first non-blank character and runs to (not including) the line ending
193
194
195// We need to do a little context-aware lexing when we hit a `cal` or `defcal`
196// token.  In both cases, there's a small interlude before the pulse grammar
197// block starts, and we need to be able to lex our way through that.  We don't
198// want to tie this grammar to one host language by injecting host code to
199// manage the state of the lexer, so instead we need to do a little duplication
200// of the tokens, because ANTLR doesn't allow us to inherit rules directly.
201mode CAL_PRELUDE;
202    CAL_PRELUDE_WHITESPACE: [ \t\r\n]+ -> skip;
203    CAL_PRELUDE_COMMENT: (LineComment | BlockComment) -> skip;
204    CAL_PRELUDE_LBRACE: LBRACE -> type(LBRACE), mode(CAL_BLOCK);  // emitted as an ordinary LBRACE token, then the lexer switches to the block body mode
205
206mode DEFCAL_PRELUDE;
207    DEFCAL_PRELUDE_WHITESPACE: [ \t\r\n]+ -> skip;
208    DEFCAL_PRELUDE_COMMENT: (LineComment | BlockComment) -> skip;
209    DEFCAL_PRELUDE_LBRACE: LBRACE -> type(LBRACE), mode(CAL_BLOCK);
210
211    // Duplications of valid constant expression tokens that may appear in the
212    // argument list.  This is an unfortunately large number of duplications.
213    // Each rule re-tags its match with `type(...)`, so the parser sees the same token types as in the default mode.
214    // Types.
215    DEFCAL_PRELUDE_QREG: QREG -> type(QREG);
216    DEFCAL_PRELUDE_QUBIT: QUBIT -> type(QUBIT);
217    DEFCAL_PRELUDE_CREG: CREG -> type(CREG);
218    DEFCAL_PRELUDE_BOOL: BOOL -> type(BOOL);
219    DEFCAL_PRELUDE_BIT: BIT -> type(BIT);
220    DEFCAL_PRELUDE_INT: INT -> type(INT);
221    DEFCAL_PRELUDE_UINT: UINT -> type(UINT);
222    DEFCAL_PRELUDE_ANGLE: ANGLE -> type(ANGLE);
223    DEFCAL_PRELUDE_FLOAT: FLOAT -> type(FLOAT);
224    DEFCAL_PRELUDE_COMPLEX: COMPLEX -> type(COMPLEX);
225    DEFCAL_PRELUDE_ARRAY: ARRAY -> type(ARRAY);
226    DEFCAL_PRELUDE_DURATION: DURATION -> type(DURATION);  // NOTE(review): STRETCH, READONLY and MUTABLE have no duplicate in this mode -- confirm that is intentional
227    // Punctuation.
228    DEFCAL_PRELUDE_LBRACKET: LBRACKET -> type(LBRACKET);
229    DEFCAL_PRELUDE_RBRACKET: RBRACKET -> type(RBRACKET);
230    DEFCAL_PRELUDE_LPAREN: LPAREN -> type(LPAREN);
231    DEFCAL_PRELUDE_RPAREN: RPAREN -> type(RPAREN);
232    DEFCAL_PRELUDE_ARROW: ARROW -> type(ARROW);
233    DEFCAL_PRELUDE_COMMA: COMMA -> type(COMMA);
234    DEFCAL_PRELUDE_PLUS: PLUS -> type(PLUS);
235    DEFCAL_PRELUDE_MINUS: MINUS -> type(MINUS);
236    DEFCAL_PRELUDE_ASTERISK: ASTERISK -> type(ASTERISK);
237    DEFCAL_PRELUDE_SLASH: SLASH -> type(SLASH);
238    DEFCAL_PRELUDE_BitshiftOperator: BitshiftOperator -> type(BitshiftOperator);
239    // Literals and names.
240    DEFCAL_PRELUDE_BitstringLiteral: BitstringLiteral -> type(BitstringLiteral);
241    DEFCAL_PRELUDE_BinaryIntegerLiteral: BinaryIntegerLiteral -> type(BinaryIntegerLiteral);
242    DEFCAL_PRELUDE_OctalIntegerLiteral: OctalIntegerLiteral -> type(OctalIntegerLiteral);
243    DEFCAL_PRELUDE_DecimalIntegerLiteral: DecimalIntegerLiteral -> type(DecimalIntegerLiteral);
244    DEFCAL_PRELUDE_HexIntegerLiteral: HexIntegerLiteral -> type(HexIntegerLiteral);
245    DEFCAL_PRELUDE_FloatLiteral: FloatLiteral -> type(FloatLiteral);
246    DEFCAL_PRELUDE_MEASURE: MEASURE -> type(MEASURE);
247    DEFCAL_PRELUDE_DELAY: DELAY -> type(DELAY);
248    DEFCAL_PRELUDE_RESET: RESET -> type(RESET);
249    DEFCAL_PRELUDE_Identifier: Identifier -> type(Identifier);
250    DEFCAL_PRELUDE_HardwareQubit: HardwareQubit -> type(HardwareQubit);
251
252
253// The meat-and-potatoes of matching a calibration block with balanced inner
254// braces.  We enter `CAL_BLOCK` with the opening brace already tokenised
255// (that's how the lexer knew to swap modes to us), and with the token left open
256// to continue to accumulate.  We want to tokenise until we hit the balancing
257// brace.  Since we have _no_ knowledge of what the inner language is doing,
258// things like unbalanced braces in comments will cause a failure, but there's
259// not much we can do about that without greater spec restrictions.
260mode CAL_BLOCK;
261    fragment NestedCalibrationBlock: LBRACE (NestedCalibrationBlock | ~[{}])* RBRACE;  // a balanced `{...}` group, recursively allowing further balanced groups inside
262    CalibrationBlock: (NestedCalibrationBlock | ~[{}])+;  // the whole body as one non-empty token: any text plus balanced brace groups
263    CAL_BLOCK_RBRACE: RBRACE -> type(RBRACE), mode(DEFAULT_MODE);  // the unmatched closing brace ends the block and returns the lexer to the default mode