OpenQasm 3.0 Grammar

OpenQASM 3.0 grammar specification based on the ANTLR parser generator.

The ANTLR grammar is intended to serve as the official reference grammar for OpenQASM3 and defines the set of syntactically valid statements in the language. ANTLR is used because it provides a human-readable EBNF format that is computer-validatable. It provides an auto-generated parser that is used to validate the example files (see openqasm/examples/). As an extension, it can be used to check whether a source file is parseable OpenQASM3 (note that this does not ensure that the program is semantically correct).

The ANTLR parser does not attempt to be performant or construct an AST. Furthermore, it does not conduct any semantic analysis.

The ANTLR grammar may serve as a guide when developing compiler tooling for OpenQASM3.

  1parser grammar qasm3Parser;
  2
  3options {
  4    tokenVocab = qasm3Lexer;
  5}
  6
  7program: version? statement* EOF;
  8version: OPENQASM VersionSpecifier SEMICOLON;
  9
 10// A statement is any valid single statement of an OpenQASM 3 program, with the
 11// exception of the version-definition statement (which must be unique, and the
 12// first statement of the file if present).  This file just defines rules for
 13// parsing; we leave semantic analysis and rejection of invalid scopes for
 14// compiler implementations.
 15statement:
 16    pragma
 17    // All the actual statements of the language.
 18    | annotation* (
 19        aliasDeclarationStatement
 20        | assignmentStatement
 21        | barrierStatement
 22        | boxStatement
 23        | breakStatement
 24        | calStatement
 25        | calibrationGrammarStatement
 26        | classicalDeclarationStatement
 27        | constDeclarationStatement
 28        | continueStatement
 29        | defStatement
 30        | defcalStatement
 31        | delayStatement
 32        | endStatement
 33        | expressionStatement
 34        | externStatement
 35        | forStatement
 36        | gateCallStatement
 37        | gateStatement
 38        | ifStatement
 39        | includeStatement
 40        | ioDeclarationStatement
 41        | measureArrowAssignmentStatement
 42        | oldStyleDeclarationStatement
 43        | quantumDeclarationStatement
 44        | resetStatement
 45        | returnStatement
 46        | whileStatement
 47    )
 48;
 49annotation: AnnotationKeyword RemainingLineContent?;
 50scope: LBRACE statement* RBRACE;
 51pragma: PRAGMA RemainingLineContent;
 52
 53statementOrScope: statement | scope;
 54
 55
 56/* Start top-level statement definitions. */
 57
 58// Inclusion statements.
 59calibrationGrammarStatement: DEFCALGRAMMAR StringLiteral SEMICOLON;
 60includeStatement: INCLUDE StringLiteral SEMICOLON;
 61
 62// Control-flow statements.
 63breakStatement: BREAK SEMICOLON;
 64continueStatement: CONTINUE SEMICOLON;
 65endStatement: END SEMICOLON;
 66forStatement: FOR scalarType Identifier IN (setExpression | LBRACKET rangeExpression RBRACKET | expression) body=statementOrScope;
 67ifStatement: IF LPAREN expression RPAREN if_body=statementOrScope (ELSE else_body=statementOrScope)?;
 68returnStatement: RETURN (expression | measureExpression)? SEMICOLON;
 69whileStatement: WHILE LPAREN expression RPAREN body=statementOrScope;
 70
 71// Quantum directive statements.
 72barrierStatement: BARRIER gateOperandList? SEMICOLON;
 73boxStatement: BOX designator? scope;
 74delayStatement: DELAY designator gateOperandList? SEMICOLON;
 75/* `gateCallStatement`  is split in two to avoid a potential ambiguity with an
 76 * `expressionStatement` that consists of a single function call.  The only
 77 * "gate" that can have no operands is `gphase` with no control modifiers, and
 78 * `gphase(pi);` looks grammatically identical to `fn(pi);`.  We disambiguate by
 79 * having `gphase` be its own token, and requiring that all other gate calls
 80 * grammatically have at least one qubit.  Strictly, as long as `gphase` is a
 81 * separate token, ANTLR can disambiguate the statements by the definition
 82 * order, but this is more robust. */
 83gateCallStatement:
 84    gateModifier* Identifier (LPAREN expressionList? RPAREN)? designator? gateOperandList SEMICOLON
 85    | gateModifier* GPHASE (LPAREN expressionList? RPAREN)? designator? gateOperandList? SEMICOLON
 86;
 87// measureArrowAssignmentStatement also permits the case of not assigning the
 88// result to any classical value.
 89measureArrowAssignmentStatement: measureExpression (ARROW indexedIdentifier)? SEMICOLON;
 90resetStatement: RESET gateOperand SEMICOLON;
 91
 92// Primitive declaration statements.
 93aliasDeclarationStatement: LET Identifier EQUALS aliasExpression SEMICOLON;
 94classicalDeclarationStatement: (scalarType | arrayType) Identifier (EQUALS declarationExpression)? SEMICOLON;
 95constDeclarationStatement: CONST scalarType Identifier EQUALS declarationExpression SEMICOLON;
 96ioDeclarationStatement: (INPUT | OUTPUT) (scalarType | arrayType) Identifier SEMICOLON;
 97oldStyleDeclarationStatement: (CREG | QREG) Identifier designator? SEMICOLON;
 98quantumDeclarationStatement: qubitType Identifier SEMICOLON;
 99
100// Declarations and definitions of higher-order objects.
101defStatement: DEF Identifier LPAREN argumentDefinitionList? RPAREN returnSignature? scope;
102externStatement: EXTERN Identifier LPAREN externArgumentList? RPAREN returnSignature? SEMICOLON;
103gateStatement: GATE Identifier (LPAREN params=identifierList? RPAREN)? qubits=identifierList scope;
104
105// Non-declaration assignments and calculations.
106assignmentStatement: indexedIdentifier op=(EQUALS | CompoundAssignmentOperator) (expression | measureExpression) SEMICOLON;
107expressionStatement: expression SEMICOLON;
108
109// Statements where the bulk is in the calibration language.
110calStatement: CAL LBRACE CalibrationBlock? RBRACE;
111defcalStatement: DEFCAL defcalTarget (LPAREN defcalArgumentDefinitionList? RPAREN)? defcalOperandList returnSignature? LBRACE CalibrationBlock? RBRACE;
112
113
114/* End top-level statement definitions. */
115/* Start expression definitions. */
116
117
118// ANTLR4 can handle direct left-recursive rules, and ambiguities are guaranteed
119// to resolve in the order of definition.  This means that the order of rules
120// here defines the precedence table, from most tightly binding to least.
121expression:
122    LPAREN expression RPAREN                                  # parenthesisExpression
123    | expression indexOperator                                # indexExpression
124    | <assoc=right> expression op=DOUBLE_ASTERISK expression  # powerExpression
125    | op=(TILDE | EXCLAMATION_POINT | MINUS) expression       # unaryExpression
126    | expression op=(ASTERISK | SLASH | PERCENT) expression   # multiplicativeExpression
127    | expression op=(PLUS | MINUS) expression                 # additiveExpression
128    | expression op=BitshiftOperator expression               # bitshiftExpression
129    | expression op=ComparisonOperator expression             # comparisonExpression
130    | expression op=EqualityOperator expression               # equalityExpression
131    | expression op=AMPERSAND expression                      # bitwiseAndExpression
132    | expression op=CARET expression                          # bitwiseXorExpression
133    | expression op=PIPE expression                           # bitwiseOrExpression
134    | expression op=DOUBLE_AMPERSAND expression               # logicalAndExpression
135    | expression op=DOUBLE_PIPE expression                    # logicalOrExpression
136    | (scalarType | arrayType) LPAREN expression RPAREN       # castExpression
137    | DURATIONOF LPAREN scope RPAREN                          # durationofExpression
138    | Identifier LPAREN expressionList? RPAREN                # callExpression
139    | (
140        Identifier
141        | BinaryIntegerLiteral
142        | OctalIntegerLiteral
143        | DecimalIntegerLiteral
144        | HexIntegerLiteral
145        | FloatLiteral
146        | ImaginaryLiteral
147        | BooleanLiteral
148        | BitstringLiteral
149        | TimingLiteral
150        | HardwareQubit
151      )                                                       # literalExpression
152;
153
154// Special-case expressions that are only valid in certain contexts.  These are
155// not in the expression tree, but can contain elements that are within it.
156aliasExpression: expression (DOUBLE_PLUS expression)*;
157declarationExpression: arrayLiteral | expression | measureExpression;
158measureExpression: MEASURE gateOperand;
159rangeExpression: expression? COLON expression? (COLON expression)?;
160setExpression: LBRACE expression (COMMA expression)* COMMA? RBRACE;
161arrayLiteral: LBRACE (expression | arrayLiteral) (COMMA (expression | arrayLiteral))* COMMA? RBRACE;
162
163// The general form is a comma-separated list of indexing entities.
164// `setExpression` is only valid when being used as a single index: registers
165// can support it for creating aliases, but arrays cannot.
166indexOperator:
167    LBRACKET
168    (
169        setExpression
170        | (expression | rangeExpression) (COMMA (expression | rangeExpression))* COMMA?
171    )
172    RBRACKET;
173// Alternative form to `indexExpression` for cases where an obvious l-value is
174// better grammatically than a generic expression.  Some current uses of this
175// rule may be better as `expression`, leaving the semantic analysis to later
176// (for example in gate calls).
177indexedIdentifier: Identifier indexOperator*;
178
179/* End expression definitions. */
180/* Start type definitions. */
181
182returnSignature: ARROW scalarType;
183gateModifier: (
184    INV
185    | POW LPAREN expression RPAREN
186    | (CTRL | NEGCTRL) (LPAREN expression RPAREN)?
187) AT;
188
189scalarType:
190    BIT designator?
191    | INT designator?
192    | UINT designator?
193    | FLOAT designator?
194    | ANGLE designator?
195    | BOOL
196    | DURATION
197    | STRETCH
198    | COMPLEX (LBRACKET scalarType RBRACKET)?
199;
200qubitType: QUBIT designator?;
201arrayType: ARRAY LBRACKET scalarType COMMA expressionList RBRACKET;
202arrayReferenceType: (READONLY | MUTABLE) ARRAY LBRACKET scalarType COMMA (expressionList | DIM EQUALS expression) RBRACKET;
203
204designator: LBRACKET expression RBRACKET;
205
206defcalTarget: MEASURE | RESET | DELAY | Identifier;
207defcalArgumentDefinition: expression | argumentDefinition;
208defcalOperand: HardwareQubit | Identifier;
209gateOperand: indexedIdentifier | HardwareQubit;
210externArgument: scalarType | arrayReferenceType | CREG designator?;
211argumentDefinition:
212    scalarType Identifier
213    | qubitType Identifier
214    | (CREG | QREG) Identifier designator?
215    | arrayReferenceType Identifier
216;
217
218argumentDefinitionList: argumentDefinition (COMMA argumentDefinition)* COMMA?;
219defcalArgumentDefinitionList: defcalArgumentDefinition (COMMA defcalArgumentDefinition)* COMMA?;
220defcalOperandList: defcalOperand (COMMA defcalOperand)* COMMA?;
221expressionList: expression (COMMA expression)* COMMA?;
222identifierList: Identifier (COMMA Identifier)* COMMA?;
223gateOperandList: gateOperand (COMMA gateOperand)* COMMA?;
224externArgumentList: externArgument (COMMA externArgument)* COMMA?;
  1lexer grammar qasm3Lexer;
  2
  3/* Naming conventions in this lexer grammar
  4 *
  5 * - Keywords and exact symbols that have only one possible value are written in
  6 *   all caps.  There is no more information in the parsed text than in the name
  7 *   of the lexeme.  For example, `INCLUDE` is only ever the string `'include'`.
  8 *
  9 * - Lexemes with information in the string form are in PascalCase.  This
 10 *   indicates there is more information in the token than just the name.  For
 11 *   example, `Identifier` has a payload containing the name of the identifier.
 12 */
 13
 14/* Language keywords. */
 15
 16OPENQASM: 'OPENQASM' -> pushMode(VERSION_IDENTIFIER);
 17INCLUDE: 'include';
 18DEFCALGRAMMAR: 'defcalgrammar';
 19DEF: 'def';
 20CAL: 'cal' -> mode(CAL_PRELUDE);
 21DEFCAL: 'defcal' -> mode(DEFCAL_PRELUDE);
 22GATE: 'gate';
 23EXTERN: 'extern';
 24BOX: 'box';
 25LET: 'let';
 26
 27BREAK: 'break';
 28CONTINUE: 'continue';
 29IF: 'if';
 30ELSE: 'else';
 31END: 'end';
 32RETURN: 'return';
 33FOR: 'for';
 34WHILE: 'while';
 35IN: 'in';
 36
 37PRAGMA: '#'? 'pragma' -> pushMode(EAT_TO_LINE_END);
 38AnnotationKeyword: '@' Identifier ->  pushMode(EAT_TO_LINE_END);
 39
 40
 41/* Types. */
 42
 43INPUT: 'input';
 44OUTPUT: 'output';
 45CONST: 'const';
 46READONLY: 'readonly';
 47MUTABLE: 'mutable';
 48
 49QREG: 'qreg';
 50QUBIT: 'qubit';
 51
 52CREG: 'creg';
 53BOOL: 'bool';
 54BIT: 'bit';
 55INT: 'int';
 56UINT: 'uint';
 57FLOAT: 'float';
 58ANGLE: 'angle';
 59COMPLEX: 'complex';
 60ARRAY: 'array';
 61VOID: 'void';
 62
 63DURATION:  'duration';
 64STRETCH: 'stretch';
 65
 66
 67/* Builtin identifiers and operations */
 68
 69GPHASE: 'gphase';
 70INV: 'inv';
 71POW: 'pow';
 72CTRL: 'ctrl';
 73NEGCTRL: 'negctrl';
 74
 75DIM: '#dim';
 76
 77DURATIONOF: 'durationof';
 78
 79DELAY: 'delay';
 80RESET: 'reset';
 81MEASURE: 'measure';
 82BARRIER: 'barrier';
 83
 84BooleanLiteral: 'true' | 'false';
 85
 86
 87/* Symbols */
 88
 89LBRACKET: '[';
 90RBRACKET: ']';
 91LBRACE: '{';
 92RBRACE: '}';
 93LPAREN: '(';
 94RPAREN: ')';
 95
 96COLON: ':';
 97SEMICOLON: ';';
 98
 99DOT: '.';
100COMMA: ',';
101
102EQUALS: '=';
103ARROW: '->';
104PLUS: '+';
105DOUBLE_PLUS: '++';
106MINUS: '-';
107ASTERISK: '*';
108DOUBLE_ASTERISK: '**';
109SLASH: '/';
110PERCENT: '%';
111PIPE: '|';
112DOUBLE_PIPE: '||';
113AMPERSAND: '&';
114DOUBLE_AMPERSAND: '&&';
115CARET: '^';
116AT: '@';
117TILDE: '~';
118EXCLAMATION_POINT: '!';
119
120EqualityOperator: '==' | '!=';
121CompoundAssignmentOperator: '+=' | '-=' | '*=' | '/=' | '&=' | '|=' | '~=' | '^=' | '<<=' | '>>=' | '%=' | '**=';
122ComparisonOperator: '>' | '<' | '>=' | '<=';
123BitshiftOperator: '>>' | '<<';
124
125IMAG: 'im';
126ImaginaryLiteral: (DecimalIntegerLiteral | FloatLiteral) ' '* IMAG;
127
128BinaryIntegerLiteral: ('0b' | '0B') ([01] '_'?)* [01];
129OctalIntegerLiteral: '0o' ([0-7] '_'?)* [0-7];
130DecimalIntegerLiteral: ([0-9] '_'?)* [0-9];
131HexIntegerLiteral: ('0x' | '0X') ([0-9a-fA-F] '_'?)* [0-9a-fA-F];
132
133fragment ValidUnicode: [\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]; // valid unicode chars
134fragment Letter: [A-Za-z];
135fragment FirstIdCharacter: '_' | ValidUnicode | Letter;
136fragment GeneralIdCharacter: FirstIdCharacter | [0-9];
137
138Identifier: FirstIdCharacter GeneralIdCharacter*;
139HardwareQubit: '$' [0-9]+;
140
141fragment FloatLiteralExponent: [eE] (PLUS | MINUS)? DecimalIntegerLiteral;
142FloatLiteral:
143    // 1_123e-3, 123e+4 or 123E5 (needs the exponent or it's just an integer)
144    DecimalIntegerLiteral FloatLiteralExponent
145    // .1234_5678 or .1e3 (no digits before the dot)
146    | DOT DecimalIntegerLiteral FloatLiteralExponent?
147    // 123.456, 123. or 145.32e+1_00
148    | DecimalIntegerLiteral DOT DecimalIntegerLiteral? FloatLiteralExponent?;
149
150fragment TimeUnit: 'dt' | 'ns' | 'us' | 'µs' | 'ms' | 's';
151// represents explicit time value in SI or backend units
152TimingLiteral: (DecimalIntegerLiteral | FloatLiteral) TimeUnit;
153
154
155BitstringLiteral: '"' ([01] '_'?)* [01] '"';
156// allow ``"str"`` and ``'str'``
157StringLiteral
158    : '"' ~["\r\t\n]+? '"'
159    | '\'' ~['\r\t\n]+? '\''
160    ;
161
162// Ignore whitespace between tokens, and define C++-style comments.
163Whitespace: [ \t]+ -> skip ;
164Newline: [\r\n]+ -> skip ;
165LineComment : '//' ~[\r\n]* -> skip;
166BlockComment : '/*' .*? '*/' -> skip;
167
168
169// The version identifier token would be ambiguous between itself and
170// integer/floating-point literals, so we use a special mode to ensure it's
171// lexed correctly.
172mode VERSION_IDENTIFIER;
173    VERSION_IDENTIFER_WHITESPACE: [ \t\r\n]+ -> skip;
174    VersionSpecifier: [0-9]+ ('.' [0-9]+)? -> popMode;
175
176
177// A different lexer mode to swap to when we need to handle tokens on a line basis
178// rather than the default arbitrary-whitespace-based tokenisation.  This is
179// used by the annotation and pragma rules.
180mode EAT_TO_LINE_END;
181    EAT_INITIAL_SPACE: [ \t]+ -> skip;
182    EAT_LINE_END: [\r\n] -> popMode, skip;
183
184    // The line content must be a non-empty token to satisfy ANTLR (otherwise it
185    // would be able to produce an infinite number of tokens).  We could include
186    // the line ending to guarantee that this is always non-empty, but that just
187    // puts an annoying burden on consumers to remove it again.
188    RemainingLineContent: ~[ \t\r\n] ~[\r\n]*;
189
190
191// We need to do a little context-aware lexing when we hit a `cal` or `defcal`
192// token.  In both cases, there's a small interlude before the pulse grammar
193// block starts, and we need to be able to lex our way through that.  We don't
194// want to tie this grammar to one host language by injecting host code to
195// manage the state of the lexer, so instead we need to do a little duplication
196// of the tokens, because ANTLR doesn't allow us to inherit rules directly.
197mode CAL_PRELUDE;
198    CAL_PRELUDE_WHITESPACE: [ \t\r\n]+ -> skip;
199    CAL_PRELUDE_COMMENT: (LineComment | BlockComment) -> skip;
200    CAL_PRELUDE_LBRACE: LBRACE -> type(LBRACE), mode(CAL_BLOCK);
201
202mode DEFCAL_PRELUDE;
203    DEFCAL_PRELUDE_WHITESPACE: [ \t\r\n]+ -> skip;
204    DEFCAL_PRELUDE_COMMENT: (LineComment | BlockComment) -> skip;
205    DEFCAL_PRELUDE_LBRACE: LBRACE -> type(LBRACE), mode(CAL_BLOCK);
206
207    // Duplications of valid constant expression tokens that may appear in the
208    // argument list.  This is an unfortunately large number of duplications.
209
210    // Types.
211    DEFCAL_PRELUDE_QREG: QREG -> type(QREG);
212    DEFCAL_PRELUDE_QUBIT: QUBIT -> type(QUBIT);
213    DEFCAL_PRELUDE_CREG: CREG -> type(CREG);
214    DEFCAL_PRELUDE_BOOL: BOOL -> type(BOOL);
215    DEFCAL_PRELUDE_BIT: BIT -> type(BIT);
216    DEFCAL_PRELUDE_INT: INT -> type(INT);
217    DEFCAL_PRELUDE_UINT: UINT -> type(UINT);
218    DEFCAL_PRELUDE_ANGLE: ANGLE -> type(ANGLE);
219    DEFCAL_PRELUDE_FLOAT: FLOAT -> type(FLOAT);
220    DEFCAL_PRELUDE_COMPLEX: COMPLEX -> type(COMPLEX);
221    DEFCAL_PRELUDE_ARRAY: ARRAY -> type(ARRAY);
222    DEFCAL_PRELUDE_DURATION: DURATION -> type(DURATION);
223    // Punctuation.
224    DEFCAL_PRELUDE_LBRACKET: LBRACKET -> type(LBRACKET);
225    DEFCAL_PRELUDE_RBRACKET: RBRACKET -> type(RBRACKET);
226    DEFCAL_PRELUDE_LPAREN: LPAREN -> type(LPAREN);
227    DEFCAL_PRELUDE_RPAREN: RPAREN -> type(RPAREN);
228    DEFCAL_PRELUDE_ARROW: ARROW -> type(ARROW);
229    DEFCAL_PRELUDE_COMMA: COMMA -> type(COMMA);
230    DEFCAL_PRELUDE_PLUS: PLUS -> type(PLUS);
231    DEFCAL_PRELUDE_MINUS: MINUS -> type(MINUS);
232    DEFCAL_PRELUDE_ASTERISK: ASTERISK -> type(ASTERISK);
233    DEFCAL_PRELUDE_SLASH: SLASH -> type(SLASH);
234    DEFCAL_PRELUDE_BitshiftOperator: BitshiftOperator -> type(BitshiftOperator);
235    // Literals and names.
236    DEFCAL_PRELUDE_BitstringLiteral: BitstringLiteral -> type(BitstringLiteral);
237    DEFCAL_PRELUDE_BinaryIntegerLiteral: BinaryIntegerLiteral -> type(BinaryIntegerLiteral);
238    DEFCAL_PRELUDE_OctalIntegerLiteral: OctalIntegerLiteral -> type(OctalIntegerLiteral);
239    DEFCAL_PRELUDE_DecimalIntegerLiteral: DecimalIntegerLiteral -> type(DecimalIntegerLiteral);
240    DEFCAL_PRELUDE_HexIntegerLiteral: HexIntegerLiteral -> type(HexIntegerLiteral);
241    DEFCAL_PRELUDE_FloatLiteral: FloatLiteral -> type(FloatLiteral);
242    DEFCAL_PRELUDE_MEASURE: MEASURE -> type(MEASURE);
243    DEFCAL_PRELUDE_DELAY: DELAY -> type(DELAY);
244    DEFCAL_PRELUDE_RESET: RESET -> type(RESET);
245    DEFCAL_PRELUDE_Identifier: Identifier -> type(Identifier);
246    DEFCAL_PRELUDE_HardwareQubit: HardwareQubit -> type(HardwareQubit);
247
248
249// The meat-and-potatoes of matching a calibration block with balanced inner
250// braces.  We enter `CAL_BLOCK` with the opening brace already tokenised
251// (that's how the lexer knew to swap modes to us), and with the token left open
252// to continue to accumulate.  We want to tokenise until we hit the balancing
253// brace.  Since we have _no_ knowledge of what the inner language is doing,
254// things like unbalanced braces in comments will cause a failure, but there's
255// not much we can do about that without greater spec restrictions.
256mode CAL_BLOCK;
257    fragment NestedCalibrationBlock: LBRACE (NestedCalibrationBlock | ~[{}])* RBRACE;
258    CalibrationBlock: (NestedCalibrationBlock | ~[{}])+;
259    CAL_BLOCK_RBRACE: RBRACE -> type(RBRACE), mode(DEFAULT_MODE);