scannerc.go 86 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040
  1. //
  2. // Copyright (c) 2011-2019 Canonical Ltd
  3. // Copyright (c) 2006-2010 Kirill Simonov
  4. //
  5. // Permission is hereby granted, free of charge, to any person obtaining a copy of
  6. // this software and associated documentation files (the "Software"), to deal in
  7. // the Software without restriction, including without limitation the rights to
  8. // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  9. // of the Software, and to permit persons to whom the Software is furnished to do
  10. // so, subject to the following conditions:
  11. //
  12. // The above copyright notice and this permission notice shall be included in all
  13. // copies or substantial portions of the Software.
  14. //
  15. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. // SOFTWARE.
  22. package yaml
  23. import (
  24. "bytes"
  25. "fmt"
  26. )
  27. // Introduction
  28. // ************
  29. //
  30. // The following notes assume that you are familiar with the YAML specification
  31. // (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in
  32. // some cases we are less restrictive than it requires.
  33. //
  34. // The process of transforming a YAML stream into a sequence of events is
  35. // divided into two steps: Scanning and Parsing.
  36. //
  37. // The Scanner transforms the input stream into a sequence of tokens, while the
  38. // parser transforms the sequence of tokens produced by the Scanner into a
  39. // sequence of parsing events.
  40. //
  41. // The Scanner is rather clever and complicated. The Parser, on the contrary,
  42. // is a straightforward implementation of a recursive-descent parser (or,
  43. // LL(1) parser, as it is usually called).
  44. //
  45. // Actually there are two issues of Scanning that might be called "clever", the
  46. // rest is quite straightforward. The issues are "block collection start" and
  47. // "simple keys". Both issues are explained below in detail.
  48. //
  49. // Here the Scanning step is explained and implemented. We start with the list
  50. // of all the tokens produced by the Scanner together with short descriptions.
  51. //
  52. // Now, tokens:
  53. //
  54. // STREAM-START(encoding) # The stream start.
  55. // STREAM-END # The stream end.
  56. // VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
  57. // TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
  58. // DOCUMENT-START # '---'
  59. // DOCUMENT-END # '...'
  60. // BLOCK-SEQUENCE-START # Indentation increase denoting a block
  61. // BLOCK-MAPPING-START # sequence or a block mapping.
  62. // BLOCK-END # Indentation decrease.
  63. // FLOW-SEQUENCE-START # '['
  64. // FLOW-SEQUENCE-END # ']'
  65. // FLOW-MAPPING-START # '{'
  66. // FLOW-MAPPING-END # '}'
  67. // BLOCK-ENTRY # '-'
  68. // FLOW-ENTRY # ','
  69. // KEY # '?' or nothing (simple keys).
  70. // VALUE # ':'
  71. // ALIAS(anchor) # '*anchor'
  72. // ANCHOR(anchor) # '&anchor'
  73. // TAG(handle,suffix) # '!handle!suffix'
  74. // SCALAR(value,style) # A scalar.
  75. //
  76. // The following two tokens are "virtual" tokens denoting the beginning and the
  77. // end of the stream:
  78. //
  79. // STREAM-START(encoding)
  80. // STREAM-END
  81. //
  82. // We pass the information about the input stream encoding with the
  83. // STREAM-START token.
  84. //
  85. // The next two tokens are responsible for directives:
  86. //
  87. // VERSION-DIRECTIVE(major,minor)
  88. // TAG-DIRECTIVE(handle,prefix)
  89. //
  90. // Example:
  91. //
  92. // %YAML 1.1
  93. // %TAG ! !foo
  94. // %TAG !yaml! tag:yaml.org,2002:
  95. // ---
  96. //
  97. // The corresponding sequence of tokens:
  98. //
  99. // STREAM-START(utf-8)
  100. // VERSION-DIRECTIVE(1,1)
  101. // TAG-DIRECTIVE("!","!foo")
  102. // TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
  103. // DOCUMENT-START
  104. // STREAM-END
  105. //
  106. // Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
  107. // line.
  108. //
  109. // The document start and end indicators are represented by:
  110. //
  111. // DOCUMENT-START
  112. // DOCUMENT-END
  113. //
  114. // Note that if a YAML stream contains an implicit document (without '---'
  115. // and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
  116. // produced.
  117. //
  118. // In the following examples, we present whole documents together with the
  119. // produced tokens.
  120. //
  121. // 1. An implicit document:
  122. //
  123. // 'a scalar'
  124. //
  125. // Tokens:
  126. //
  127. // STREAM-START(utf-8)
  128. // SCALAR("a scalar",single-quoted)
  129. // STREAM-END
  130. //
  131. // 2. An explicit document:
  132. //
  133. // ---
  134. // 'a scalar'
  135. // ...
  136. //
  137. // Tokens:
  138. //
  139. // STREAM-START(utf-8)
  140. // DOCUMENT-START
  141. // SCALAR("a scalar",single-quoted)
  142. // DOCUMENT-END
  143. // STREAM-END
  144. //
  145. // 3. Several documents in a stream:
  146. //
  147. // 'a scalar'
  148. // ---
  149. // 'another scalar'
  150. // ---
  151. // 'yet another scalar'
  152. //
  153. // Tokens:
  154. //
  155. // STREAM-START(utf-8)
  156. // SCALAR("a scalar",single-quoted)
  157. // DOCUMENT-START
  158. // SCALAR("another scalar",single-quoted)
  159. // DOCUMENT-START
  160. // SCALAR("yet another scalar",single-quoted)
  161. // STREAM-END
  162. //
  163. // We have already introduced the SCALAR token above. The following tokens are
  164. // used to describe aliases, anchors, tags, and scalars:
  165. //
  166. // ALIAS(anchor)
  167. // ANCHOR(anchor)
  168. // TAG(handle,suffix)
  169. // SCALAR(value,style)
  170. //
  171. // The following series of examples illustrate the usage of these tokens:
  172. //
  173. // 1. A recursive sequence:
  174. //
  175. // &A [ *A ]
  176. //
  177. // Tokens:
  178. //
  179. // STREAM-START(utf-8)
  180. // ANCHOR("A")
  181. // FLOW-SEQUENCE-START
  182. // ALIAS("A")
  183. // FLOW-SEQUENCE-END
  184. // STREAM-END
  185. //
  186. // 2. A tagged scalar:
  187. //
  188. // !!float "3.14" # A good approximation.
  189. //
  190. // Tokens:
  191. //
  192. // STREAM-START(utf-8)
  193. // TAG("!!","float")
  194. // SCALAR("3.14",double-quoted)
  195. // STREAM-END
  196. //
  197. // 3. Various scalar styles:
  198. //
  199. // --- # Implicit empty plain scalars do not produce tokens.
  200. // --- a plain scalar
  201. // --- 'a single-quoted scalar'
  202. // --- "a double-quoted scalar"
  203. // --- |-
  204. // a literal scalar
  205. // --- >-
  206. // a folded
  207. // scalar
  208. //
  209. // Tokens:
  210. //
  211. // STREAM-START(utf-8)
  212. // DOCUMENT-START
  213. // DOCUMENT-START
  214. // SCALAR("a plain scalar",plain)
  215. // DOCUMENT-START
  216. // SCALAR("a single-quoted scalar",single-quoted)
  217. // DOCUMENT-START
  218. // SCALAR("a double-quoted scalar",double-quoted)
  219. // DOCUMENT-START
  220. // SCALAR("a literal scalar",literal)
  221. // DOCUMENT-START
  222. // SCALAR("a folded scalar",folded)
  223. // STREAM-END
  224. //
  225. // Now it's time to review collection-related tokens. We will start with
  226. // flow collections:
  227. //
  228. // FLOW-SEQUENCE-START
  229. // FLOW-SEQUENCE-END
  230. // FLOW-MAPPING-START
  231. // FLOW-MAPPING-END
  232. // FLOW-ENTRY
  233. // KEY
  234. // VALUE
  235. //
  236. // The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
  237. // FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
  238. // correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the
  239. // indicators '?' and ':', which are used for denoting mapping keys and values,
  240. // are represented by the KEY and VALUE tokens.
  241. //
  242. // The following examples show flow collections:
  243. //
  244. // 1. A flow sequence:
  245. //
  246. // [item 1, item 2, item 3]
  247. //
  248. // Tokens:
  249. //
  250. // STREAM-START(utf-8)
  251. // FLOW-SEQUENCE-START
  252. // SCALAR("item 1",plain)
  253. // FLOW-ENTRY
  254. // SCALAR("item 2",plain)
  255. // FLOW-ENTRY
  256. // SCALAR("item 3",plain)
  257. // FLOW-SEQUENCE-END
  258. // STREAM-END
  259. //
  260. // 2. A flow mapping:
  261. //
  262. // {
  263. // a simple key: a value, # Note that the KEY token is produced.
  264. // ? a complex key: another value,
  265. // }
  266. //
  267. // Tokens:
  268. //
  269. // STREAM-START(utf-8)
  270. // FLOW-MAPPING-START
  271. // KEY
  272. // SCALAR("a simple key",plain)
  273. // VALUE
  274. // SCALAR("a value",plain)
  275. // FLOW-ENTRY
  276. // KEY
  277. // SCALAR("a complex key",plain)
  278. // VALUE
  279. // SCALAR("another value",plain)
  280. // FLOW-ENTRY
  281. // FLOW-MAPPING-END
  282. // STREAM-END
  283. //
  284. // A simple key is a key which is not denoted by the '?' indicator. Note that
  285. // the Scanner still produces the KEY token whenever it encounters a simple key.
  286. //
  287. // For scanning block collections, the following tokens are used (note that we
  288. // repeat KEY and VALUE here):
  289. //
  290. // BLOCK-SEQUENCE-START
  291. // BLOCK-MAPPING-START
  292. // BLOCK-END
  293. // BLOCK-ENTRY
  294. // KEY
  295. // VALUE
  296. //
  297. // The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
  298. // increase that precedes a block collection (cf. the INDENT token in Python).
  299. // The token BLOCK-END denotes indentation decrease that ends a block collection
  300. // (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
  301. // that make detection of these tokens more complex.
  302. //
  303. // The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
  304. // '-', '?', and ':' correspondingly.
  305. //
  306. // The following examples show how the tokens BLOCK-SEQUENCE-START,
  307. // BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
  308. //
  309. // 1. Block sequences:
  310. //
  311. // - item 1
  312. // - item 2
  313. // -
  314. // - item 3.1
  315. // - item 3.2
  316. // -
  317. // key 1: value 1
  318. // key 2: value 2
  319. //
  320. // Tokens:
  321. //
  322. // STREAM-START(utf-8)
  323. // BLOCK-SEQUENCE-START
  324. // BLOCK-ENTRY
  325. // SCALAR("item 1",plain)
  326. // BLOCK-ENTRY
  327. // SCALAR("item 2",plain)
  328. // BLOCK-ENTRY
  329. // BLOCK-SEQUENCE-START
  330. // BLOCK-ENTRY
  331. // SCALAR("item 3.1",plain)
  332. // BLOCK-ENTRY
  333. // SCALAR("item 3.2",plain)
  334. // BLOCK-END
  335. // BLOCK-ENTRY
  336. // BLOCK-MAPPING-START
  337. // KEY
  338. // SCALAR("key 1",plain)
  339. // VALUE
  340. // SCALAR("value 1",plain)
  341. // KEY
  342. // SCALAR("key 2",plain)
  343. // VALUE
  344. // SCALAR("value 2",plain)
  345. // BLOCK-END
  346. // BLOCK-END
  347. // STREAM-END
  348. //
  349. // 2. Block mappings:
  350. //
  351. // a simple key: a value # The KEY token is produced here.
  352. // ? a complex key
  353. // : another value
  354. // a mapping:
  355. // key 1: value 1
  356. // key 2: value 2
  357. // a sequence:
  358. // - item 1
  359. // - item 2
  360. //
  361. // Tokens:
  362. //
  363. // STREAM-START(utf-8)
  364. // BLOCK-MAPPING-START
  365. // KEY
  366. // SCALAR("a simple key",plain)
  367. // VALUE
  368. // SCALAR("a value",plain)
  369. // KEY
  370. // SCALAR("a complex key",plain)
  371. // VALUE
  372. // SCALAR("another value",plain)
  373. // KEY
  374. // SCALAR("a mapping",plain)
  375. // BLOCK-MAPPING-START
  376. // KEY
  377. // SCALAR("key 1",plain)
  378. // VALUE
  379. // SCALAR("value 1",plain)
  380. // KEY
  381. // SCALAR("key 2",plain)
  382. // VALUE
  383. // SCALAR("value 2",plain)
  384. // BLOCK-END
  385. // KEY
  386. // SCALAR("a sequence",plain)
  387. // VALUE
  388. // BLOCK-SEQUENCE-START
  389. // BLOCK-ENTRY
  390. // SCALAR("item 1",plain)
  391. // BLOCK-ENTRY
  392. // SCALAR("item 2",plain)
  393. // BLOCK-END
  394. // BLOCK-END
  395. // STREAM-END
  396. //
  397. // YAML does not always require a new block collection to start on a new
  398. // line. If the current line contains only '-', '?', and ':' indicators, a new
  399. // block collection may start at the current line. The following examples
  400. // illustrate this case:
  401. //
  402. // 1. Collections in a sequence:
  403. //
  404. // - - item 1
  405. // - item 2
  406. // - key 1: value 1
  407. // key 2: value 2
  408. // - ? complex key
  409. // : complex value
  410. //
  411. // Tokens:
  412. //
  413. // STREAM-START(utf-8)
  414. // BLOCK-SEQUENCE-START
  415. // BLOCK-ENTRY
  416. // BLOCK-SEQUENCE-START
  417. // BLOCK-ENTRY
  418. // SCALAR("item 1",plain)
  419. // BLOCK-ENTRY
  420. // SCALAR("item 2",plain)
  421. // BLOCK-END
  422. // BLOCK-ENTRY
  423. // BLOCK-MAPPING-START
  424. // KEY
  425. // SCALAR("key 1",plain)
  426. // VALUE
  427. // SCALAR("value 1",plain)
  428. // KEY
  429. // SCALAR("key 2",plain)
  430. // VALUE
  431. // SCALAR("value 2",plain)
  432. // BLOCK-END
  433. // BLOCK-ENTRY
  434. // BLOCK-MAPPING-START
  435. // KEY
  436. // SCALAR("complex key")
  437. // VALUE
  438. // SCALAR("complex value")
  439. // BLOCK-END
  440. // BLOCK-END
  441. // STREAM-END
  442. //
  443. // 2. Collections in a mapping:
  444. //
  445. // ? a sequence
  446. // : - item 1
  447. // - item 2
  448. // ? a mapping
  449. // : key 1: value 1
  450. // key 2: value 2
  451. //
  452. // Tokens:
  453. //
  454. // STREAM-START(utf-8)
  455. // BLOCK-MAPPING-START
  456. // KEY
  457. // SCALAR("a sequence",plain)
  458. // VALUE
  459. // BLOCK-SEQUENCE-START
  460. // BLOCK-ENTRY
  461. // SCALAR("item 1",plain)
  462. // BLOCK-ENTRY
  463. // SCALAR("item 2",plain)
  464. // BLOCK-END
  465. // KEY
  466. // SCALAR("a mapping",plain)
  467. // VALUE
  468. // BLOCK-MAPPING-START
  469. // KEY
  470. // SCALAR("key 1",plain)
  471. // VALUE
  472. // SCALAR("value 1",plain)
  473. // KEY
  474. // SCALAR("key 2",plain)
  475. // VALUE
  476. // SCALAR("value 2",plain)
  477. // BLOCK-END
  478. // BLOCK-END
  479. // STREAM-END
  480. //
  481. // YAML also permits non-indented sequences if they are included in a block
  482. // mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
  483. //
  484. // key:
  485. // - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
  486. // - item 2
  487. //
  488. // Tokens:
  489. //
  490. // STREAM-START(utf-8)
  491. // BLOCK-MAPPING-START
  492. // KEY
  493. // SCALAR("key",plain)
  494. // VALUE
  495. // BLOCK-ENTRY
  496. // SCALAR("item 1",plain)
  497. // BLOCK-ENTRY
  498. // SCALAR("item 2",plain)
  499. // BLOCK-END
  500. //
  501. // Ensure that the buffer contains the required number of characters.
  502. // Return true on success, false on failure (reader error or memory error).
  503. func cache(parser *yaml_parser_t, length int) bool {
  504. // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B)
  505. return parser.unread >= length || yaml_parser_update_buffer(parser, length)
  506. }
  507. // Advance the buffer pointer.
  508. func skip(parser *yaml_parser_t) {
  509. if !is_blank(parser.buffer, parser.buffer_pos) {
  510. parser.newlines = 0
  511. }
  512. parser.mark.index++
  513. parser.mark.column++
  514. parser.unread--
  515. parser.buffer_pos += width(parser.buffer[parser.buffer_pos])
  516. }
  517. func skip_line(parser *yaml_parser_t) {
  518. if is_crlf(parser.buffer, parser.buffer_pos) {
  519. parser.mark.index += 2
  520. parser.mark.column = 0
  521. parser.mark.line++
  522. parser.unread -= 2
  523. parser.buffer_pos += 2
  524. parser.newlines++
  525. } else if is_break(parser.buffer, parser.buffer_pos) {
  526. parser.mark.index++
  527. parser.mark.column = 0
  528. parser.mark.line++
  529. parser.unread--
  530. parser.buffer_pos += width(parser.buffer[parser.buffer_pos])
  531. parser.newlines++
  532. }
  533. }
  534. // Copy a character to a string buffer and advance pointers.
  535. func read(parser *yaml_parser_t, s []byte) []byte {
  536. if !is_blank(parser.buffer, parser.buffer_pos) {
  537. parser.newlines = 0
  538. }
  539. w := width(parser.buffer[parser.buffer_pos])
  540. if w == 0 {
  541. panic("invalid character sequence")
  542. }
  543. if len(s) == 0 {
  544. s = make([]byte, 0, 32)
  545. }
  546. if w == 1 && len(s)+w <= cap(s) {
  547. s = s[:len(s)+1]
  548. s[len(s)-1] = parser.buffer[parser.buffer_pos]
  549. parser.buffer_pos++
  550. } else {
  551. s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...)
  552. parser.buffer_pos += w
  553. }
  554. parser.mark.index++
  555. parser.mark.column++
  556. parser.unread--
  557. return s
  558. }
  559. // Copy a line break character to a string buffer and advance pointers.
  560. func read_line(parser *yaml_parser_t, s []byte) []byte {
  561. buf := parser.buffer
  562. pos := parser.buffer_pos
  563. switch {
  564. case buf[pos] == '\r' && buf[pos+1] == '\n':
  565. // CR LF . LF
  566. s = append(s, '\n')
  567. parser.buffer_pos += 2
  568. parser.mark.index++
  569. parser.unread--
  570. case buf[pos] == '\r' || buf[pos] == '\n':
  571. // CR|LF . LF
  572. s = append(s, '\n')
  573. parser.buffer_pos += 1
  574. case buf[pos] == '\xC2' && buf[pos+1] == '\x85':
  575. // NEL . LF
  576. s = append(s, '\n')
  577. parser.buffer_pos += 2
  578. case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'):
  579. // LS|PS . LS|PS
  580. s = append(s, buf[parser.buffer_pos:pos+3]...)
  581. parser.buffer_pos += 3
  582. default:
  583. return s
  584. }
  585. parser.mark.index++
  586. parser.mark.column = 0
  587. parser.mark.line++
  588. parser.unread--
  589. parser.newlines++
  590. return s
  591. }
  592. // Get the next token.
  593. func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool {
  594. // Erase the token object.
  595. *token = yaml_token_t{} // [Go] Is this necessary?
  596. // No tokens after STREAM-END or error.
  597. if parser.stream_end_produced || parser.error != yaml_NO_ERROR {
  598. return true
  599. }
  600. // Ensure that the tokens queue contains enough tokens.
  601. if !parser.token_available {
  602. if !yaml_parser_fetch_more_tokens(parser) {
  603. return false
  604. }
  605. }
  606. // Fetch the next token from the queue.
  607. *token = parser.tokens[parser.tokens_head]
  608. parser.tokens_head++
  609. parser.tokens_parsed++
  610. parser.token_available = false
  611. if token.typ == yaml_STREAM_END_TOKEN {
  612. parser.stream_end_produced = true
  613. }
  614. return true
  615. }
  616. // Set the scanner error and return false.
  617. func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool {
  618. parser.error = yaml_SCANNER_ERROR
  619. parser.context = context
  620. parser.context_mark = context_mark
  621. parser.problem = problem
  622. parser.problem_mark = parser.mark
  623. return false
  624. }
  625. func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool {
  626. context := "while parsing a tag"
  627. if directive {
  628. context = "while parsing a %TAG directive"
  629. }
  630. return yaml_parser_set_scanner_error(parser, context, context_mark, problem)
  631. }
  // trace is a debugging aid. It immediately prints "+++" followed by args,
  // and returns a function that prints "---" followed by the same args when
  // called. Typical use: `defer trace("name", x)()`.
  func trace(args ...interface{}) func() {
  	// withMarker builds a fresh argument list with the marker in front.
  	withMarker := func(marker string) []interface{} {
  		line := make([]interface{}, 0, len(args)+1)
  		line = append(line, marker)
  		return append(line, args...)
  	}

  	fmt.Println(withMarker("+++")...)

  	// Built eagerly so the closing line captures the arguments as they are now.
  	closing := withMarker("---")
  	return func() {
  		fmt.Println(closing...)
  	}
  }
  638. // Ensure that the tokens queue contains at least one token which can be
  639. // returned to the Parser.
  640. func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
  641. // While we need more tokens to fetch, do it.
  642. for {
  643. // [Go] The comment parsing logic requires a lookahead of two tokens
  644. // so that foot comments may be parsed in time of associating them
  645. // with the tokens that are parsed before them, and also for line
  646. // comments to be transformed into head comments in some edge cases.
  647. if parser.tokens_head < len(parser.tokens)-2 {
  648. // If a potential simple key is at the head position, we need to fetch
  649. // the next token to disambiguate it.
  650. head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed]
  651. if !ok {
  652. break
  653. } else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok {
  654. return false
  655. } else if !valid {
  656. break
  657. }
  658. }
  659. // Fetch the next token.
  660. if !yaml_parser_fetch_next_token(parser) {
  661. return false
  662. }
  663. }
  664. parser.token_available = true
  665. return true
  666. }
  667. // The dispatcher for token fetchers.
  668. func yaml_parser_fetch_next_token(parser *yaml_parser_t) (ok bool) {
  669. // Ensure that the buffer is initialized.
  670. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  671. return false
  672. }
  673. // Check if we just started scanning. Fetch STREAM-START then.
  674. if !parser.stream_start_produced {
  675. return yaml_parser_fetch_stream_start(parser)
  676. }
  677. scan_mark := parser.mark
  678. // Eat whitespaces and comments until we reach the next token.
  679. if !yaml_parser_scan_to_next_token(parser) {
  680. return false
  681. }
  682. // [Go] While unrolling indents, transform the head comments of prior
  683. // indentation levels observed after scan_start into foot comments at
  684. // the respective indexes.
  685. // Check the indentation level against the current column.
  686. if !yaml_parser_unroll_indent(parser, parser.mark.column, scan_mark) {
  687. return false
  688. }
  689. // Ensure that the buffer contains at least 4 characters. 4 is the length
  690. // of the longest indicators ('--- ' and '... ').
  691. if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
  692. return false
  693. }
  694. // Is it the end of the stream?
  695. if is_z(parser.buffer, parser.buffer_pos) {
  696. return yaml_parser_fetch_stream_end(parser)
  697. }
  698. // Is it a directive?
  699. if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' {
  700. return yaml_parser_fetch_directive(parser)
  701. }
  702. buf := parser.buffer
  703. pos := parser.buffer_pos
  704. // Is it the document start indicator?
  705. if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) {
  706. return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN)
  707. }
  708. // Is it the document end indicator?
  709. if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) {
  710. return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN)
  711. }
  712. comment_mark := parser.mark
  713. if len(parser.tokens) > 0 && (parser.flow_level == 0 && buf[pos] == ':' || parser.flow_level > 0 && buf[pos] == ',') {
  714. // Associate any following comments with the prior token.
  715. comment_mark = parser.tokens[len(parser.tokens)-1].start_mark
  716. }
  717. defer func() {
  718. if !ok {
  719. return
  720. }
  721. if len(parser.tokens) > 0 && parser.tokens[len(parser.tokens)-1].typ == yaml_BLOCK_ENTRY_TOKEN {
  722. // Sequence indicators alone have no line comments. It becomes
  723. // a head comment for whatever follows.
  724. return
  725. }
  726. if !yaml_parser_scan_line_comment(parser, comment_mark) {
  727. ok = false
  728. return
  729. }
  730. }()
  731. // Is it the flow sequence start indicator?
  732. if buf[pos] == '[' {
  733. return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN)
  734. }
  735. // Is it the flow mapping start indicator?
  736. if parser.buffer[parser.buffer_pos] == '{' {
  737. return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN)
  738. }
  739. // Is it the flow sequence end indicator?
  740. if parser.buffer[parser.buffer_pos] == ']' {
  741. return yaml_parser_fetch_flow_collection_end(parser,
  742. yaml_FLOW_SEQUENCE_END_TOKEN)
  743. }
  744. // Is it the flow mapping end indicator?
  745. if parser.buffer[parser.buffer_pos] == '}' {
  746. return yaml_parser_fetch_flow_collection_end(parser,
  747. yaml_FLOW_MAPPING_END_TOKEN)
  748. }
  749. // Is it the flow entry indicator?
  750. if parser.buffer[parser.buffer_pos] == ',' {
  751. return yaml_parser_fetch_flow_entry(parser)
  752. }
  753. // Is it the block entry indicator?
  754. if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) {
  755. return yaml_parser_fetch_block_entry(parser)
  756. }
  757. // Is it the key indicator?
  758. if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {
  759. return yaml_parser_fetch_key(parser)
  760. }
  761. // Is it the value indicator?
  762. if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {
  763. return yaml_parser_fetch_value(parser)
  764. }
  765. // Is it an alias?
  766. if parser.buffer[parser.buffer_pos] == '*' {
  767. return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN)
  768. }
  769. // Is it an anchor?
  770. if parser.buffer[parser.buffer_pos] == '&' {
  771. return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN)
  772. }
  773. // Is it a tag?
  774. if parser.buffer[parser.buffer_pos] == '!' {
  775. return yaml_parser_fetch_tag(parser)
  776. }
  777. // Is it a literal scalar?
  778. if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 {
  779. return yaml_parser_fetch_block_scalar(parser, true)
  780. }
  781. // Is it a folded scalar?
  782. if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 {
  783. return yaml_parser_fetch_block_scalar(parser, false)
  784. }
  785. // Is it a single-quoted scalar?
  786. if parser.buffer[parser.buffer_pos] == '\'' {
  787. return yaml_parser_fetch_flow_scalar(parser, true)
  788. }
  789. // Is it a double-quoted scalar?
  790. if parser.buffer[parser.buffer_pos] == '"' {
  791. return yaml_parser_fetch_flow_scalar(parser, false)
  792. }
  793. // Is it a plain scalar?
  794. //
  795. // A plain scalar may start with any non-blank characters except
  796. //
  797. // '-', '?', ':', ',', '[', ']', '{', '}',
  798. // '#', '&', '*', '!', '|', '>', '\'', '\"',
  799. // '%', '@', '`'.
  800. //
  801. // In the block context (and, for the '-' indicator, in the flow context
  802. // too), it may also start with the characters
  803. //
  804. // '-', '?', ':'
  805. //
  806. // if it is followed by a non-space character.
  807. //
  808. // The last rule is more restrictive than the specification requires.
  809. // [Go] TODO Make this logic more reasonable.
  810. //switch parser.buffer[parser.buffer_pos] {
  811. //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`':
  812. //}
  813. if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' ||
  814. parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' ||
  815. parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' ||
  816. parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
  817. parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' ||
  818. parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' ||
  819. parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '|' ||
  820. parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' ||
  821. parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' ||
  822. parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') ||
  823. (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) ||
  824. (parser.flow_level == 0 &&
  825. (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') &&
  826. !is_blankz(parser.buffer, parser.buffer_pos+1)) {
  827. return yaml_parser_fetch_plain_scalar(parser)
  828. }
  829. // If we don't determine the token type so far, it is an error.
  830. return yaml_parser_set_scanner_error(parser,
  831. "while scanning for the next token", parser.mark,
  832. "found character that cannot start any token")
  833. }
  834. func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) {
  835. if !simple_key.possible {
  836. return false, true
  837. }
  838. // The 1.2 specification says:
  839. //
  840. // "If the ? indicator is omitted, parsing needs to see past the
  841. // implicit key to recognize it as such. To limit the amount of
  842. // lookahead required, the “:” indicator must appear at most 1024
  843. // Unicode characters beyond the start of the key. In addition, the key
  844. // is restricted to a single line."
  845. //
  846. if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index {
  847. // Check if the potential simple key to be removed is required.
  848. if simple_key.required {
  849. return false, yaml_parser_set_scanner_error(parser,
  850. "while scanning a simple key", simple_key.mark,
  851. "could not find expected ':'")
  852. }
  853. simple_key.possible = false
  854. return false, true
  855. }
  856. return true, true
  857. }
  858. // Check if a simple key may start at the current position and add it if
  859. // needed.
  860. func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {
  861. // A simple key is required at the current position if the scanner is in
  862. // the block context and the current column coincides with the indentation
  863. // level.
  864. required := parser.flow_level == 0 && parser.indent == parser.mark.column
  865. //
  866. // If the current position may start a simple key, save it.
  867. //
  868. if parser.simple_key_allowed {
  869. simple_key := yaml_simple_key_t{
  870. possible: true,
  871. required: required,
  872. token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
  873. mark: parser.mark,
  874. }
  875. if !yaml_parser_remove_simple_key(parser) {
  876. return false
  877. }
  878. parser.simple_keys[len(parser.simple_keys)-1] = simple_key
  879. parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1
  880. }
  881. return true
  882. }
  883. // Remove a potential simple key at the current flow level.
  884. func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {
  885. i := len(parser.simple_keys) - 1
  886. if parser.simple_keys[i].possible {
  887. // If the key is required, it is an error.
  888. if parser.simple_keys[i].required {
  889. return yaml_parser_set_scanner_error(parser,
  890. "while scanning a simple key", parser.simple_keys[i].mark,
  891. "could not find expected ':'")
  892. }
  893. // Remove the key from the stack.
  894. parser.simple_keys[i].possible = false
  895. delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number)
  896. }
  897. return true
  898. }
  899. // max_flow_level limits the flow_level
  900. const max_flow_level = 10000
  901. // Increase the flow level and resize the simple key list if needed.
  902. func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
  903. // Reset the simple key on the next level.
  904. parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{
  905. possible: false,
  906. required: false,
  907. token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
  908. mark: parser.mark,
  909. })
  910. // Increase the flow level.
  911. parser.flow_level++
  912. if parser.flow_level > max_flow_level {
  913. return yaml_parser_set_scanner_error(parser,
  914. "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark,
  915. fmt.Sprintf("exceeded max depth of %d", max_flow_level))
  916. }
  917. return true
  918. }
  919. // Decrease the flow level.
  920. func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {
  921. if parser.flow_level > 0 {
  922. parser.flow_level--
  923. last := len(parser.simple_keys) - 1
  924. delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number)
  925. parser.simple_keys = parser.simple_keys[:last]
  926. }
  927. return true
  928. }
  929. // max_indents limits the indents stack size
  930. const max_indents = 10000
  931. // Push the current indentation level to the stack and set the new level
  932. // the current column is greater than the indentation level. In this case,
  933. // append or insert the specified token into the token queue.
  934. func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool {
  935. // In the flow context, do nothing.
  936. if parser.flow_level > 0 {
  937. return true
  938. }
  939. if parser.indent < column {
  940. // Push the current indentation level to the stack and set the new
  941. // indentation level.
  942. parser.indents = append(parser.indents, parser.indent)
  943. parser.indent = column
  944. if len(parser.indents) > max_indents {
  945. return yaml_parser_set_scanner_error(parser,
  946. "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark,
  947. fmt.Sprintf("exceeded max depth of %d", max_indents))
  948. }
  949. // Create a token and insert it into the queue.
  950. token := yaml_token_t{
  951. typ: typ,
  952. start_mark: mark,
  953. end_mark: mark,
  954. }
  955. if number > -1 {
  956. number -= parser.tokens_parsed
  957. }
  958. yaml_insert_token(parser, number, &token)
  959. }
  960. return true
  961. }
  962. // Pop indentation levels from the indents stack until the current level
  963. // becomes less or equal to the column. For each indentation level, append
  964. // the BLOCK-END token.
  965. func yaml_parser_unroll_indent(parser *yaml_parser_t, column int, scan_mark yaml_mark_t) bool {
  966. // In the flow context, do nothing.
  967. if parser.flow_level > 0 {
  968. return true
  969. }
  970. block_mark := scan_mark
  971. block_mark.index--
  972. // Loop through the indentation levels in the stack.
  973. for parser.indent > column {
  974. // [Go] Reposition the end token before potential following
  975. // foot comments of parent blocks. For that, search
  976. // backwards for recent comments that were at the same
  977. // indent as the block that is ending now.
  978. stop_index := block_mark.index
  979. for i := len(parser.comments) - 1; i >= 0; i-- {
  980. comment := &parser.comments[i]
  981. if comment.end_mark.index < stop_index {
  982. // Don't go back beyond the start of the comment/whitespace scan, unless column < 0.
  983. // If requested indent column is < 0, then the document is over and everything else
  984. // is a foot anyway.
  985. break
  986. }
  987. if comment.start_mark.column == parser.indent+1 {
  988. // This is a good match. But maybe there's a former comment
  989. // at that same indent level, so keep searching.
  990. block_mark = comment.start_mark
  991. }
  992. // While the end of the former comment matches with
  993. // the start of the following one, we know there's
  994. // nothing in between and scanning is still safe.
  995. stop_index = comment.scan_mark.index
  996. }
  997. // Create a token and append it to the queue.
  998. token := yaml_token_t{
  999. typ: yaml_BLOCK_END_TOKEN,
  1000. start_mark: block_mark,
  1001. end_mark: block_mark,
  1002. }
  1003. yaml_insert_token(parser, -1, &token)
  1004. // Pop the indentation level.
  1005. parser.indent = parser.indents[len(parser.indents)-1]
  1006. parser.indents = parser.indents[:len(parser.indents)-1]
  1007. }
  1008. return true
  1009. }
  1010. // Initialize the scanner and produce the STREAM-START token.
  1011. func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool {
  1012. // Set the initial indentation.
  1013. parser.indent = -1
  1014. // Initialize the simple key stack.
  1015. parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})
  1016. parser.simple_keys_by_tok = make(map[int]int)
  1017. // A simple key is allowed at the beginning of the stream.
  1018. parser.simple_key_allowed = true
  1019. // We have started.
  1020. parser.stream_start_produced = true
  1021. // Create the STREAM-START token and append it to the queue.
  1022. token := yaml_token_t{
  1023. typ: yaml_STREAM_START_TOKEN,
  1024. start_mark: parser.mark,
  1025. end_mark: parser.mark,
  1026. encoding: parser.encoding,
  1027. }
  1028. yaml_insert_token(parser, -1, &token)
  1029. return true
  1030. }
  1031. // Produce the STREAM-END token and shut down the scanner.
  1032. func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool {
  1033. // Force new line.
  1034. if parser.mark.column != 0 {
  1035. parser.mark.column = 0
  1036. parser.mark.line++
  1037. }
  1038. // Reset the indentation level.
  1039. if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
  1040. return false
  1041. }
  1042. // Reset simple keys.
  1043. if !yaml_parser_remove_simple_key(parser) {
  1044. return false
  1045. }
  1046. parser.simple_key_allowed = false
  1047. // Create the STREAM-END token and append it to the queue.
  1048. token := yaml_token_t{
  1049. typ: yaml_STREAM_END_TOKEN,
  1050. start_mark: parser.mark,
  1051. end_mark: parser.mark,
  1052. }
  1053. yaml_insert_token(parser, -1, &token)
  1054. return true
  1055. }
  1056. // Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
  1057. func yaml_parser_fetch_directive(parser *yaml_parser_t) bool {
  1058. // Reset the indentation level.
  1059. if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
  1060. return false
  1061. }
  1062. // Reset simple keys.
  1063. if !yaml_parser_remove_simple_key(parser) {
  1064. return false
  1065. }
  1066. parser.simple_key_allowed = false
  1067. // Create the YAML-DIRECTIVE or TAG-DIRECTIVE token.
  1068. token := yaml_token_t{}
  1069. if !yaml_parser_scan_directive(parser, &token) {
  1070. return false
  1071. }
  1072. // Append the token to the queue.
  1073. yaml_insert_token(parser, -1, &token)
  1074. return true
  1075. }
  1076. // Produce the DOCUMENT-START or DOCUMENT-END token.
  1077. func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool {
  1078. // Reset the indentation level.
  1079. if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
  1080. return false
  1081. }
  1082. // Reset simple keys.
  1083. if !yaml_parser_remove_simple_key(parser) {
  1084. return false
  1085. }
  1086. parser.simple_key_allowed = false
  1087. // Consume the token.
  1088. start_mark := parser.mark
  1089. skip(parser)
  1090. skip(parser)
  1091. skip(parser)
  1092. end_mark := parser.mark
  1093. // Create the DOCUMENT-START or DOCUMENT-END token.
  1094. token := yaml_token_t{
  1095. typ: typ,
  1096. start_mark: start_mark,
  1097. end_mark: end_mark,
  1098. }
  1099. // Append the token to the queue.
  1100. yaml_insert_token(parser, -1, &token)
  1101. return true
  1102. }
  1103. // Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
  1104. func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool {
  1105. // The indicators '[' and '{' may start a simple key.
  1106. if !yaml_parser_save_simple_key(parser) {
  1107. return false
  1108. }
  1109. // Increase the flow level.
  1110. if !yaml_parser_increase_flow_level(parser) {
  1111. return false
  1112. }
  1113. // A simple key may follow the indicators '[' and '{'.
  1114. parser.simple_key_allowed = true
  1115. // Consume the token.
  1116. start_mark := parser.mark
  1117. skip(parser)
  1118. end_mark := parser.mark
  1119. // Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token.
  1120. token := yaml_token_t{
  1121. typ: typ,
  1122. start_mark: start_mark,
  1123. end_mark: end_mark,
  1124. }
  1125. // Append the token to the queue.
  1126. yaml_insert_token(parser, -1, &token)
  1127. return true
  1128. }
  1129. // Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
  1130. func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool {
  1131. // Reset any potential simple key on the current flow level.
  1132. if !yaml_parser_remove_simple_key(parser) {
  1133. return false
  1134. }
  1135. // Decrease the flow level.
  1136. if !yaml_parser_decrease_flow_level(parser) {
  1137. return false
  1138. }
  1139. // No simple keys after the indicators ']' and '}'.
  1140. parser.simple_key_allowed = false
  1141. // Consume the token.
  1142. start_mark := parser.mark
  1143. skip(parser)
  1144. end_mark := parser.mark
  1145. // Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token.
  1146. token := yaml_token_t{
  1147. typ: typ,
  1148. start_mark: start_mark,
  1149. end_mark: end_mark,
  1150. }
  1151. // Append the token to the queue.
  1152. yaml_insert_token(parser, -1, &token)
  1153. return true
  1154. }
  1155. // Produce the FLOW-ENTRY token.
  1156. func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool {
  1157. // Reset any potential simple keys on the current flow level.
  1158. if !yaml_parser_remove_simple_key(parser) {
  1159. return false
  1160. }
  1161. // Simple keys are allowed after ','.
  1162. parser.simple_key_allowed = true
  1163. // Consume the token.
  1164. start_mark := parser.mark
  1165. skip(parser)
  1166. end_mark := parser.mark
  1167. // Create the FLOW-ENTRY token and append it to the queue.
  1168. token := yaml_token_t{
  1169. typ: yaml_FLOW_ENTRY_TOKEN,
  1170. start_mark: start_mark,
  1171. end_mark: end_mark,
  1172. }
  1173. yaml_insert_token(parser, -1, &token)
  1174. return true
  1175. }
  1176. // Produce the BLOCK-ENTRY token.
  1177. func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool {
  1178. // Check if the scanner is in the block context.
  1179. if parser.flow_level == 0 {
  1180. // Check if we are allowed to start a new entry.
  1181. if !parser.simple_key_allowed {
  1182. return yaml_parser_set_scanner_error(parser, "", parser.mark,
  1183. "block sequence entries are not allowed in this context")
  1184. }
  1185. // Add the BLOCK-SEQUENCE-START token if needed.
  1186. if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) {
  1187. return false
  1188. }
  1189. } else {
  1190. // It is an error for the '-' indicator to occur in the flow context,
  1191. // but we let the Parser detect and report about it because the Parser
  1192. // is able to point to the context.
  1193. }
  1194. // Reset any potential simple keys on the current flow level.
  1195. if !yaml_parser_remove_simple_key(parser) {
  1196. return false
  1197. }
  1198. // Simple keys are allowed after '-'.
  1199. parser.simple_key_allowed = true
  1200. // Consume the token.
  1201. start_mark := parser.mark
  1202. skip(parser)
  1203. end_mark := parser.mark
  1204. // Create the BLOCK-ENTRY token and append it to the queue.
  1205. token := yaml_token_t{
  1206. typ: yaml_BLOCK_ENTRY_TOKEN,
  1207. start_mark: start_mark,
  1208. end_mark: end_mark,
  1209. }
  1210. yaml_insert_token(parser, -1, &token)
  1211. return true
  1212. }
  1213. // Produce the KEY token.
  1214. func yaml_parser_fetch_key(parser *yaml_parser_t) bool {
  1215. // In the block context, additional checks are required.
  1216. if parser.flow_level == 0 {
  1217. // Check if we are allowed to start a new key (not nessesary simple).
  1218. if !parser.simple_key_allowed {
  1219. return yaml_parser_set_scanner_error(parser, "", parser.mark,
  1220. "mapping keys are not allowed in this context")
  1221. }
  1222. // Add the BLOCK-MAPPING-START token if needed.
  1223. if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) {
  1224. return false
  1225. }
  1226. }
  1227. // Reset any potential simple keys on the current flow level.
  1228. if !yaml_parser_remove_simple_key(parser) {
  1229. return false
  1230. }
  1231. // Simple keys are allowed after '?' in the block context.
  1232. parser.simple_key_allowed = parser.flow_level == 0
  1233. // Consume the token.
  1234. start_mark := parser.mark
  1235. skip(parser)
  1236. end_mark := parser.mark
  1237. // Create the KEY token and append it to the queue.
  1238. token := yaml_token_t{
  1239. typ: yaml_KEY_TOKEN,
  1240. start_mark: start_mark,
  1241. end_mark: end_mark,
  1242. }
  1243. yaml_insert_token(parser, -1, &token)
  1244. return true
  1245. }
  1246. // Produce the VALUE token.
  1247. func yaml_parser_fetch_value(parser *yaml_parser_t) bool {
  1248. simple_key := &parser.simple_keys[len(parser.simple_keys)-1]
  1249. // Have we found a simple key?
  1250. if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok {
  1251. return false
  1252. } else if valid {
  1253. // Create the KEY token and insert it into the queue.
  1254. token := yaml_token_t{
  1255. typ: yaml_KEY_TOKEN,
  1256. start_mark: simple_key.mark,
  1257. end_mark: simple_key.mark,
  1258. }
  1259. yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token)
  1260. // In the block context, we may need to add the BLOCK-MAPPING-START token.
  1261. if !yaml_parser_roll_indent(parser, simple_key.mark.column,
  1262. simple_key.token_number,
  1263. yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) {
  1264. return false
  1265. }
  1266. // Remove the simple key.
  1267. simple_key.possible = false
  1268. delete(parser.simple_keys_by_tok, simple_key.token_number)
  1269. // A simple key cannot follow another simple key.
  1270. parser.simple_key_allowed = false
  1271. } else {
  1272. // The ':' indicator follows a complex key.
  1273. // In the block context, extra checks are required.
  1274. if parser.flow_level == 0 {
  1275. // Check if we are allowed to start a complex value.
  1276. if !parser.simple_key_allowed {
  1277. return yaml_parser_set_scanner_error(parser, "", parser.mark,
  1278. "mapping values are not allowed in this context")
  1279. }
  1280. // Add the BLOCK-MAPPING-START token if needed.
  1281. if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) {
  1282. return false
  1283. }
  1284. }
  1285. // Simple keys after ':' are allowed in the block context.
  1286. parser.simple_key_allowed = parser.flow_level == 0
  1287. }
  1288. // Consume the token.
  1289. start_mark := parser.mark
  1290. skip(parser)
  1291. end_mark := parser.mark
  1292. // Create the VALUE token and append it to the queue.
  1293. token := yaml_token_t{
  1294. typ: yaml_VALUE_TOKEN,
  1295. start_mark: start_mark,
  1296. end_mark: end_mark,
  1297. }
  1298. yaml_insert_token(parser, -1, &token)
  1299. return true
  1300. }
  1301. // Produce the ALIAS or ANCHOR token.
  1302. func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool {
  1303. // An anchor or an alias could be a simple key.
  1304. if !yaml_parser_save_simple_key(parser) {
  1305. return false
  1306. }
  1307. // A simple key cannot follow an anchor or an alias.
  1308. parser.simple_key_allowed = false
  1309. // Create the ALIAS or ANCHOR token and append it to the queue.
  1310. var token yaml_token_t
  1311. if !yaml_parser_scan_anchor(parser, &token, typ) {
  1312. return false
  1313. }
  1314. yaml_insert_token(parser, -1, &token)
  1315. return true
  1316. }
  1317. // Produce the TAG token.
  1318. func yaml_parser_fetch_tag(parser *yaml_parser_t) bool {
  1319. // A tag could be a simple key.
  1320. if !yaml_parser_save_simple_key(parser) {
  1321. return false
  1322. }
  1323. // A simple key cannot follow a tag.
  1324. parser.simple_key_allowed = false
  1325. // Create the TAG token and append it to the queue.
  1326. var token yaml_token_t
  1327. if !yaml_parser_scan_tag(parser, &token) {
  1328. return false
  1329. }
  1330. yaml_insert_token(parser, -1, &token)
  1331. return true
  1332. }
  1333. // Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
  1334. func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool {
  1335. // Remove any potential simple keys.
  1336. if !yaml_parser_remove_simple_key(parser) {
  1337. return false
  1338. }
  1339. // A simple key may follow a block scalar.
  1340. parser.simple_key_allowed = true
  1341. // Create the SCALAR token and append it to the queue.
  1342. var token yaml_token_t
  1343. if !yaml_parser_scan_block_scalar(parser, &token, literal) {
  1344. return false
  1345. }
  1346. yaml_insert_token(parser, -1, &token)
  1347. return true
  1348. }
  1349. // Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
  1350. func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool {
  1351. // A plain scalar could be a simple key.
  1352. if !yaml_parser_save_simple_key(parser) {
  1353. return false
  1354. }
  1355. // A simple key cannot follow a flow scalar.
  1356. parser.simple_key_allowed = false
  1357. // Create the SCALAR token and append it to the queue.
  1358. var token yaml_token_t
  1359. if !yaml_parser_scan_flow_scalar(parser, &token, single) {
  1360. return false
  1361. }
  1362. yaml_insert_token(parser, -1, &token)
  1363. return true
  1364. }
  1365. // Produce the SCALAR(...,plain) token.
  1366. func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool {
  1367. // A plain scalar could be a simple key.
  1368. if !yaml_parser_save_simple_key(parser) {
  1369. return false
  1370. }
  1371. // A simple key cannot follow a flow scalar.
  1372. parser.simple_key_allowed = false
  1373. // Create the SCALAR token and append it to the queue.
  1374. var token yaml_token_t
  1375. if !yaml_parser_scan_plain_scalar(parser, &token) {
  1376. return false
  1377. }
  1378. yaml_insert_token(parser, -1, &token)
  1379. return true
  1380. }
  1381. // Eat whitespaces and comments until the next token is found.
  1382. func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool {
  1383. scan_mark := parser.mark
  1384. // Until the next token is not found.
  1385. for {
  1386. // Allow the BOM mark to start a line.
  1387. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1388. return false
  1389. }
  1390. if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) {
  1391. skip(parser)
  1392. }
  1393. // Eat whitespaces.
  1394. // Tabs are allowed:
  1395. // - in the flow context
  1396. // - in the block context, but not at the beginning of the line or
  1397. // after '-', '?', or ':' (complex value).
  1398. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1399. return false
  1400. }
  1401. for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') {
  1402. skip(parser)
  1403. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1404. return false
  1405. }
  1406. }
  1407. // Check if we just had a line comment under a sequence entry that
  1408. // looks more like a header to the following content. Similar to this:
  1409. //
  1410. // - # The comment
  1411. // - Some data
  1412. //
  1413. // If so, transform the line comment to a head comment and reposition.
  1414. if len(parser.comments) > 0 && len(parser.tokens) > 1 {
  1415. tokenA := parser.tokens[len(parser.tokens)-2]
  1416. tokenB := parser.tokens[len(parser.tokens)-1]
  1417. comment := &parser.comments[len(parser.comments)-1]
  1418. if tokenA.typ == yaml_BLOCK_SEQUENCE_START_TOKEN && tokenB.typ == yaml_BLOCK_ENTRY_TOKEN && len(comment.line) > 0 && !is_break(parser.buffer, parser.buffer_pos) {
  1419. // If it was in the prior line, reposition so it becomes a
  1420. // header of the follow up token. Otherwise, keep it in place
  1421. // so it becomes a header of the former.
  1422. comment.head = comment.line
  1423. comment.line = nil
  1424. if comment.start_mark.line == parser.mark.line-1 {
  1425. comment.token_mark = parser.mark
  1426. }
  1427. }
  1428. }
  1429. // Eat a comment until a line break.
  1430. if parser.buffer[parser.buffer_pos] == '#' {
  1431. if !yaml_parser_scan_comments(parser, scan_mark) {
  1432. return false
  1433. }
  1434. }
  1435. // If it is a line break, eat it.
  1436. if is_break(parser.buffer, parser.buffer_pos) {
  1437. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  1438. return false
  1439. }
  1440. skip_line(parser)
  1441. // In the block context, a new line may start a simple key.
  1442. if parser.flow_level == 0 {
  1443. parser.simple_key_allowed = true
  1444. }
  1445. } else {
  1446. break // We have found a token.
  1447. }
  1448. }
  1449. return true
  1450. }
  1451. // Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
  1452. //
  1453. // Scope:
  1454. //
  1455. // %YAML 1.1 # a comment \n
  1456. // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1457. // %TAG !yaml! tag:yaml.org,2002: \n
  1458. // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1459. func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool {
  1460. // Eat '%'.
  1461. start_mark := parser.mark
  1462. skip(parser)
  1463. // Scan the directive name.
  1464. var name []byte
  1465. if !yaml_parser_scan_directive_name(parser, start_mark, &name) {
  1466. return false
  1467. }
  1468. // Is it a YAML directive?
  1469. if bytes.Equal(name, []byte("YAML")) {
  1470. // Scan the VERSION directive value.
  1471. var major, minor int8
  1472. if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) {
  1473. return false
  1474. }
  1475. end_mark := parser.mark
  1476. // Create a VERSION-DIRECTIVE token.
  1477. *token = yaml_token_t{
  1478. typ: yaml_VERSION_DIRECTIVE_TOKEN,
  1479. start_mark: start_mark,
  1480. end_mark: end_mark,
  1481. major: major,
  1482. minor: minor,
  1483. }
  1484. // Is it a TAG directive?
  1485. } else if bytes.Equal(name, []byte("TAG")) {
  1486. // Scan the TAG directive value.
  1487. var handle, prefix []byte
  1488. if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) {
  1489. return false
  1490. }
  1491. end_mark := parser.mark
  1492. // Create a TAG-DIRECTIVE token.
  1493. *token = yaml_token_t{
  1494. typ: yaml_TAG_DIRECTIVE_TOKEN,
  1495. start_mark: start_mark,
  1496. end_mark: end_mark,
  1497. value: handle,
  1498. prefix: prefix,
  1499. }
  1500. // Unknown directive.
  1501. } else {
  1502. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1503. start_mark, "found unknown directive name")
  1504. return false
  1505. }
  1506. // Eat the rest of the line including any comments.
  1507. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1508. return false
  1509. }
  1510. for is_blank(parser.buffer, parser.buffer_pos) {
  1511. skip(parser)
  1512. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1513. return false
  1514. }
  1515. }
  1516. if parser.buffer[parser.buffer_pos] == '#' {
  1517. // [Go] Discard this inline comment for the time being.
  1518. //if !yaml_parser_scan_line_comment(parser, start_mark) {
  1519. // return false
  1520. //}
  1521. for !is_breakz(parser.buffer, parser.buffer_pos) {
  1522. skip(parser)
  1523. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1524. return false
  1525. }
  1526. }
  1527. }
  1528. // Check if we are at the end of the line.
  1529. if !is_breakz(parser.buffer, parser.buffer_pos) {
  1530. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1531. start_mark, "did not find expected comment or line break")
  1532. return false
  1533. }
  1534. // Eat a line break.
  1535. if is_break(parser.buffer, parser.buffer_pos) {
  1536. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  1537. return false
  1538. }
  1539. skip_line(parser)
  1540. }
  1541. return true
  1542. }
  1543. // Scan the directive name.
  1544. //
  1545. // Scope:
  1546. //
  1547. // %YAML 1.1 # a comment \n
  1548. // ^^^^
  1549. // %TAG !yaml! tag:yaml.org,2002: \n
  1550. // ^^^
  1551. func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool {
  1552. // Consume the directive name.
  1553. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1554. return false
  1555. }
  1556. var s []byte
  1557. for is_alpha(parser.buffer, parser.buffer_pos) {
  1558. s = read(parser, s)
  1559. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1560. return false
  1561. }
  1562. }
  1563. // Check if the name is empty.
  1564. if len(s) == 0 {
  1565. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1566. start_mark, "could not find expected directive name")
  1567. return false
  1568. }
  1569. // Check for an blank character after the name.
  1570. if !is_blankz(parser.buffer, parser.buffer_pos) {
  1571. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1572. start_mark, "found unexpected non-alphabetical character")
  1573. return false
  1574. }
  1575. *name = s
  1576. return true
  1577. }
  1578. // Scan the value of VERSION-DIRECTIVE.
  1579. //
  1580. // Scope:
  1581. //
  1582. // %YAML 1.1 # a comment \n
  1583. // ^^^^^^
  1584. func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool {
  1585. // Eat whitespaces.
  1586. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1587. return false
  1588. }
  1589. for is_blank(parser.buffer, parser.buffer_pos) {
  1590. skip(parser)
  1591. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1592. return false
  1593. }
  1594. }
  1595. // Consume the major version number.
  1596. if !yaml_parser_scan_version_directive_number(parser, start_mark, major) {
  1597. return false
  1598. }
  1599. // Eat '.'.
  1600. if parser.buffer[parser.buffer_pos] != '.' {
  1601. return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
  1602. start_mark, "did not find expected digit or '.' character")
  1603. }
  1604. skip(parser)
  1605. // Consume the minor version number.
  1606. if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) {
  1607. return false
  1608. }
  1609. return true
  1610. }
  1611. const max_number_length = 2
  1612. // Scan the version number of VERSION-DIRECTIVE.
  1613. //
  1614. // Scope:
  1615. //
  1616. // %YAML 1.1 # a comment \n
  1617. // ^
  1618. // %YAML 1.1 # a comment \n
  1619. // ^
  1620. func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool {
  1621. // Repeat while the next character is digit.
  1622. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1623. return false
  1624. }
  1625. var value, length int8
  1626. for is_digit(parser.buffer, parser.buffer_pos) {
  1627. // Check if the number is too long.
  1628. length++
  1629. if length > max_number_length {
  1630. return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
  1631. start_mark, "found extremely long version number")
  1632. }
  1633. value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos))
  1634. skip(parser)
  1635. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1636. return false
  1637. }
  1638. }
  1639. // Check if the number was present.
  1640. if length == 0 {
  1641. return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
  1642. start_mark, "did not find expected version number")
  1643. }
  1644. *number = value
  1645. return true
  1646. }
  1647. // Scan the value of a TAG-DIRECTIVE token.
  1648. //
  1649. // Scope:
  1650. //
  1651. // %TAG !yaml! tag:yaml.org,2002: \n
  1652. // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1653. func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool {
  1654. var handle_value, prefix_value []byte
  1655. // Eat whitespaces.
  1656. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1657. return false
  1658. }
  1659. for is_blank(parser.buffer, parser.buffer_pos) {
  1660. skip(parser)
  1661. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1662. return false
  1663. }
  1664. }
  1665. // Scan a handle.
  1666. if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) {
  1667. return false
  1668. }
  1669. // Expect a whitespace.
  1670. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1671. return false
  1672. }
  1673. if !is_blank(parser.buffer, parser.buffer_pos) {
  1674. yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
  1675. start_mark, "did not find expected whitespace")
  1676. return false
  1677. }
  1678. // Eat whitespaces.
  1679. for is_blank(parser.buffer, parser.buffer_pos) {
  1680. skip(parser)
  1681. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1682. return false
  1683. }
  1684. }
  1685. // Scan a prefix.
  1686. if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) {
  1687. return false
  1688. }
  1689. // Expect a whitespace or line break.
  1690. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1691. return false
  1692. }
  1693. if !is_blankz(parser.buffer, parser.buffer_pos) {
  1694. yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
  1695. start_mark, "did not find expected whitespace or line break")
  1696. return false
  1697. }
  1698. *handle = handle_value
  1699. *prefix = prefix_value
  1700. return true
  1701. }
  1702. func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool {
  1703. var s []byte
  1704. // Eat the indicator character.
  1705. start_mark := parser.mark
  1706. skip(parser)
  1707. // Consume the value.
  1708. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1709. return false
  1710. }
  1711. for is_alpha(parser.buffer, parser.buffer_pos) {
  1712. s = read(parser, s)
  1713. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1714. return false
  1715. }
  1716. }
  1717. end_mark := parser.mark
  1718. /*
  1719. * Check if length of the anchor is greater than 0 and it is followed by
  1720. * a whitespace character or one of the indicators:
  1721. *
  1722. * '?', ':', ',', ']', '}', '%', '@', '`'.
  1723. */
  1724. if len(s) == 0 ||
  1725. !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' ||
  1726. parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' ||
  1727. parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' ||
  1728. parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' ||
  1729. parser.buffer[parser.buffer_pos] == '`') {
  1730. context := "while scanning an alias"
  1731. if typ == yaml_ANCHOR_TOKEN {
  1732. context = "while scanning an anchor"
  1733. }
  1734. yaml_parser_set_scanner_error(parser, context, start_mark,
  1735. "did not find expected alphabetic or numeric character")
  1736. return false
  1737. }
  1738. // Create a token.
  1739. *token = yaml_token_t{
  1740. typ: typ,
  1741. start_mark: start_mark,
  1742. end_mark: end_mark,
  1743. value: s,
  1744. }
  1745. return true
  1746. }
  1747. /*
  1748. * Scan a TAG token.
  1749. */
  1750. func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool {
  1751. var handle, suffix []byte
  1752. start_mark := parser.mark
  1753. // Check if the tag is in the canonical form.
  1754. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  1755. return false
  1756. }
  1757. if parser.buffer[parser.buffer_pos+1] == '<' {
  1758. // Keep the handle as ''
  1759. // Eat '!<'
  1760. skip(parser)
  1761. skip(parser)
  1762. // Consume the tag value.
  1763. if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) {
  1764. return false
  1765. }
  1766. // Check for '>' and eat it.
  1767. if parser.buffer[parser.buffer_pos] != '>' {
  1768. yaml_parser_set_scanner_error(parser, "while scanning a tag",
  1769. start_mark, "did not find the expected '>'")
  1770. return false
  1771. }
  1772. skip(parser)
  1773. } else {
  1774. // The tag has either the '!suffix' or the '!handle!suffix' form.
  1775. // First, try to scan a handle.
  1776. if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) {
  1777. return false
  1778. }
  1779. // Check if it is, indeed, handle.
  1780. if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' {
  1781. // Scan the suffix now.
  1782. if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) {
  1783. return false
  1784. }
  1785. } else {
  1786. // It wasn't a handle after all. Scan the rest of the tag.
  1787. if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) {
  1788. return false
  1789. }
  1790. // Set the handle to '!'.
  1791. handle = []byte{'!'}
  1792. // A special case: the '!' tag. Set the handle to '' and the
  1793. // suffix to '!'.
  1794. if len(suffix) == 0 {
  1795. handle, suffix = suffix, handle
  1796. }
  1797. }
  1798. }
  1799. // Check the character which ends the tag.
  1800. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1801. return false
  1802. }
  1803. if !is_blankz(parser.buffer, parser.buffer_pos) {
  1804. yaml_parser_set_scanner_error(parser, "while scanning a tag",
  1805. start_mark, "did not find expected whitespace or line break")
  1806. return false
  1807. }
  1808. end_mark := parser.mark
  1809. // Create a token.
  1810. *token = yaml_token_t{
  1811. typ: yaml_TAG_TOKEN,
  1812. start_mark: start_mark,
  1813. end_mark: end_mark,
  1814. value: handle,
  1815. suffix: suffix,
  1816. }
  1817. return true
  1818. }
  1819. // Scan a tag handle.
  1820. func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool {
  1821. // Check the initial '!' character.
  1822. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1823. return false
  1824. }
  1825. if parser.buffer[parser.buffer_pos] != '!' {
  1826. yaml_parser_set_scanner_tag_error(parser, directive,
  1827. start_mark, "did not find expected '!'")
  1828. return false
  1829. }
  1830. var s []byte
  1831. // Copy the '!' character.
  1832. s = read(parser, s)
  1833. // Copy all subsequent alphabetical and numerical characters.
  1834. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1835. return false
  1836. }
  1837. for is_alpha(parser.buffer, parser.buffer_pos) {
  1838. s = read(parser, s)
  1839. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1840. return false
  1841. }
  1842. }
  1843. // Check if the trailing character is '!' and copy it.
  1844. if parser.buffer[parser.buffer_pos] == '!' {
  1845. s = read(parser, s)
  1846. } else {
  1847. // It's either the '!' tag or not really a tag handle. If it's a %TAG
  1848. // directive, it's an error. If it's a tag token, it must be a part of URI.
  1849. if directive && string(s) != "!" {
  1850. yaml_parser_set_scanner_tag_error(parser, directive,
  1851. start_mark, "did not find expected '!'")
  1852. return false
  1853. }
  1854. }
  1855. *handle = s
  1856. return true
  1857. }
  1858. // Scan a tag.
  1859. func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool {
  1860. //size_t length = head ? strlen((char *)head) : 0
  1861. var s []byte
  1862. hasTag := len(head) > 0
  1863. // Copy the head if needed.
  1864. //
  1865. // Note that we don't copy the leading '!' character.
  1866. if len(head) > 1 {
  1867. s = append(s, head[1:]...)
  1868. }
  1869. // Scan the tag.
  1870. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1871. return false
  1872. }
  1873. // The set of characters that may appear in URI is as follows:
  1874. //
  1875. // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
  1876. // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
  1877. // '%'.
  1878. // [Go] TODO Convert this into more reasonable logic.
  1879. for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' ||
  1880. parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' ||
  1881. parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' ||
  1882. parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' ||
  1883. parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' ||
  1884. parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' ||
  1885. parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' ||
  1886. parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' ||
  1887. parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' ||
  1888. parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' ||
  1889. parser.buffer[parser.buffer_pos] == '%' {
  1890. // Check if it is a URI-escape sequence.
  1891. if parser.buffer[parser.buffer_pos] == '%' {
  1892. if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) {
  1893. return false
  1894. }
  1895. } else {
  1896. s = read(parser, s)
  1897. }
  1898. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1899. return false
  1900. }
  1901. hasTag = true
  1902. }
  1903. if !hasTag {
  1904. yaml_parser_set_scanner_tag_error(parser, directive,
  1905. start_mark, "did not find expected tag URI")
  1906. return false
  1907. }
  1908. *uri = s
  1909. return true
  1910. }
  1911. // Decode an URI-escape sequence corresponding to a single UTF-8 character.
  1912. func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool {
  1913. // Decode the required number of characters.
  1914. w := 1024
  1915. for w > 0 {
  1916. // Check for a URI-escaped octet.
  1917. if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
  1918. return false
  1919. }
  1920. if !(parser.buffer[parser.buffer_pos] == '%' &&
  1921. is_hex(parser.buffer, parser.buffer_pos+1) &&
  1922. is_hex(parser.buffer, parser.buffer_pos+2)) {
  1923. return yaml_parser_set_scanner_tag_error(parser, directive,
  1924. start_mark, "did not find URI escaped octet")
  1925. }
  1926. // Get the octet.
  1927. octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2))
  1928. // If it is the leading octet, determine the length of the UTF-8 sequence.
  1929. if w == 1024 {
  1930. w = width(octet)
  1931. if w == 0 {
  1932. return yaml_parser_set_scanner_tag_error(parser, directive,
  1933. start_mark, "found an incorrect leading UTF-8 octet")
  1934. }
  1935. } else {
  1936. // Check if the trailing octet is correct.
  1937. if octet&0xC0 != 0x80 {
  1938. return yaml_parser_set_scanner_tag_error(parser, directive,
  1939. start_mark, "found an incorrect trailing UTF-8 octet")
  1940. }
  1941. }
  1942. // Copy the octet and move the pointers.
  1943. *s = append(*s, octet)
  1944. skip(parser)
  1945. skip(parser)
  1946. skip(parser)
  1947. w--
  1948. }
  1949. return true
  1950. }
  1951. // Scan a block scalar.
  1952. func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool {
  1953. // Eat the indicator '|' or '>'.
  1954. start_mark := parser.mark
  1955. skip(parser)
  1956. // Scan the additional block scalar indicators.
  1957. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1958. return false
  1959. }
  1960. // Check for a chomping indicator.
  1961. var chomping, increment int
  1962. if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
  1963. // Set the chomping method and eat the indicator.
  1964. if parser.buffer[parser.buffer_pos] == '+' {
  1965. chomping = +1
  1966. } else {
  1967. chomping = -1
  1968. }
  1969. skip(parser)
  1970. // Check for an indentation indicator.
  1971. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1972. return false
  1973. }
  1974. if is_digit(parser.buffer, parser.buffer_pos) {
  1975. // Check that the indentation is greater than 0.
  1976. if parser.buffer[parser.buffer_pos] == '0' {
  1977. yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
  1978. start_mark, "found an indentation indicator equal to 0")
  1979. return false
  1980. }
  1981. // Get the indentation level and eat the indicator.
  1982. increment = as_digit(parser.buffer, parser.buffer_pos)
  1983. skip(parser)
  1984. }
  1985. } else if is_digit(parser.buffer, parser.buffer_pos) {
  1986. // Do the same as above, but in the opposite order.
  1987. if parser.buffer[parser.buffer_pos] == '0' {
  1988. yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
  1989. start_mark, "found an indentation indicator equal to 0")
  1990. return false
  1991. }
  1992. increment = as_digit(parser.buffer, parser.buffer_pos)
  1993. skip(parser)
  1994. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1995. return false
  1996. }
  1997. if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
  1998. if parser.buffer[parser.buffer_pos] == '+' {
  1999. chomping = +1
  2000. } else {
  2001. chomping = -1
  2002. }
  2003. skip(parser)
  2004. }
  2005. }
  2006. // Eat whitespaces and comments to the end of the line.
  2007. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2008. return false
  2009. }
  2010. for is_blank(parser.buffer, parser.buffer_pos) {
  2011. skip(parser)
  2012. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2013. return false
  2014. }
  2015. }
  2016. if parser.buffer[parser.buffer_pos] == '#' {
  2017. if !yaml_parser_scan_line_comment(parser, start_mark) {
  2018. return false
  2019. }
  2020. for !is_breakz(parser.buffer, parser.buffer_pos) {
  2021. skip(parser)
  2022. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2023. return false
  2024. }
  2025. }
  2026. }
  2027. // Check if we are at the end of the line.
  2028. if !is_breakz(parser.buffer, parser.buffer_pos) {
  2029. yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
  2030. start_mark, "did not find expected comment or line break")
  2031. return false
  2032. }
  2033. // Eat a line break.
  2034. if is_break(parser.buffer, parser.buffer_pos) {
  2035. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2036. return false
  2037. }
  2038. skip_line(parser)
  2039. }
  2040. end_mark := parser.mark
  2041. // Set the indentation level if it was specified.
  2042. var indent int
  2043. if increment > 0 {
  2044. if parser.indent >= 0 {
  2045. indent = parser.indent + increment
  2046. } else {
  2047. indent = increment
  2048. }
  2049. }
  2050. // Scan the leading line breaks and determine the indentation level if needed.
  2051. var s, leading_break, trailing_breaks []byte
  2052. if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
  2053. return false
  2054. }
  2055. // Scan the block scalar content.
  2056. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2057. return false
  2058. }
  2059. var leading_blank, trailing_blank bool
  2060. for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) {
  2061. // We are at the beginning of a non-empty line.
  2062. // Is it a trailing whitespace?
  2063. trailing_blank = is_blank(parser.buffer, parser.buffer_pos)
  2064. // Check if we need to fold the leading line break.
  2065. if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' {
  2066. // Do we need to join the lines by space?
  2067. if len(trailing_breaks) == 0 {
  2068. s = append(s, ' ')
  2069. }
  2070. } else {
  2071. s = append(s, leading_break...)
  2072. }
  2073. leading_break = leading_break[:0]
  2074. // Append the remaining line breaks.
  2075. s = append(s, trailing_breaks...)
  2076. trailing_breaks = trailing_breaks[:0]
  2077. // Is it a leading whitespace?
  2078. leading_blank = is_blank(parser.buffer, parser.buffer_pos)
  2079. // Consume the current line.
  2080. for !is_breakz(parser.buffer, parser.buffer_pos) {
  2081. s = read(parser, s)
  2082. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2083. return false
  2084. }
  2085. }
  2086. // Consume the line break.
  2087. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2088. return false
  2089. }
  2090. leading_break = read_line(parser, leading_break)
  2091. // Eat the following indentation spaces and line breaks.
  2092. if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
  2093. return false
  2094. }
  2095. }
  2096. // Chomp the tail.
  2097. if chomping != -1 {
  2098. s = append(s, leading_break...)
  2099. }
  2100. if chomping == 1 {
  2101. s = append(s, trailing_breaks...)
  2102. }
  2103. // Create a token.
  2104. *token = yaml_token_t{
  2105. typ: yaml_SCALAR_TOKEN,
  2106. start_mark: start_mark,
  2107. end_mark: end_mark,
  2108. value: s,
  2109. style: yaml_LITERAL_SCALAR_STYLE,
  2110. }
  2111. if !literal {
  2112. token.style = yaml_FOLDED_SCALAR_STYLE
  2113. }
  2114. return true
  2115. }
  2116. // Scan indentation spaces and line breaks for a block scalar. Determine the
  2117. // indentation level if needed.
  2118. func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool {
  2119. *end_mark = parser.mark
  2120. // Eat the indentation spaces and line breaks.
  2121. max_indent := 0
  2122. for {
  2123. // Eat the indentation spaces.
  2124. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2125. return false
  2126. }
  2127. for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) {
  2128. skip(parser)
  2129. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2130. return false
  2131. }
  2132. }
  2133. if parser.mark.column > max_indent {
  2134. max_indent = parser.mark.column
  2135. }
  2136. // Check for a tab character messing the indentation.
  2137. if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) {
  2138. return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
  2139. start_mark, "found a tab character where an indentation space is expected")
  2140. }
  2141. // Have we found a non-empty line?
  2142. if !is_break(parser.buffer, parser.buffer_pos) {
  2143. break
  2144. }
  2145. // Consume the line break.
  2146. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2147. return false
  2148. }
  2149. // [Go] Should really be returning breaks instead.
  2150. *breaks = read_line(parser, *breaks)
  2151. *end_mark = parser.mark
  2152. }
  2153. // Determine the indentation level if needed.
  2154. if *indent == 0 {
  2155. *indent = max_indent
  2156. if *indent < parser.indent+1 {
  2157. *indent = parser.indent + 1
  2158. }
  2159. if *indent < 1 {
  2160. *indent = 1
  2161. }
  2162. }
  2163. return true
  2164. }
  2165. // Scan a quoted scalar.
  2166. func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool {
  2167. // Eat the left quote.
  2168. start_mark := parser.mark
  2169. skip(parser)
  2170. // Consume the content of the quoted scalar.
  2171. var s, leading_break, trailing_breaks, whitespaces []byte
  2172. for {
  2173. // Check that there are no document indicators at the beginning of the line.
  2174. if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
  2175. return false
  2176. }
  2177. if parser.mark.column == 0 &&
  2178. ((parser.buffer[parser.buffer_pos+0] == '-' &&
  2179. parser.buffer[parser.buffer_pos+1] == '-' &&
  2180. parser.buffer[parser.buffer_pos+2] == '-') ||
  2181. (parser.buffer[parser.buffer_pos+0] == '.' &&
  2182. parser.buffer[parser.buffer_pos+1] == '.' &&
  2183. parser.buffer[parser.buffer_pos+2] == '.')) &&
  2184. is_blankz(parser.buffer, parser.buffer_pos+3) {
  2185. yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
  2186. start_mark, "found unexpected document indicator")
  2187. return false
  2188. }
  2189. // Check for EOF.
  2190. if is_z(parser.buffer, parser.buffer_pos) {
  2191. yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
  2192. start_mark, "found unexpected end of stream")
  2193. return false
  2194. }
  2195. // Consume non-blank characters.
  2196. leading_blanks := false
  2197. for !is_blankz(parser.buffer, parser.buffer_pos) {
  2198. if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' {
  2199. // Is is an escaped single quote.
  2200. s = append(s, '\'')
  2201. skip(parser)
  2202. skip(parser)
  2203. } else if single && parser.buffer[parser.buffer_pos] == '\'' {
  2204. // It is a right single quote.
  2205. break
  2206. } else if !single && parser.buffer[parser.buffer_pos] == '"' {
  2207. // It is a right double quote.
  2208. break
  2209. } else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) {
  2210. // It is an escaped line break.
  2211. if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
  2212. return false
  2213. }
  2214. skip(parser)
  2215. skip_line(parser)
  2216. leading_blanks = true
  2217. break
  2218. } else if !single && parser.buffer[parser.buffer_pos] == '\\' {
  2219. // It is an escape sequence.
  2220. code_length := 0
  2221. // Check the escape character.
  2222. switch parser.buffer[parser.buffer_pos+1] {
  2223. case '0':
  2224. s = append(s, 0)
  2225. case 'a':
  2226. s = append(s, '\x07')
  2227. case 'b':
  2228. s = append(s, '\x08')
  2229. case 't', '\t':
  2230. s = append(s, '\x09')
  2231. case 'n':
  2232. s = append(s, '\x0A')
  2233. case 'v':
  2234. s = append(s, '\x0B')
  2235. case 'f':
  2236. s = append(s, '\x0C')
  2237. case 'r':
  2238. s = append(s, '\x0D')
  2239. case 'e':
  2240. s = append(s, '\x1B')
  2241. case ' ':
  2242. s = append(s, '\x20')
  2243. case '"':
  2244. s = append(s, '"')
  2245. case '\'':
  2246. s = append(s, '\'')
  2247. case '\\':
  2248. s = append(s, '\\')
  2249. case 'N': // NEL (#x85)
  2250. s = append(s, '\xC2')
  2251. s = append(s, '\x85')
  2252. case '_': // #xA0
  2253. s = append(s, '\xC2')
  2254. s = append(s, '\xA0')
  2255. case 'L': // LS (#x2028)
  2256. s = append(s, '\xE2')
  2257. s = append(s, '\x80')
  2258. s = append(s, '\xA8')
  2259. case 'P': // PS (#x2029)
  2260. s = append(s, '\xE2')
  2261. s = append(s, '\x80')
  2262. s = append(s, '\xA9')
  2263. case 'x':
  2264. code_length = 2
  2265. case 'u':
  2266. code_length = 4
  2267. case 'U':
  2268. code_length = 8
  2269. default:
  2270. yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
  2271. start_mark, "found unknown escape character")
  2272. return false
  2273. }
  2274. skip(parser)
  2275. skip(parser)
  2276. // Consume an arbitrary escape code.
  2277. if code_length > 0 {
  2278. var value int
  2279. // Scan the character value.
  2280. if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) {
  2281. return false
  2282. }
  2283. for k := 0; k < code_length; k++ {
  2284. if !is_hex(parser.buffer, parser.buffer_pos+k) {
  2285. yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
  2286. start_mark, "did not find expected hexdecimal number")
  2287. return false
  2288. }
  2289. value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k)
  2290. }
  2291. // Check the value and write the character.
  2292. if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF {
  2293. yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
  2294. start_mark, "found invalid Unicode character escape code")
  2295. return false
  2296. }
  2297. if value <= 0x7F {
  2298. s = append(s, byte(value))
  2299. } else if value <= 0x7FF {
  2300. s = append(s, byte(0xC0+(value>>6)))
  2301. s = append(s, byte(0x80+(value&0x3F)))
  2302. } else if value <= 0xFFFF {
  2303. s = append(s, byte(0xE0+(value>>12)))
  2304. s = append(s, byte(0x80+((value>>6)&0x3F)))
  2305. s = append(s, byte(0x80+(value&0x3F)))
  2306. } else {
  2307. s = append(s, byte(0xF0+(value>>18)))
  2308. s = append(s, byte(0x80+((value>>12)&0x3F)))
  2309. s = append(s, byte(0x80+((value>>6)&0x3F)))
  2310. s = append(s, byte(0x80+(value&0x3F)))
  2311. }
  2312. // Advance the pointer.
  2313. for k := 0; k < code_length; k++ {
  2314. skip(parser)
  2315. }
  2316. }
  2317. } else {
  2318. // It is a non-escaped non-blank character.
  2319. s = read(parser, s)
  2320. }
  2321. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2322. return false
  2323. }
  2324. }
  2325. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2326. return false
  2327. }
  2328. // Check if we are at the end of the scalar.
  2329. if single {
  2330. if parser.buffer[parser.buffer_pos] == '\'' {
  2331. break
  2332. }
  2333. } else {
  2334. if parser.buffer[parser.buffer_pos] == '"' {
  2335. break
  2336. }
  2337. }
  2338. // Consume blank characters.
  2339. for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
  2340. if is_blank(parser.buffer, parser.buffer_pos) {
  2341. // Consume a space or a tab character.
  2342. if !leading_blanks {
  2343. whitespaces = read(parser, whitespaces)
  2344. } else {
  2345. skip(parser)
  2346. }
  2347. } else {
  2348. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2349. return false
  2350. }
  2351. // Check if it is a first line break.
  2352. if !leading_blanks {
  2353. whitespaces = whitespaces[:0]
  2354. leading_break = read_line(parser, leading_break)
  2355. leading_blanks = true
  2356. } else {
  2357. trailing_breaks = read_line(parser, trailing_breaks)
  2358. }
  2359. }
  2360. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2361. return false
  2362. }
  2363. }
  2364. // Join the whitespaces or fold line breaks.
  2365. if leading_blanks {
  2366. // Do we need to fold line breaks?
  2367. if len(leading_break) > 0 && leading_break[0] == '\n' {
  2368. if len(trailing_breaks) == 0 {
  2369. s = append(s, ' ')
  2370. } else {
  2371. s = append(s, trailing_breaks...)
  2372. }
  2373. } else {
  2374. s = append(s, leading_break...)
  2375. s = append(s, trailing_breaks...)
  2376. }
  2377. trailing_breaks = trailing_breaks[:0]
  2378. leading_break = leading_break[:0]
  2379. } else {
  2380. s = append(s, whitespaces...)
  2381. whitespaces = whitespaces[:0]
  2382. }
  2383. }
  2384. // Eat the right quote.
  2385. skip(parser)
  2386. end_mark := parser.mark
  2387. // Create a token.
  2388. *token = yaml_token_t{
  2389. typ: yaml_SCALAR_TOKEN,
  2390. start_mark: start_mark,
  2391. end_mark: end_mark,
  2392. value: s,
  2393. style: yaml_SINGLE_QUOTED_SCALAR_STYLE,
  2394. }
  2395. if !single {
  2396. token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE
  2397. }
  2398. return true
  2399. }
  2400. // Scan a plain scalar.
  2401. func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool {
  2402. var s, leading_break, trailing_breaks, whitespaces []byte
  2403. var leading_blanks bool
  2404. var indent = parser.indent + 1
  2405. start_mark := parser.mark
  2406. end_mark := parser.mark
  2407. // Consume the content of the plain scalar.
  2408. for {
  2409. // Check for a document indicator.
  2410. if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
  2411. return false
  2412. }
  2413. if parser.mark.column == 0 &&
  2414. ((parser.buffer[parser.buffer_pos+0] == '-' &&
  2415. parser.buffer[parser.buffer_pos+1] == '-' &&
  2416. parser.buffer[parser.buffer_pos+2] == '-') ||
  2417. (parser.buffer[parser.buffer_pos+0] == '.' &&
  2418. parser.buffer[parser.buffer_pos+1] == '.' &&
  2419. parser.buffer[parser.buffer_pos+2] == '.')) &&
  2420. is_blankz(parser.buffer, parser.buffer_pos+3) {
  2421. break
  2422. }
  2423. // Check for a comment.
  2424. if parser.buffer[parser.buffer_pos] == '#' {
  2425. break
  2426. }
  2427. // Consume non-blank characters.
  2428. for !is_blankz(parser.buffer, parser.buffer_pos) {
  2429. // Check for indicators that may end a plain scalar.
  2430. if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) ||
  2431. (parser.flow_level > 0 &&
  2432. (parser.buffer[parser.buffer_pos] == ',' ||
  2433. parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' ||
  2434. parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
  2435. parser.buffer[parser.buffer_pos] == '}')) {
  2436. break
  2437. }
  2438. // Check if we need to join whitespaces and breaks.
  2439. if leading_blanks || len(whitespaces) > 0 {
  2440. if leading_blanks {
  2441. // Do we need to fold line breaks?
  2442. if leading_break[0] == '\n' {
  2443. if len(trailing_breaks) == 0 {
  2444. s = append(s, ' ')
  2445. } else {
  2446. s = append(s, trailing_breaks...)
  2447. }
  2448. } else {
  2449. s = append(s, leading_break...)
  2450. s = append(s, trailing_breaks...)
  2451. }
  2452. trailing_breaks = trailing_breaks[:0]
  2453. leading_break = leading_break[:0]
  2454. leading_blanks = false
  2455. } else {
  2456. s = append(s, whitespaces...)
  2457. whitespaces = whitespaces[:0]
  2458. }
  2459. }
  2460. // Copy the character.
  2461. s = read(parser, s)
  2462. end_mark = parser.mark
  2463. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2464. return false
  2465. }
  2466. }
  2467. // Is it the end?
  2468. if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) {
  2469. break
  2470. }
  2471. // Consume blank characters.
  2472. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2473. return false
  2474. }
  2475. for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
  2476. if is_blank(parser.buffer, parser.buffer_pos) {
  2477. // Check for tab characters that abuse indentation.
  2478. if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) {
  2479. yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
  2480. start_mark, "found a tab character that violates indentation")
  2481. return false
  2482. }
  2483. // Consume a space or a tab character.
  2484. if !leading_blanks {
  2485. whitespaces = read(parser, whitespaces)
  2486. } else {
  2487. skip(parser)
  2488. }
  2489. } else {
  2490. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2491. return false
  2492. }
  2493. // Check if it is a first line break.
  2494. if !leading_blanks {
  2495. whitespaces = whitespaces[:0]
  2496. leading_break = read_line(parser, leading_break)
  2497. leading_blanks = true
  2498. } else {
  2499. trailing_breaks = read_line(parser, trailing_breaks)
  2500. }
  2501. }
  2502. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2503. return false
  2504. }
  2505. }
  2506. // Check indentation level.
  2507. if parser.flow_level == 0 && parser.mark.column < indent {
  2508. break
  2509. }
  2510. }
  2511. // Create a token.
  2512. *token = yaml_token_t{
  2513. typ: yaml_SCALAR_TOKEN,
  2514. start_mark: start_mark,
  2515. end_mark: end_mark,
  2516. value: s,
  2517. style: yaml_PLAIN_SCALAR_STYLE,
  2518. }
  2519. // Note that we change the 'simple_key_allowed' flag.
  2520. if leading_blanks {
  2521. parser.simple_key_allowed = true
  2522. }
  2523. return true
  2524. }
  2525. func yaml_parser_scan_line_comment(parser *yaml_parser_t, token_mark yaml_mark_t) bool {
  2526. if parser.newlines > 0 {
  2527. return true
  2528. }
  2529. var start_mark yaml_mark_t
  2530. var text []byte
  2531. for peek := 0; peek < 512; peek++ {
  2532. if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
  2533. break
  2534. }
  2535. if is_blank(parser.buffer, parser.buffer_pos+peek) {
  2536. continue
  2537. }
  2538. if parser.buffer[parser.buffer_pos+peek] == '#' {
  2539. seen := parser.mark.index + peek
  2540. for {
  2541. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2542. return false
  2543. }
  2544. if is_breakz(parser.buffer, parser.buffer_pos) {
  2545. if parser.mark.index >= seen {
  2546. break
  2547. }
  2548. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2549. return false
  2550. }
  2551. skip_line(parser)
  2552. } else if parser.mark.index >= seen {
  2553. if len(text) == 0 {
  2554. start_mark = parser.mark
  2555. }
  2556. text = read(parser, text)
  2557. } else {
  2558. skip(parser)
  2559. }
  2560. }
  2561. }
  2562. break
  2563. }
  2564. if len(text) > 0 {
  2565. parser.comments = append(parser.comments, yaml_comment_t{
  2566. token_mark: token_mark,
  2567. start_mark: start_mark,
  2568. line: text,
  2569. })
  2570. }
  2571. return true
  2572. }
  2573. func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) bool {
  2574. token := parser.tokens[len(parser.tokens)-1]
  2575. if token.typ == yaml_FLOW_ENTRY_TOKEN && len(parser.tokens) > 1 {
  2576. token = parser.tokens[len(parser.tokens)-2]
  2577. }
  2578. var token_mark = token.start_mark
  2579. var start_mark yaml_mark_t
  2580. var next_indent = parser.indent
  2581. if next_indent < 0 {
  2582. next_indent = 0
  2583. }
  2584. var recent_empty = false
  2585. var first_empty = parser.newlines <= 1
  2586. var line = parser.mark.line
  2587. var column = parser.mark.column
  2588. var text []byte
  2589. // The foot line is the place where a comment must start to
  2590. // still be considered as a foot of the prior content.
  2591. // If there's some content in the currently parsed line, then
  2592. // the foot is the line below it.
  2593. var foot_line = -1
  2594. if scan_mark.line > 0 {
  2595. foot_line = parser.mark.line - parser.newlines + 1
  2596. if parser.newlines == 0 && parser.mark.column > 1 {
  2597. foot_line++
  2598. }
  2599. }
  2600. var peek = 0
  2601. for ; peek < 512; peek++ {
  2602. if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
  2603. break
  2604. }
  2605. column++
  2606. if is_blank(parser.buffer, parser.buffer_pos+peek) {
  2607. continue
  2608. }
  2609. c := parser.buffer[parser.buffer_pos+peek]
  2610. var close_flow = parser.flow_level > 0 && (c == ']' || c == '}')
  2611. if close_flow || is_breakz(parser.buffer, parser.buffer_pos+peek) {
  2612. // Got line break or terminator.
  2613. if close_flow || !recent_empty {
  2614. if close_flow || first_empty && (start_mark.line == foot_line && token.typ != yaml_VALUE_TOKEN || start_mark.column-1 < next_indent) {
  2615. // This is the first empty line and there were no empty lines before,
  2616. // so this initial part of the comment is a foot of the prior token
  2617. // instead of being a head for the following one. Split it up.
  2618. // Alternatively, this might also be the last comment inside a flow
  2619. // scope, so it must be a footer.
  2620. if len(text) > 0 {
  2621. if start_mark.column-1 < next_indent {
  2622. // If dedented it's unrelated to the prior token.
  2623. token_mark = start_mark
  2624. }
  2625. parser.comments = append(parser.comments, yaml_comment_t{
  2626. scan_mark: scan_mark,
  2627. token_mark: token_mark,
  2628. start_mark: start_mark,
  2629. end_mark: yaml_mark_t{parser.mark.index + peek, line, column},
  2630. foot: text,
  2631. })
  2632. scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
  2633. token_mark = scan_mark
  2634. text = nil
  2635. }
  2636. } else {
  2637. if len(text) > 0 && parser.buffer[parser.buffer_pos+peek] != 0 {
  2638. text = append(text, '\n')
  2639. }
  2640. }
  2641. }
  2642. if !is_break(parser.buffer, parser.buffer_pos+peek) {
  2643. break
  2644. }
  2645. first_empty = false
  2646. recent_empty = true
  2647. column = 0
  2648. line++
  2649. continue
  2650. }
  2651. if len(text) > 0 && (close_flow || column-1 < next_indent && column != start_mark.column) {
  2652. // The comment at the different indentation is a foot of the
  2653. // preceding data rather than a head of the upcoming one.
  2654. parser.comments = append(parser.comments, yaml_comment_t{
  2655. scan_mark: scan_mark,
  2656. token_mark: token_mark,
  2657. start_mark: start_mark,
  2658. end_mark: yaml_mark_t{parser.mark.index + peek, line, column},
  2659. foot: text,
  2660. })
  2661. scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
  2662. token_mark = scan_mark
  2663. text = nil
  2664. }
  2665. if parser.buffer[parser.buffer_pos+peek] != '#' {
  2666. break
  2667. }
  2668. if len(text) == 0 {
  2669. start_mark = yaml_mark_t{parser.mark.index + peek, line, column}
  2670. } else {
  2671. text = append(text, '\n')
  2672. }
  2673. recent_empty = false
  2674. // Consume until after the consumed comment line.
  2675. seen := parser.mark.index + peek
  2676. for {
  2677. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2678. return false
  2679. }
  2680. if is_breakz(parser.buffer, parser.buffer_pos) {
  2681. if parser.mark.index >= seen {
  2682. break
  2683. }
  2684. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2685. return false
  2686. }
  2687. skip_line(parser)
  2688. } else if parser.mark.index >= seen {
  2689. text = read(parser, text)
  2690. } else {
  2691. skip(parser)
  2692. }
  2693. }
  2694. peek = 0
  2695. column = 0
  2696. line = parser.mark.line
  2697. next_indent = parser.indent
  2698. if next_indent < 0 {
  2699. next_indent = 0
  2700. }
  2701. }
  2702. if len(text) > 0 {
  2703. parser.comments = append(parser.comments, yaml_comment_t{
  2704. scan_mark: scan_mark,
  2705. token_mark: start_mark,
  2706. start_mark: start_mark,
  2707. end_mark: yaml_mark_t{parser.mark.index + peek - 1, line, column},
  2708. head: text,
  2709. })
  2710. }
  2711. return true
  2712. }