xmldec.go 77 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package xml implements a simple XML 1.0 parser that
  5. // understands XML name spaces.
  6. package soap
  7. // References:
  8. // Annotated XML spec: https://www.xml.com/axml/testaxml.htm
  9. // XML name spaces: https://www.w3.org/TR/REC-xml-names/
  10. // TODO(rsc):
  11. // Test error handling.
  12. import (
  13. "bufio"
  14. "bytes"
  15. "encoding"
  16. "errors"
  17. "fmt"
  18. "io"
  19. "reflect"
  20. "strconv"
  21. "strings"
  22. "sync"
  23. "unicode"
  24. "unicode/utf8"
  25. )
  26. // A SyntaxError represents a syntax error in the XML input stream.
  27. type SyntaxError struct {
  28. Msg string
  29. Line int
  30. }
  31. func (e *SyntaxError) Error() string {
  32. return "XML syntax error on line " + strconv.Itoa(e.Line) + ": " + e.Msg
  33. }
  34. // A Name represents an XML name (Local) annotated
  35. // with a name space identifier (Space).
  36. // In tokens returned by Decoder.Token, the Space identifier
  37. // is given as a canonical URL, not the short prefix used
  38. // in the document being parsed.
  39. type Name struct {
  40. Space, Local string
  41. }
// An Attr represents an attribute in an XML element (Name=Value).
type Attr struct {
	Name  Name   // attribute name; Decoder.Token translates its Space like element names, except that the default name space is not applied
	Value string // attribute value
}
// A Token is an interface holding one of the token types:
// StartElement, EndElement, CharData, Comment, ProcInst, or Directive.
// Use CopyToken to obtain a token whose byte slices are independent of
// the value it was copied from.
type Token interface{}
  50. // A StartElement represents an XML start element.
  51. type StartElement struct {
  52. Name Name
  53. Attr []Attr
  54. }
  55. // Copy creates a new copy of StartElement.
  56. func (e StartElement) Copy() StartElement {
  57. attrs := make([]Attr, len(e.Attr))
  58. copy(attrs, e.Attr)
  59. e.Attr = attrs
  60. return e
  61. }
  62. // End returns the corresponding XML end element.
  63. func (e StartElement) End() EndElement {
  64. return EndElement{e.Name}
  65. }
// An EndElement represents an XML end element.
type EndElement struct {
	Name Name // name of the element being closed
}
// A CharData represents XML character data (raw text),
// in which XML escape sequences have been replaced by
// the characters they represent.
// Use Copy to obtain an independent copy of the bytes.
type CharData []byte
  74. func makeCopy(b []byte) []byte {
  75. b1 := make([]byte, len(b))
  76. copy(b1, b)
  77. return b1
  78. }
  79. // Copy creates a new copy of CharData.
  80. func (c CharData) Copy() CharData { return CharData(makeCopy(c)) }
  81. // A Comment represents an XML comment of the form <!--comment-->.
  82. // The bytes do not include the <!-- and --> comment markers.
  83. type Comment []byte
  84. // Copy creates a new copy of Comment.
  85. func (c Comment) Copy() Comment { return Comment(makeCopy(c)) }
  86. // A ProcInst represents an XML processing instruction of the form <?target inst?>
  87. type ProcInst struct {
  88. Target string
  89. Inst []byte
  90. }
  91. // Copy creates a new copy of ProcInst.
  92. func (p ProcInst) Copy() ProcInst {
  93. p.Inst = makeCopy(p.Inst)
  94. return p
  95. }
  96. // A Directive represents an XML directive of the form <!text>.
  97. // The bytes do not include the <! and > markers.
  98. type Directive []byte
  99. // Copy creates a new copy of Directive.
  100. func (d Directive) Copy() Directive { return Directive(makeCopy(d)) }
  101. // CopyToken returns a copy of a Token.
  102. func CopyToken(t Token) Token {
  103. switch v := t.(type) {
  104. case CharData:
  105. return v.Copy()
  106. case Comment:
  107. return v.Copy()
  108. case Directive:
  109. return v.Copy()
  110. case ProcInst:
  111. return v.Copy()
  112. case StartElement:
  113. return v.Copy()
  114. }
  115. return t
  116. }
// A TokenReader is anything that can decode a stream of XML tokens, including a
// Decoder.
//
// When Token encounters an error or end-of-file condition after successfully
// reading a token, it returns the token. It may return the (non-nil) error from
// the same call or return the error (and a nil token) from a subsequent call.
// An instance of this general case is that a TokenReader returning a non-nil
// token at the end of the token stream may return either io.EOF or a nil error.
// The next call to Token should return nil, io.EOF.
//
// Implementations of Token are discouraged from returning a nil token with a
// nil error. Callers should treat a return of nil, nil as indicating that
// nothing happened; in particular it does not indicate EOF.
type TokenReader interface {
	// Token returns the next token in the stream, an error, or both,
	// as described above.
	Token() (Token, error)
}
// A Decoder represents an XML parser reading a particular input stream.
// The parser assumes that its input is encoded in UTF-8.
// Create one with NewDecoder or NewTokenDecoder.
type Decoder struct {
	// Strict defaults to true, enforcing the requirements
	// of the XML specification.
	// If set to false, the parser allows input containing common
	// mistakes:
	//	* If an element is missing an end tag, the parser invents
	//	  end tags as necessary to keep the return values from Token
	//	  properly balanced.
	//	* In attribute values and character data, unknown or malformed
	//	  character entities (sequences beginning with &) are left alone.
	//
	// Setting:
	//
	//	d.Strict = false
	//	d.AutoClose = xml.HTMLAutoClose
	//	d.Entity = xml.HTMLEntity
	//
	// creates a parser that can handle typical HTML.
	//
	// Strict mode does not enforce the requirements of the XML name spaces TR.
	// In particular it does not reject name space tags using undefined prefixes.
	// Such tags are recorded with the unknown prefix as the name space URL.
	Strict bool

	// When Strict == false, AutoClose indicates a set of elements to
	// consider closed immediately after they are opened, regardless
	// of whether an end element is present.
	AutoClose []string

	// Entity can be used to map non-standard entity names to string replacements.
	// The parser behaves as if these standard mappings are present in the map,
	// regardless of the actual map content:
	//
	//	"lt": "<",
	//	"gt": ">",
	//	"amp": "&",
	//	"apos": "'",
	//	"quot": `"`,
	Entity map[string]string

	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// non-UTF-8 charset into UTF-8. If CharsetReader is nil or
	// returns an error, parsing stops with an error. One of the
	// CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)

	// DefaultSpace sets the default name space used for unadorned tags,
	// as if the entire XML stream were wrapped in an element containing
	// the attribute xmlns="DefaultSpace".
	DefaultSpace string

	r              io.ByteReader     // byte source, installed by switchToReader
	t              TokenReader       // if non-nil, rawToken delegates to it instead of parsing bytes
	buf            bytes.Buffer      // scratch buffer in which rawToken accumulates token bytes
	saved          *bytes.Buffer     // NOTE(review): not used in the visible part of this file
	stk            *stack            // top of the stack of open elements and saved name space translations
	free           *stack            // list of stack entries available for reuse by push
	needClose      bool              // rawToken must return EndElement{toClose} on its next call
	toClose        Name              // name of the pending end element when needClose is set
	nextToken      Token             // token held back by Token after autoClose invented an end element
	nextByte       int               // set to -1 by the constructors; presumably a pushed-back byte, -1 meaning none (verify against getc/ungetc)
	ns             map[string]string // current translations from name space prefix to URL
	err            error             // error recorded by the parser; rawToken returns it on later calls
	line           int               // current line number, starting at 1; reported in SyntaxError
	offset         int64             // NOTE(review): not used in the visible part of this file
	unmarshalDepth int               // RawToken fails with errRawToken while this is > 0
}
  198. // NewDecoder creates a new XML parser reading from r.
  199. // If r does not implement io.ByteReader, NewDecoder will
  200. // do its own buffering.
  201. func NewDecoder(r io.Reader) *Decoder {
  202. d := &Decoder{
  203. ns: make(map[string]string),
  204. nextByte: -1,
  205. line: 1,
  206. Strict: true,
  207. }
  208. d.switchToReader(r)
  209. return d
  210. }
  211. // NewTokenDecoder creates a new XML parser using an underlying token stream.
  212. func NewTokenDecoder(t TokenReader) *Decoder {
  213. // Is it already a Decoder?
  214. if d, ok := t.(*Decoder); ok {
  215. return d
  216. }
  217. d := &Decoder{
  218. ns: make(map[string]string),
  219. t: t,
  220. nextByte: -1,
  221. line: 1,
  222. Strict: true,
  223. }
  224. return d
  225. }
  226. // Token returns the next XML token in the input stream.
  227. // At the end of the input stream, Token returns nil, io.EOF.
  228. //
  229. // Slices of bytes in the returned token data refer to the
  230. // parser's internal buffer and remain valid only until the next
  231. // call to Token. To acquire a copy of the bytes, call CopyToken
  232. // or the token's Copy method.
  233. //
  234. // Token expands self-closing elements such as <br/>
  235. // into separate start and end elements returned by successive calls.
  236. //
  237. // Token guarantees that the StartElement and EndElement
  238. // tokens it returns are properly nested and matched:
  239. // if Token encounters an unexpected end element
  240. // or EOF before all expected end elements,
  241. // it will return an error.
  242. //
  243. // Token implements XML name spaces as described by
  244. // https://www.w3.org/TR/REC-xml-names/. Each of the
  245. // Name structures contained in the Token has the Space
  246. // set to the URL identifying its name space when known.
  247. // If Token encounters an unrecognized name space prefix,
  248. // it uses the prefix as the Space rather than report an error.
  249. func (d *Decoder) Token() (Token, error) {
  250. var t Token
  251. var err error
  252. if d.stk != nil && d.stk.kind == stkEOF {
  253. return nil, io.EOF
  254. }
  255. if d.nextToken != nil {
  256. t = d.nextToken
  257. d.nextToken = nil
  258. } else if t, err = d.rawToken(); err != nil {
  259. if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF {
  260. err = d.syntaxError("unexpected EOF")
  261. }
  262. return t, err
  263. }
  264. if !d.Strict {
  265. if t1, ok := d.autoClose(t); ok {
  266. d.nextToken = t
  267. t = t1
  268. }
  269. }
  270. switch t1 := t.(type) {
  271. case StartElement:
  272. // In XML name spaces, the translations listed in the
  273. // attributes apply to the element name and
  274. // to the other attribute names, so process
  275. // the translations first.
  276. for _, a := range t1.Attr {
  277. if a.Name.Space == xmlnsPrefix {
  278. v, ok := d.ns[a.Name.Local]
  279. d.pushNs(a.Name.Local, v, ok)
  280. d.ns[a.Name.Local] = a.Value
  281. }
  282. if a.Name.Space == "" && a.Name.Local == xmlnsPrefix {
  283. // Default space for untagged names
  284. v, ok := d.ns[""]
  285. d.pushNs("", v, ok)
  286. d.ns[""] = a.Value
  287. }
  288. }
  289. d.translate(&t1.Name, true)
  290. for i := range t1.Attr {
  291. d.translate(&t1.Attr[i].Name, false)
  292. }
  293. d.pushElement(t1.Name)
  294. t = t1
  295. case EndElement:
  296. d.translate(&t1.Name, true)
  297. if !d.popElement(&t1) {
  298. return nil, d.err
  299. }
  300. t = t1
  301. }
  302. return t, err
  303. }
// Reserved name space prefixes and the URL bound to the xml prefix.
const (
	xmlURL      = "http://www.w3.org/XML/1998/namespace" // translate replaces the xml prefix with this URL
	xmlnsPrefix = "xmlns"                                // prefix, or bare attribute name, of name space declarations
	xmlPrefix   = "xml"                                  // prefix that translate maps to xmlURL
)
  309. // Apply name space translation to name n.
  310. // The default name space (for Space=="")
  311. // applies only to element names, not to attribute names.
  312. func (d *Decoder) translate(n *Name, isElementName bool) {
  313. switch {
  314. case n.Space == xmlnsPrefix:
  315. return
  316. case n.Space == "" && !isElementName:
  317. return
  318. case n.Space == xmlPrefix:
  319. n.Space = xmlURL
  320. case n.Space == "" && n.Local == xmlnsPrefix:
  321. return
  322. }
  323. if v, ok := d.ns[n.Space]; ok {
  324. n.Space = v
  325. } else if n.Space == "" {
  326. n.Space = d.DefaultSpace
  327. }
  328. }
  329. func (d *Decoder) switchToReader(r io.Reader) {
  330. // Get efficient byte at a time reader.
  331. // Assume that if reader has its own
  332. // ReadByte, it's efficient enough.
  333. // Otherwise, use bufio.
  334. if rb, ok := r.(io.ByteReader); ok {
  335. d.r = rb
  336. } else {
  337. d.r = bufio.NewReader(r)
  338. }
  339. }
// Parsing state - stack holds old name space translations
// and the current set of open elements. The translations to pop when
// ending a given tag are *below* it on the stack, which is
// more work but forced on us by XML.
type stack struct {
	next *stack // entry beneath this one, or the next free entry once popped
	kind int    // stkStart, stkNs or stkEOF
	name Name   // stkStart: element name; stkNs: Local is the prefix, Space its previous URL
	ok   bool   // stkNs: whether the prefix had a previous translation to restore
}
// Kinds of stack entry.
const (
	stkStart = iota // an open element, pushed by pushElement
	stkNs           // a saved name space translation, pushed by pushNs
	stkEOF          // marker at which Token reports io.EOF, inserted by pushEOF
)
  355. func (d *Decoder) push(kind int) *stack {
  356. s := d.free
  357. if s != nil {
  358. d.free = s.next
  359. } else {
  360. s = new(stack)
  361. }
  362. s.next = d.stk
  363. s.kind = kind
  364. d.stk = s
  365. return s
  366. }
  367. func (d *Decoder) pop() *stack {
  368. s := d.stk
  369. if s != nil {
  370. d.stk = s.next
  371. s.next = d.free
  372. d.free = s
  373. }
  374. return s
  375. }
  376. // Record that after the current element is finished
  377. // (that element is already pushed on the stack)
  378. // Token should return EOF until popEOF is called.
  379. func (d *Decoder) pushEOF() {
  380. // Walk down stack to find Start.
  381. // It might not be the top, because there might be stkNs
  382. // entries above it.
  383. start := d.stk
  384. for start.kind != stkStart {
  385. start = start.next
  386. }
  387. // The stkNs entries below a start are associated with that
  388. // element too; skip over them.
  389. for start.next != nil && start.next.kind == stkNs {
  390. start = start.next
  391. }
  392. s := d.free
  393. if s != nil {
  394. d.free = s.next
  395. } else {
  396. s = new(stack)
  397. }
  398. s.kind = stkEOF
  399. s.next = start.next
  400. start.next = s
  401. }
  402. // Undo a pushEOF.
  403. // The element must have been finished, so the EOF should be at the top of the stack.
  404. func (d *Decoder) popEOF() bool {
  405. if d.stk == nil || d.stk.kind != stkEOF {
  406. return false
  407. }
  408. d.pop()
  409. return true
  410. }
  411. // Record that we are starting an element with the given name.
  412. func (d *Decoder) pushElement(name Name) {
  413. s := d.push(stkStart)
  414. s.name = name
  415. }
  416. // Record that we are changing the value of ns[local].
  417. // The old value is url, ok.
  418. func (d *Decoder) pushNs(local string, url string, ok bool) {
  419. s := d.push(stkNs)
  420. s.name.Local = local
  421. s.name.Space = url
  422. s.ok = ok
  423. }
  424. // Creates a SyntaxError with the current line number.
  425. func (d *Decoder) syntaxError(msg string) error {
  426. return &SyntaxError{Msg: msg, Line: d.line}
  427. }
  428. // Record that we are ending an element with the given name.
  429. // The name must match the record at the top of the stack,
  430. // which must be a pushElement record.
  431. // After popping the element, apply any undo records from
  432. // the stack to restore the name translations that existed
  433. // before we saw this element.
  434. func (d *Decoder) popElement(t *EndElement) bool {
  435. s := d.pop()
  436. name := t.Name
  437. switch {
  438. case s == nil || s.kind != stkStart:
  439. d.err = d.syntaxError("unexpected end element </" + name.Local + ">")
  440. return false
  441. case s.name.Local != name.Local:
  442. if !d.Strict {
  443. d.needClose = true
  444. d.toClose = t.Name
  445. t.Name = s.name
  446. return true
  447. }
  448. d.err = d.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">")
  449. return false
  450. case s.name.Space != name.Space:
  451. d.err = d.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space +
  452. "closed by </" + name.Local + "> in space " + name.Space)
  453. return false
  454. }
  455. // Pop stack until a Start or EOF is on the top, undoing the
  456. // translations that were associated with the element we just closed.
  457. for d.stk != nil && d.stk.kind != stkStart && d.stk.kind != stkEOF {
  458. s := d.pop()
  459. if s.ok {
  460. d.ns[s.name.Local] = s.name.Space
  461. } else {
  462. delete(d.ns, s.name.Local)
  463. }
  464. }
  465. return true
  466. }
  467. // If the top element on the stack is autoclosing and
  468. // t is not the end tag, invent the end tag.
  469. func (d *Decoder) autoClose(t Token) (Token, bool) {
  470. if d.stk == nil || d.stk.kind != stkStart {
  471. return nil, false
  472. }
  473. name := strings.ToLower(d.stk.name.Local)
  474. for _, s := range d.AutoClose {
  475. if strings.ToLower(s) == name {
  476. // This one should be auto closed if t doesn't close it.
  477. et, ok := t.(EndElement)
  478. if !ok || et.Name.Local != name {
  479. return EndElement{d.stk.name}, true
  480. }
  481. break
  482. }
  483. }
  484. return nil, false
  485. }
  486. var errRawToken = errors.New("xml: cannot use RawToken from UnmarshalXML method")
  487. // RawToken is like Token but does not verify that
  488. // start and end elements match and does not translate
  489. // name space prefixes to their corresponding URLs.
  490. func (d *Decoder) RawToken() (Token, error) {
  491. if d.unmarshalDepth > 0 {
  492. return nil, errRawToken
  493. }
  494. return d.rawToken()
  495. }
  496. func (d *Decoder) rawToken() (Token, error) {
  497. if d.t != nil {
  498. return d.t.Token()
  499. }
  500. if d.err != nil {
  501. return nil, d.err
  502. }
  503. if d.needClose {
  504. // The last element we read was self-closing and
  505. // we returned just the StartElement half.
  506. // Return the EndElement half now.
  507. d.needClose = false
  508. return EndElement{d.toClose}, nil
  509. }
  510. b, ok := d.getc()
  511. if !ok {
  512. return nil, d.err
  513. }
  514. if b != '<' {
  515. // Text section.
  516. d.ungetc(b)
  517. data := d.text(-1, false)
  518. if data == nil {
  519. return nil, d.err
  520. }
  521. return CharData(data), nil
  522. }
  523. if b, ok = d.mustgetc(); !ok {
  524. return nil, d.err
  525. }
  526. switch b {
  527. case '/':
  528. // </: End element
  529. var name Name
  530. if name, ok = d.nsname(); !ok {
  531. if d.err == nil {
  532. d.err = d.syntaxError("expected element name after </")
  533. }
  534. return nil, d.err
  535. }
  536. d.space()
  537. if b, ok = d.mustgetc(); !ok {
  538. return nil, d.err
  539. }
  540. if b != '>' {
  541. d.err = d.syntaxError("invalid characters between </" + name.Local + " and >")
  542. return nil, d.err
  543. }
  544. return EndElement{name}, nil
  545. case '?':
  546. // <?: Processing instruction.
  547. var target string
  548. if target, ok = d.name(); !ok {
  549. if d.err == nil {
  550. d.err = d.syntaxError("expected target name after <?")
  551. }
  552. return nil, d.err
  553. }
  554. d.space()
  555. d.buf.Reset()
  556. var b0 byte
  557. for {
  558. if b, ok = d.mustgetc(); !ok {
  559. return nil, d.err
  560. }
  561. d.buf.WriteByte(b)
  562. if b0 == '?' && b == '>' {
  563. break
  564. }
  565. b0 = b
  566. }
  567. data := d.buf.Bytes()
  568. data = data[0 : len(data)-2] // chop ?>
  569. if target == "xml" {
  570. content := string(data)
  571. ver := procInst("version", content)
  572. if ver != "" && ver != "1.0" {
  573. d.err = fmt.Errorf("xml: unsupported version %q; only version 1.0 is supported", ver)
  574. return nil, d.err
  575. }
  576. enc := procInst("encoding", content)
  577. if enc != "" && enc != "utf-8" && enc != "UTF-8" && !strings.EqualFold(enc, "utf-8") {
  578. if d.CharsetReader == nil {
  579. d.err = fmt.Errorf("xml: encoding %q declared but Decoder.CharsetReader is nil", enc)
  580. return nil, d.err
  581. }
  582. newr, err := d.CharsetReader(enc, d.r.(io.Reader))
  583. if err != nil {
  584. d.err = fmt.Errorf("xml: opening charset %q: %v", enc, err)
  585. return nil, d.err
  586. }
  587. if newr == nil {
  588. panic("CharsetReader returned a nil Reader for charset " + enc)
  589. }
  590. d.switchToReader(newr)
  591. }
  592. }
  593. return ProcInst{target, data}, nil
  594. case '!':
  595. // <!: Maybe comment, maybe CDATA.
  596. if b, ok = d.mustgetc(); !ok {
  597. return nil, d.err
  598. }
  599. switch b {
  600. case '-': // <!-
  601. // Probably <!-- for a comment.
  602. if b, ok = d.mustgetc(); !ok {
  603. return nil, d.err
  604. }
  605. if b != '-' {
  606. d.err = d.syntaxError("invalid sequence <!- not part of <!--")
  607. return nil, d.err
  608. }
  609. // Look for terminator.
  610. d.buf.Reset()
  611. var b0, b1 byte
  612. for {
  613. if b, ok = d.mustgetc(); !ok {
  614. return nil, d.err
  615. }
  616. d.buf.WriteByte(b)
  617. if b0 == '-' && b1 == '-' {
  618. if b != '>' {
  619. d.err = d.syntaxError(
  620. `invalid sequence "--" not allowed in comments`)
  621. return nil, d.err
  622. }
  623. break
  624. }
  625. b0, b1 = b1, b
  626. }
  627. data := d.buf.Bytes()
  628. data = data[0 : len(data)-3] // chop -->
  629. return Comment(data), nil
  630. case '[': // <![
  631. // Probably <![CDATA[.
  632. for i := 0; i < 6; i++ {
  633. if b, ok = d.mustgetc(); !ok {
  634. return nil, d.err
  635. }
  636. if b != "CDATA["[i] {
  637. d.err = d.syntaxError("invalid <![ sequence")
  638. return nil, d.err
  639. }
  640. }
  641. // Have <![CDATA[. Read text until ]]>.
  642. data := d.text(-1, true)
  643. if data == nil {
  644. return nil, d.err
  645. }
  646. return CharData(data), nil
  647. }
  648. // Probably a directive: <!DOCTYPE ...>, <!ENTITY ...>, etc.
  649. // We don't care, but accumulate for caller. Quoted angle
  650. // brackets do not count for nesting.
  651. d.buf.Reset()
  652. d.buf.WriteByte(b)
  653. inquote := uint8(0)
  654. depth := 0
  655. for {
  656. if b, ok = d.mustgetc(); !ok {
  657. return nil, d.err
  658. }
  659. if inquote == 0 && b == '>' && depth == 0 {
  660. break
  661. }
  662. HandleB:
  663. d.buf.WriteByte(b)
  664. switch {
  665. case b == inquote:
  666. inquote = 0
  667. case inquote != 0:
  668. // in quotes, no special action
  669. case b == '\'' || b == '"':
  670. inquote = b
  671. case b == '>' && inquote == 0:
  672. depth--
  673. case b == '<' && inquote == 0:
  674. // Look for <!-- to begin comment.
  675. s := "!--"
  676. for i := 0; i < len(s); i++ {
  677. if b, ok = d.mustgetc(); !ok {
  678. return nil, d.err
  679. }
  680. if b != s[i] {
  681. for j := 0; j < i; j++ {
  682. d.buf.WriteByte(s[j])
  683. }
  684. depth++
  685. goto HandleB
  686. }
  687. }
  688. // Remove < that was written above.
  689. d.buf.Truncate(d.buf.Len() - 1)
  690. // Look for terminator.
  691. var b0, b1 byte
  692. for {
  693. if b, ok = d.mustgetc(); !ok {
  694. return nil, d.err
  695. }
  696. if b0 == '-' && b1 == '-' && b == '>' {
  697. break
  698. }
  699. b0, b1 = b1, b
  700. }
  701. }
  702. }
  703. return Directive(d.buf.Bytes()), nil
  704. }
  705. // Must be an open element like <a href="foo">
  706. d.ungetc(b)
  707. var (
  708. name Name
  709. empty bool
  710. attr []Attr
  711. )
  712. if name, ok = d.nsname(); !ok {
  713. if d.err == nil {
  714. d.err = d.syntaxError("expected element name after <")
  715. }
  716. return nil, d.err
  717. }
  718. attr = []Attr{}
  719. for {
  720. d.space()
  721. if b, ok = d.mustgetc(); !ok {
  722. return nil, d.err
  723. }
  724. if b == '/' {
  725. empty = true
  726. if b, ok = d.mustgetc(); !ok {
  727. return nil, d.err
  728. }
  729. if b != '>' {
  730. d.err = d.syntaxError("expected /> in element")
  731. return nil, d.err
  732. }
  733. break
  734. }
  735. if b == '>' {
  736. break
  737. }
  738. d.ungetc(b)
  739. a := Attr{}
  740. if a.Name, ok = d.nsname(); !ok {
  741. if d.err == nil {
  742. d.err = d.syntaxError("expected attribute name in element")
  743. }
  744. return nil, d.err
  745. }
  746. d.space()
  747. if b, ok = d.mustgetc(); !ok {
  748. return nil, d.err
  749. }
  750. if b != '=' {
  751. if d.Strict {
  752. d.err = d.syntaxError("attribute name without = in element")
  753. return nil, d.err
  754. }
  755. d.ungetc(b)
  756. a.Value = a.Name.Local
  757. } else {
  758. d.space()
  759. data := d.attrval()
  760. if data == nil {
  761. return nil, d.err
  762. }
  763. a.Value = string(data)
  764. }
  765. attr = append(attr, a)
  766. }
  767. if empty {
  768. d.needClose = true
  769. d.toClose = name
  770. }
  771. return StartElement{name, attr}, nil
  772. }
  773. func (d *Decoder) attrval() []byte {
  774. b, ok := d.mustgetc()
  775. if !ok {
  776. return nil
  777. }
  778. // Handle quoted attribute values
  779. if b == '"' || b == '\'' {
  780. return d.text(int(b), false)
  781. }
  782. // Handle unquoted attribute values for strict parsers
  783. if d.Strict {
  784. d.err = d.syntaxError("unquoted or missing attribute value in element")
  785. return nil
  786. }
  787. // Handle unquoted attribute values for unstrict parsers
  788. d.ungetc(b)
  789. d.buf.Reset()
  790. for {
  791. b, ok = d.mustgetc()
  792. if !ok {
  793. return nil
  794. }
  795. // https://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2
  796. if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' ||
  797. '0' <= b && b <= '9' || b == '_' || b == ':' || b == '-' {
  798. d.buf.WriteByte(b)
  799. } else {
  800. d.ungetc(b)
  801. break
  802. }
  803. }
  804. return d.buf.Bytes()
  805. }
  806. // Skip spaces if any
  807. func (d *Decoder) space() {
  808. for {
  809. b, ok := d.getc()
  810. if !ok {
  811. return
  812. }
  813. switch b {
  814. case ' ', '\r', '\n', '\t':
  815. default:
  816. d.ungetc(b)
  817. return
  818. }
  819. }
  820. }
  821. // Read a single byte.
  822. // If there is no byte to read, return ok==false
  823. // and leave the error in d.err.
  824. // Maintain line number.
  825. func (d *Decoder) getc() (b byte, ok bool) {
  826. if d.err != nil {
  827. return 0, false
  828. }
  829. if d.nextByte >= 0 {
  830. b = byte(d.nextByte)
  831. d.nextByte = -1
  832. } else {
  833. b, d.err = d.r.ReadByte()
  834. if d.err != nil {
  835. return 0, false
  836. }
  837. if d.saved != nil {
  838. d.saved.WriteByte(b)
  839. }
  840. }
  841. if b == '\n' {
  842. d.line++
  843. }
  844. d.offset++
  845. return b, true
  846. }
  847. // InputOffset returns the input stream byte offset of the current decoder position.
  848. // The offset gives the location of the end of the most recently returned token
  849. // and the beginning of the next token.
  850. func (d *Decoder) InputOffset() int64 {
  851. return d.offset
  852. }
  853. // Return saved offset.
  854. // If we did ungetc (nextByte >= 0), have to back up one.
  855. func (d *Decoder) savedOffset() int {
  856. n := d.saved.Len()
  857. if d.nextByte >= 0 {
  858. n--
  859. }
  860. return n
  861. }
  862. // Must read a single byte.
  863. // If there is no byte to read,
  864. // set d.err to SyntaxError("unexpected EOF")
  865. // and return ok==false
  866. func (d *Decoder) mustgetc() (b byte, ok bool) {
  867. if b, ok = d.getc(); !ok {
  868. if d.err == io.EOF {
  869. d.err = d.syntaxError("unexpected EOF")
  870. }
  871. }
  872. return
  873. }
  874. // Unread a single byte.
  875. func (d *Decoder) ungetc(b byte) {
  876. if b == '\n' {
  877. d.line--
  878. }
  879. d.nextByte = int(b)
  880. d.offset--
  881. }
  882. var entity = map[string]int{
  883. "lt": '<',
  884. "gt": '>',
  885. "amp": '&',
  886. "apos": '\'',
  887. "quot": '"',
  888. }
  889. // Read plain text section (XML calls it character data).
  890. // If quote >= 0, we are in a quoted string and need to find the matching quote.
  891. // If cdata == true, we are in a <![CDATA[ section and need to find ]]>.
  892. // On failure return nil and leave the error in d.err.
  893. func (d *Decoder) text(quote int, cdata bool) []byte {
  894. var b0, b1 byte
  895. var trunc int
  896. d.buf.Reset()
  897. Input:
  898. for {
  899. b, ok := d.getc()
  900. if !ok {
  901. if cdata {
  902. if d.err == io.EOF {
  903. d.err = d.syntaxError("unexpected EOF in CDATA section")
  904. }
  905. return nil
  906. }
  907. break Input
  908. }
  909. // <![CDATA[ section ends with ]]>.
  910. // It is an error for ]]> to appear in ordinary text.
  911. if b0 == ']' && b1 == ']' && b == '>' {
  912. if cdata {
  913. trunc = 2
  914. break Input
  915. }
  916. d.err = d.syntaxError("unescaped ]]> not in CDATA section")
  917. return nil
  918. }
  919. // Stop reading text if we see a <.
  920. if b == '<' && !cdata {
  921. if quote >= 0 {
  922. d.err = d.syntaxError("unescaped < inside quoted string")
  923. return nil
  924. }
  925. d.ungetc('<')
  926. break Input
  927. }
  928. if quote >= 0 && b == byte(quote) {
  929. break Input
  930. }
  931. if b == '&' && !cdata {
  932. // Read escaped character expression up to semicolon.
  933. // XML in all its glory allows a document to define and use
  934. // its own character names with <!ENTITY ...> directives.
  935. // Parsers are required to recognize lt, gt, amp, apos, and quot
  936. // even if they have not been declared.
  937. before := d.buf.Len()
  938. d.buf.WriteByte('&')
  939. var ok bool
  940. var text string
  941. var haveText bool
  942. if b, ok = d.mustgetc(); !ok {
  943. return nil
  944. }
  945. if b == '#' {
  946. d.buf.WriteByte(b)
  947. if b, ok = d.mustgetc(); !ok {
  948. return nil
  949. }
  950. base := 10
  951. if b == 'x' {
  952. base = 16
  953. d.buf.WriteByte(b)
  954. if b, ok = d.mustgetc(); !ok {
  955. return nil
  956. }
  957. }
  958. start := d.buf.Len()
  959. for '0' <= b && b <= '9' ||
  960. base == 16 && 'a' <= b && b <= 'f' ||
  961. base == 16 && 'A' <= b && b <= 'F' {
  962. d.buf.WriteByte(b)
  963. if b, ok = d.mustgetc(); !ok {
  964. return nil
  965. }
  966. }
  967. if b != ';' {
  968. d.ungetc(b)
  969. } else {
  970. s := string(d.buf.Bytes()[start:])
  971. d.buf.WriteByte(';')
  972. n, err := strconv.ParseUint(s, base, 64)
  973. if err == nil && n <= unicode.MaxRune {
  974. text = string(n)
  975. haveText = true
  976. }
  977. }
  978. } else {
  979. d.ungetc(b)
  980. if !d.readName() {
  981. if d.err != nil {
  982. return nil
  983. }
  984. }
  985. if b, ok = d.mustgetc(); !ok {
  986. return nil
  987. }
  988. if b != ';' {
  989. d.ungetc(b)
  990. } else {
  991. name := d.buf.Bytes()[before+1:]
  992. d.buf.WriteByte(';')
  993. if isName(name) {
  994. s := string(name)
  995. if r, ok := entity[s]; ok {
  996. text = string(r)
  997. haveText = true
  998. } else if d.Entity != nil {
  999. text, haveText = d.Entity[s]
  1000. }
  1001. }
  1002. }
  1003. }
  1004. if haveText {
  1005. d.buf.Truncate(before)
  1006. d.buf.Write([]byte(text))
  1007. b0, b1 = 0, 0
  1008. continue Input
  1009. }
  1010. if !d.Strict {
  1011. b0, b1 = 0, 0
  1012. continue Input
  1013. }
  1014. ent := string(d.buf.Bytes()[before:])
  1015. if ent[len(ent)-1] != ';' {
  1016. ent += " (no semicolon)"
  1017. }
  1018. d.err = d.syntaxError("invalid character entity " + ent)
  1019. return nil
  1020. }
  1021. // We must rewrite unescaped \r and \r\n into \n.
  1022. if b == '\r' {
  1023. d.buf.WriteByte('\n')
  1024. } else if b1 == '\r' && b == '\n' {
  1025. // Skip \r\n--we already wrote \n.
  1026. } else {
  1027. d.buf.WriteByte(b)
  1028. }
  1029. b0, b1 = b1, b
  1030. }
  1031. data := d.buf.Bytes()
  1032. data = data[0 : len(data)-trunc]
  1033. // Inspect each rune for being a disallowed character.
  1034. buf := data
  1035. for len(buf) > 0 {
  1036. r, size := utf8.DecodeRune(buf)
  1037. if r == utf8.RuneError && size == 1 {
  1038. d.err = d.syntaxError("invalid UTF-8")
  1039. return nil
  1040. }
  1041. buf = buf[size:]
  1042. if !isInCharacterRange(r) {
  1043. d.err = d.syntaxError(fmt.Sprintf("illegal character code %U", r))
  1044. return nil
  1045. }
  1046. }
  1047. return data
  1048. }
  1049. // Decide whether the given rune is in the XML Character Range, per
  1050. // the Char production of https://www.xml.com/axml/testaxml.htm,
  1051. // Section 2.2 Characters.
  1052. func isInCharacterRange(r rune) (inrange bool) {
  1053. return r == 0x09 ||
  1054. r == 0x0A ||
  1055. r == 0x0D ||
  1056. r >= 0x20 && r <= 0xD7FF ||
  1057. r >= 0xE000 && r <= 0xFFFD ||
  1058. r >= 0x10000 && r <= 0x10FFFF
  1059. }
  1060. // Get name space name: name with a : stuck in the middle.
  1061. // The part before the : is the name space identifier.
  1062. func (d *Decoder) nsname() (name Name, ok bool) {
  1063. s, ok := d.name()
  1064. if !ok {
  1065. return
  1066. }
  1067. i := strings.Index(s, ":")
  1068. if i < 0 {
  1069. name.Local = s
  1070. } else {
  1071. name.Space = s[0:i]
  1072. name.Local = s[i+1:]
  1073. }
  1074. return name, true
  1075. }
  1076. // Get name: /first(first|second)*/
  1077. // Do not set d.err if the name is missing (unless unexpected EOF is received):
  1078. // let the caller provide better context.
  1079. func (d *Decoder) name() (s string, ok bool) {
  1080. d.buf.Reset()
  1081. if !d.readName() {
  1082. return "", false
  1083. }
  1084. // Now we check the characters.
  1085. b := d.buf.Bytes()
  1086. if !isName(b) {
  1087. d.err = d.syntaxError("invalid XML name: " + string(b))
  1088. return "", false
  1089. }
  1090. return string(b), true
  1091. }
  1092. // Read a name and append its bytes to d.buf.
  1093. // The name is delimited by any single-byte character not valid in names.
  1094. // All multi-byte characters are accepted; the caller must check their validity.
  1095. func (d *Decoder) readName() (ok bool) {
  1096. var b byte
  1097. if b, ok = d.mustgetc(); !ok {
  1098. return
  1099. }
  1100. if b < utf8.RuneSelf && !isNameByte(b) {
  1101. d.ungetc(b)
  1102. return false
  1103. }
  1104. d.buf.WriteByte(b)
  1105. for {
  1106. if b, ok = d.mustgetc(); !ok {
  1107. return
  1108. }
  1109. if b < utf8.RuneSelf && !isNameByte(b) {
  1110. d.ungetc(b)
  1111. break
  1112. }
  1113. d.buf.WriteByte(b)
  1114. }
  1115. return true
  1116. }
  1117. func isNameByte(c byte) bool {
  1118. return 'A' <= c && c <= 'Z' ||
  1119. 'a' <= c && c <= 'z' ||
  1120. '0' <= c && c <= '9' ||
  1121. c == '_' || c == ':' || c == '.' || c == '-'
  1122. }
  1123. func isName(s []byte) bool {
  1124. if len(s) == 0 {
  1125. return false
  1126. }
  1127. c, n := utf8.DecodeRune(s)
  1128. if c == utf8.RuneError && n == 1 {
  1129. return false
  1130. }
  1131. if !unicode.Is(first, c) {
  1132. return false
  1133. }
  1134. for n < len(s) {
  1135. s = s[n:]
  1136. c, n = utf8.DecodeRune(s)
  1137. if c == utf8.RuneError && n == 1 {
  1138. return false
  1139. }
  1140. if !unicode.Is(first, c) && !unicode.Is(second, c) {
  1141. return false
  1142. }
  1143. }
  1144. return true
  1145. }
  1146. func isNameString(s string) bool {
  1147. if len(s) == 0 {
  1148. return false
  1149. }
  1150. c, n := utf8.DecodeRuneInString(s)
  1151. if c == utf8.RuneError && n == 1 {
  1152. return false
  1153. }
  1154. if !unicode.Is(first, c) {
  1155. return false
  1156. }
  1157. for n < len(s) {
  1158. s = s[n:]
  1159. c, n = utf8.DecodeRuneInString(s)
  1160. if c == utf8.RuneError && n == 1 {
  1161. return false
  1162. }
  1163. if !unicode.Is(first, c) && !unicode.Is(second, c) {
  1164. return false
  1165. }
  1166. }
  1167. return true
  1168. }
// These tables were generated by cut and paste from Appendix B of
// the XML spec at https://www.xml.com/axml/testaxml.htm
// and then reformatting. First corresponds to (Letter | '_' | ':')
// and second corresponds to NameChar.
//
// first is the set of runes that isName and isNameString accept as the
// first character of a name (and at any later position). Each entry is
// a unicode.Range16 of the form {Lo, Hi, Stride}.
var first = &unicode.RangeTable{
	R16: []unicode.Range16{
		{0x003A, 0x003A, 1},
		{0x0041, 0x005A, 1},
		{0x005F, 0x005F, 1},
		{0x0061, 0x007A, 1},
		{0x00C0, 0x00D6, 1},
		{0x00D8, 0x00F6, 1},
		{0x00F8, 0x00FF, 1},
		{0x0100, 0x0131, 1},
		{0x0134, 0x013E, 1},
		{0x0141, 0x0148, 1},
		{0x014A, 0x017E, 1},
		{0x0180, 0x01C3, 1},
		{0x01CD, 0x01F0, 1},
		{0x01F4, 0x01F5, 1},
		{0x01FA, 0x0217, 1},
		{0x0250, 0x02A8, 1},
		{0x02BB, 0x02C1, 1},
		{0x0386, 0x0386, 1},
		{0x0388, 0x038A, 1},
		{0x038C, 0x038C, 1},
		{0x038E, 0x03A1, 1},
		{0x03A3, 0x03CE, 1},
		{0x03D0, 0x03D6, 1},
		{0x03DA, 0x03E0, 2},
		{0x03E2, 0x03F3, 1},
		{0x0401, 0x040C, 1},
		{0x040E, 0x044F, 1},
		{0x0451, 0x045C, 1},
		{0x045E, 0x0481, 1},
		{0x0490, 0x04C4, 1},
		{0x04C7, 0x04C8, 1},
		{0x04CB, 0x04CC, 1},
		{0x04D0, 0x04EB, 1},
		{0x04EE, 0x04F5, 1},
		{0x04F8, 0x04F9, 1},
		{0x0531, 0x0556, 1},
		{0x0559, 0x0559, 1},
		{0x0561, 0x0586, 1},
		{0x05D0, 0x05EA, 1},
		{0x05F0, 0x05F2, 1},
		{0x0621, 0x063A, 1},
		{0x0641, 0x064A, 1},
		{0x0671, 0x06B7, 1},
		{0x06BA, 0x06BE, 1},
		{0x06C0, 0x06CE, 1},
		{0x06D0, 0x06D3, 1},
		{0x06D5, 0x06D5, 1},
		{0x06E5, 0x06E6, 1},
		{0x0905, 0x0939, 1},
		{0x093D, 0x093D, 1},
		{0x0958, 0x0961, 1},
		{0x0985, 0x098C, 1},
		{0x098F, 0x0990, 1},
		{0x0993, 0x09A8, 1},
		{0x09AA, 0x09B0, 1},
		{0x09B2, 0x09B2, 1},
		{0x09B6, 0x09B9, 1},
		{0x09DC, 0x09DD, 1},
		{0x09DF, 0x09E1, 1},
		{0x09F0, 0x09F1, 1},
		{0x0A05, 0x0A0A, 1},
		{0x0A0F, 0x0A10, 1},
		{0x0A13, 0x0A28, 1},
		{0x0A2A, 0x0A30, 1},
		{0x0A32, 0x0A33, 1},
		{0x0A35, 0x0A36, 1},
		{0x0A38, 0x0A39, 1},
		{0x0A59, 0x0A5C, 1},
		{0x0A5E, 0x0A5E, 1},
		{0x0A72, 0x0A74, 1},
		{0x0A85, 0x0A8B, 1},
		{0x0A8D, 0x0A8D, 1},
		{0x0A8F, 0x0A91, 1},
		{0x0A93, 0x0AA8, 1},
		{0x0AAA, 0x0AB0, 1},
		{0x0AB2, 0x0AB3, 1},
		{0x0AB5, 0x0AB9, 1},
		{0x0ABD, 0x0AE0, 0x23},
		{0x0B05, 0x0B0C, 1},
		{0x0B0F, 0x0B10, 1},
		{0x0B13, 0x0B28, 1},
		{0x0B2A, 0x0B30, 1},
		{0x0B32, 0x0B33, 1},
		{0x0B36, 0x0B39, 1},
		{0x0B3D, 0x0B3D, 1},
		{0x0B5C, 0x0B5D, 1},
		{0x0B5F, 0x0B61, 1},
		{0x0B85, 0x0B8A, 1},
		{0x0B8E, 0x0B90, 1},
		{0x0B92, 0x0B95, 1},
		{0x0B99, 0x0B9A, 1},
		{0x0B9C, 0x0B9C, 1},
		{0x0B9E, 0x0B9F, 1},
		{0x0BA3, 0x0BA4, 1},
		{0x0BA8, 0x0BAA, 1},
		{0x0BAE, 0x0BB5, 1},
		{0x0BB7, 0x0BB9, 1},
		{0x0C05, 0x0C0C, 1},
		{0x0C0E, 0x0C10, 1},
		{0x0C12, 0x0C28, 1},
		{0x0C2A, 0x0C33, 1},
		{0x0C35, 0x0C39, 1},
		{0x0C60, 0x0C61, 1},
		{0x0C85, 0x0C8C, 1},
		{0x0C8E, 0x0C90, 1},
		{0x0C92, 0x0CA8, 1},
		{0x0CAA, 0x0CB3, 1},
		{0x0CB5, 0x0CB9, 1},
		{0x0CDE, 0x0CDE, 1},
		{0x0CE0, 0x0CE1, 1},
		{0x0D05, 0x0D0C, 1},
		{0x0D0E, 0x0D10, 1},
		{0x0D12, 0x0D28, 1},
		{0x0D2A, 0x0D39, 1},
		{0x0D60, 0x0D61, 1},
		{0x0E01, 0x0E2E, 1},
		{0x0E30, 0x0E30, 1},
		{0x0E32, 0x0E33, 1},
		{0x0E40, 0x0E45, 1},
		{0x0E81, 0x0E82, 1},
		{0x0E84, 0x0E84, 1},
		{0x0E87, 0x0E88, 1},
		{0x0E8A, 0x0E8D, 3},
		{0x0E94, 0x0E97, 1},
		{0x0E99, 0x0E9F, 1},
		{0x0EA1, 0x0EA3, 1},
		{0x0EA5, 0x0EA7, 2},
		{0x0EAA, 0x0EAB, 1},
		{0x0EAD, 0x0EAE, 1},
		{0x0EB0, 0x0EB0, 1},
		{0x0EB2, 0x0EB3, 1},
		{0x0EBD, 0x0EBD, 1},
		{0x0EC0, 0x0EC4, 1},
		{0x0F40, 0x0F47, 1},
		{0x0F49, 0x0F69, 1},
		{0x10A0, 0x10C5, 1},
		{0x10D0, 0x10F6, 1},
		{0x1100, 0x1100, 1},
		{0x1102, 0x1103, 1},
		{0x1105, 0x1107, 1},
		{0x1109, 0x1109, 1},
		{0x110B, 0x110C, 1},
		{0x110E, 0x1112, 1},
		{0x113C, 0x1140, 2},
		{0x114C, 0x1150, 2},
		{0x1154, 0x1155, 1},
		{0x1159, 0x1159, 1},
		{0x115F, 0x1161, 1},
		{0x1163, 0x1169, 2},
		{0x116D, 0x116E, 1},
		{0x1172, 0x1173, 1},
		{0x1175, 0x119E, 0x119E - 0x1175},
		{0x11A8, 0x11AB, 0x11AB - 0x11A8},
		{0x11AE, 0x11AF, 1},
		{0x11B7, 0x11B8, 1},
		{0x11BA, 0x11BA, 1},
		{0x11BC, 0x11C2, 1},
		{0x11EB, 0x11F0, 0x11F0 - 0x11EB},
		{0x11F9, 0x11F9, 1},
		{0x1E00, 0x1E9B, 1},
		{0x1EA0, 0x1EF9, 1},
		{0x1F00, 0x1F15, 1},
		{0x1F18, 0x1F1D, 1},
		{0x1F20, 0x1F45, 1},
		{0x1F48, 0x1F4D, 1},
		{0x1F50, 0x1F57, 1},
		{0x1F59, 0x1F5B, 0x1F5B - 0x1F59},
		{0x1F5D, 0x1F5D, 1},
		{0x1F5F, 0x1F7D, 1},
		{0x1F80, 0x1FB4, 1},
		{0x1FB6, 0x1FBC, 1},
		{0x1FBE, 0x1FBE, 1},
		{0x1FC2, 0x1FC4, 1},
		{0x1FC6, 0x1FCC, 1},
		{0x1FD0, 0x1FD3, 1},
		{0x1FD6, 0x1FDB, 1},
		{0x1FE0, 0x1FEC, 1},
		{0x1FF2, 0x1FF4, 1},
		{0x1FF6, 0x1FFC, 1},
		{0x2126, 0x2126, 1},
		{0x212A, 0x212B, 1},
		{0x212E, 0x212E, 1},
		{0x2180, 0x2182, 1},
		{0x3007, 0x3007, 1},
		{0x3021, 0x3029, 1},
		{0x3041, 0x3094, 1},
		{0x30A1, 0x30FA, 1},
		{0x3105, 0x312C, 1},
		{0x4E00, 0x9FA5, 1},
		{0xAC00, 0xD7A3, 1},
	},
}
// second is the set of runes that isName and isNameString accept, in
// addition to those in first, at any position after the first character
// of a name. Each entry is a unicode.Range16 of the form {Lo, Hi, Stride}.
var second = &unicode.RangeTable{
	R16: []unicode.Range16{
		{0x002D, 0x002E, 1},
		{0x0030, 0x0039, 1},
		{0x00B7, 0x00B7, 1},
		{0x02D0, 0x02D1, 1},
		{0x0300, 0x0345, 1},
		{0x0360, 0x0361, 1},
		{0x0387, 0x0387, 1},
		{0x0483, 0x0486, 1},
		{0x0591, 0x05A1, 1},
		{0x05A3, 0x05B9, 1},
		{0x05BB, 0x05BD, 1},
		{0x05BF, 0x05BF, 1},
		{0x05C1, 0x05C2, 1},
		{0x05C4, 0x0640, 0x0640 - 0x05C4},
		{0x064B, 0x0652, 1},
		{0x0660, 0x0669, 1},
		{0x0670, 0x0670, 1},
		{0x06D6, 0x06DC, 1},
		{0x06DD, 0x06DF, 1},
		{0x06E0, 0x06E4, 1},
		{0x06E7, 0x06E8, 1},
		{0x06EA, 0x06ED, 1},
		{0x06F0, 0x06F9, 1},
		{0x0901, 0x0903, 1},
		{0x093C, 0x093C, 1},
		{0x093E, 0x094C, 1},
		{0x094D, 0x094D, 1},
		{0x0951, 0x0954, 1},
		{0x0962, 0x0963, 1},
		{0x0966, 0x096F, 1},
		{0x0981, 0x0983, 1},
		{0x09BC, 0x09BC, 1},
		{0x09BE, 0x09BF, 1},
		{0x09C0, 0x09C4, 1},
		{0x09C7, 0x09C8, 1},
		{0x09CB, 0x09CD, 1},
		{0x09D7, 0x09D7, 1},
		{0x09E2, 0x09E3, 1},
		{0x09E6, 0x09EF, 1},
		{0x0A02, 0x0A3C, 0x3A},
		{0x0A3E, 0x0A3F, 1},
		{0x0A40, 0x0A42, 1},
		{0x0A47, 0x0A48, 1},
		{0x0A4B, 0x0A4D, 1},
		{0x0A66, 0x0A6F, 1},
		{0x0A70, 0x0A71, 1},
		{0x0A81, 0x0A83, 1},
		{0x0ABC, 0x0ABC, 1},
		{0x0ABE, 0x0AC5, 1},
		{0x0AC7, 0x0AC9, 1},
		{0x0ACB, 0x0ACD, 1},
		{0x0AE6, 0x0AEF, 1},
		{0x0B01, 0x0B03, 1},
		{0x0B3C, 0x0B3C, 1},
		{0x0B3E, 0x0B43, 1},
		{0x0B47, 0x0B48, 1},
		{0x0B4B, 0x0B4D, 1},
		{0x0B56, 0x0B57, 1},
		{0x0B66, 0x0B6F, 1},
		{0x0B82, 0x0B83, 1},
		{0x0BBE, 0x0BC2, 1},
		{0x0BC6, 0x0BC8, 1},
		{0x0BCA, 0x0BCD, 1},
		{0x0BD7, 0x0BD7, 1},
		{0x0BE7, 0x0BEF, 1},
		{0x0C01, 0x0C03, 1},
		{0x0C3E, 0x0C44, 1},
		{0x0C46, 0x0C48, 1},
		{0x0C4A, 0x0C4D, 1},
		{0x0C55, 0x0C56, 1},
		{0x0C66, 0x0C6F, 1},
		{0x0C82, 0x0C83, 1},
		{0x0CBE, 0x0CC4, 1},
		{0x0CC6, 0x0CC8, 1},
		{0x0CCA, 0x0CCD, 1},
		{0x0CD5, 0x0CD6, 1},
		{0x0CE6, 0x0CEF, 1},
		{0x0D02, 0x0D03, 1},
		{0x0D3E, 0x0D43, 1},
		{0x0D46, 0x0D48, 1},
		{0x0D4A, 0x0D4D, 1},
		{0x0D57, 0x0D57, 1},
		{0x0D66, 0x0D6F, 1},
		{0x0E31, 0x0E31, 1},
		{0x0E34, 0x0E3A, 1},
		{0x0E46, 0x0E46, 1},
		{0x0E47, 0x0E4E, 1},
		{0x0E50, 0x0E59, 1},
		{0x0EB1, 0x0EB1, 1},
		{0x0EB4, 0x0EB9, 1},
		{0x0EBB, 0x0EBC, 1},
		{0x0EC6, 0x0EC6, 1},
		{0x0EC8, 0x0ECD, 1},
		{0x0ED0, 0x0ED9, 1},
		{0x0F18, 0x0F19, 1},
		{0x0F20, 0x0F29, 1},
		{0x0F35, 0x0F39, 2},
		{0x0F3E, 0x0F3F, 1},
		{0x0F71, 0x0F84, 1},
		{0x0F86, 0x0F8B, 1},
		{0x0F90, 0x0F95, 1},
		{0x0F97, 0x0F97, 1},
		{0x0F99, 0x0FAD, 1},
		{0x0FB1, 0x0FB7, 1},
		{0x0FB9, 0x0FB9, 1},
		{0x20D0, 0x20DC, 1},
		{0x20E1, 0x3005, 0x3005 - 0x20E1},
		{0x302A, 0x302F, 1},
		{0x3031, 0x3035, 1},
		{0x3099, 0x309A, 1},
		{0x309D, 0x309E, 1},
		{0x30FC, 0x30FE, 1},
	},
}
// HTMLEntity is an entity map containing translations for the
// standard HTML entity characters.
//
// See the Decoder.Strict and Decoder.Entity fields' documentation.
var HTMLEntity map[string]string = htmlEntity

// htmlEntity is the table behind HTMLEntity. It maps an entity name,
// written without the surrounding '&' and ';', to the character it
// stands for. The command in the comment below records how the table
// was generated.
var htmlEntity = map[string]string{
	/*
		hget http://www.w3.org/TR/html4/sgml/entities.html |
		ssam '
			,y /\&gt;/ x/\&lt;(.|\n)+/ s/\n/ /g
			,x v/^\&lt;!ENTITY/d
			,s/\&lt;!ENTITY ([^ ]+) .*U\+([0-9A-F][0-9A-F][0-9A-F][0-9A-F]) .+/	"\1": "\\u\2",/g
		'
	*/
	"nbsp":     "\u00A0",
	"iexcl":    "\u00A1",
	"cent":     "\u00A2",
	"pound":    "\u00A3",
	"curren":   "\u00A4",
	"yen":      "\u00A5",
	"brvbar":   "\u00A6",
	"sect":     "\u00A7",
	"uml":      "\u00A8",
	"copy":     "\u00A9",
	"ordf":     "\u00AA",
	"laquo":    "\u00AB",
	"not":      "\u00AC",
	"shy":      "\u00AD",
	"reg":      "\u00AE",
	"macr":     "\u00AF",
	"deg":      "\u00B0",
	"plusmn":   "\u00B1",
	"sup2":     "\u00B2",
	"sup3":     "\u00B3",
	"acute":    "\u00B4",
	"micro":    "\u00B5",
	"para":     "\u00B6",
	"middot":   "\u00B7",
	"cedil":    "\u00B8",
	"sup1":     "\u00B9",
	"ordm":     "\u00BA",
	"raquo":    "\u00BB",
	"frac14":   "\u00BC",
	"frac12":   "\u00BD",
	"frac34":   "\u00BE",
	"iquest":   "\u00BF",
	"Agrave":   "\u00C0",
	"Aacute":   "\u00C1",
	"Acirc":    "\u00C2",
	"Atilde":   "\u00C3",
	"Auml":     "\u00C4",
	"Aring":    "\u00C5",
	"AElig":    "\u00C6",
	"Ccedil":   "\u00C7",
	"Egrave":   "\u00C8",
	"Eacute":   "\u00C9",
	"Ecirc":    "\u00CA",
	"Euml":     "\u00CB",
	"Igrave":   "\u00CC",
	"Iacute":   "\u00CD",
	"Icirc":    "\u00CE",
	"Iuml":     "\u00CF",
	"ETH":      "\u00D0",
	"Ntilde":   "\u00D1",
	"Ograve":   "\u00D2",
	"Oacute":   "\u00D3",
	"Ocirc":    "\u00D4",
	"Otilde":   "\u00D5",
	"Ouml":     "\u00D6",
	"times":    "\u00D7",
	"Oslash":   "\u00D8",
	"Ugrave":   "\u00D9",
	"Uacute":   "\u00DA",
	"Ucirc":    "\u00DB",
	"Uuml":     "\u00DC",
	"Yacute":   "\u00DD",
	"THORN":    "\u00DE",
	"szlig":    "\u00DF",
	"agrave":   "\u00E0",
	"aacute":   "\u00E1",
	"acirc":    "\u00E2",
	"atilde":   "\u00E3",
	"auml":     "\u00E4",
	"aring":    "\u00E5",
	"aelig":    "\u00E6",
	"ccedil":   "\u00E7",
	"egrave":   "\u00E8",
	"eacute":   "\u00E9",
	"ecirc":    "\u00EA",
	"euml":     "\u00EB",
	"igrave":   "\u00EC",
	"iacute":   "\u00ED",
	"icirc":    "\u00EE",
	"iuml":     "\u00EF",
	"eth":      "\u00F0",
	"ntilde":   "\u00F1",
	"ograve":   "\u00F2",
	"oacute":   "\u00F3",
	"ocirc":    "\u00F4",
	"otilde":   "\u00F5",
	"ouml":     "\u00F6",
	"divide":   "\u00F7",
	"oslash":   "\u00F8",
	"ugrave":   "\u00F9",
	"uacute":   "\u00FA",
	"ucirc":    "\u00FB",
	"uuml":     "\u00FC",
	"yacute":   "\u00FD",
	"thorn":    "\u00FE",
	"yuml":     "\u00FF",
	"fnof":     "\u0192",
	"Alpha":    "\u0391",
	"Beta":     "\u0392",
	"Gamma":    "\u0393",
	"Delta":    "\u0394",
	"Epsilon":  "\u0395",
	"Zeta":     "\u0396",
	"Eta":      "\u0397",
	"Theta":    "\u0398",
	"Iota":     "\u0399",
	"Kappa":    "\u039A",
	"Lambda":   "\u039B",
	"Mu":       "\u039C",
	"Nu":       "\u039D",
	"Xi":       "\u039E",
	"Omicron":  "\u039F",
	"Pi":       "\u03A0",
	"Rho":      "\u03A1",
	"Sigma":    "\u03A3",
	"Tau":      "\u03A4",
	"Upsilon":  "\u03A5",
	"Phi":      "\u03A6",
	"Chi":      "\u03A7",
	"Psi":      "\u03A8",
	"Omega":    "\u03A9",
	"alpha":    "\u03B1",
	"beta":     "\u03B2",
	"gamma":    "\u03B3",
	"delta":    "\u03B4",
	"epsilon":  "\u03B5",
	"zeta":     "\u03B6",
	"eta":      "\u03B7",
	"theta":    "\u03B8",
	"iota":     "\u03B9",
	"kappa":    "\u03BA",
	"lambda":   "\u03BB",
	"mu":       "\u03BC",
	"nu":       "\u03BD",
	"xi":       "\u03BE",
	"omicron":  "\u03BF",
	"pi":       "\u03C0",
	"rho":      "\u03C1",
	"sigmaf":   "\u03C2",
	"sigma":    "\u03C3",
	"tau":      "\u03C4",
	"upsilon":  "\u03C5",
	"phi":      "\u03C6",
	"chi":      "\u03C7",
	"psi":      "\u03C8",
	"omega":    "\u03C9",
	"thetasym": "\u03D1",
	"upsih":    "\u03D2",
	"piv":      "\u03D6",
	"bull":     "\u2022",
	"hellip":   "\u2026",
	"prime":    "\u2032",
	"Prime":    "\u2033",
	"oline":    "\u203E",
	"frasl":    "\u2044",
	"weierp":   "\u2118",
	"image":    "\u2111",
	"real":     "\u211C",
	"trade":    "\u2122",
	"alefsym":  "\u2135",
	"larr":     "\u2190",
	"uarr":     "\u2191",
	"rarr":     "\u2192",
	"darr":     "\u2193",
	"harr":     "\u2194",
	"crarr":    "\u21B5",
	"lArr":     "\u21D0",
	"uArr":     "\u21D1",
	"rArr":     "\u21D2",
	"dArr":     "\u21D3",
	"hArr":     "\u21D4",
	"forall":   "\u2200",
	"part":     "\u2202",
	"exist":    "\u2203",
	"empty":    "\u2205",
	"nabla":    "\u2207",
	"isin":     "\u2208",
	"notin":    "\u2209",
	"ni":       "\u220B",
	"prod":     "\u220F",
	"sum":      "\u2211",
	"minus":    "\u2212",
	"lowast":   "\u2217",
	"radic":    "\u221A",
	"prop":     "\u221D",
	"infin":    "\u221E",
	"ang":      "\u2220",
	"and":      "\u2227",
	"or":       "\u2228",
	"cap":      "\u2229",
	"cup":      "\u222A",
	"int":      "\u222B",
	"there4":   "\u2234",
	"sim":      "\u223C",
	"cong":     "\u2245",
	"asymp":    "\u2248",
	"ne":       "\u2260",
	"equiv":    "\u2261",
	"le":       "\u2264",
	"ge":       "\u2265",
	"sub":      "\u2282",
	"sup":      "\u2283",
	"nsub":     "\u2284",
	"sube":     "\u2286",
	"supe":     "\u2287",
	"oplus":    "\u2295",
	"otimes":   "\u2297",
	"perp":     "\u22A5",
	"sdot":     "\u22C5",
	"lceil":    "\u2308",
	"rceil":    "\u2309",
	"lfloor":   "\u230A",
	"rfloor":   "\u230B",
	"lang":     "\u2329",
	"rang":     "\u232A",
	"loz":      "\u25CA",
	"spades":   "\u2660",
	"clubs":    "\u2663",
	"hearts":   "\u2665",
	"diams":    "\u2666",
	"quot":     "\u0022",
	"amp":      "\u0026",
	"lt":       "\u003C",
	"gt":       "\u003E",
	"OElig":    "\u0152",
	"oelig":    "\u0153",
	"Scaron":   "\u0160",
	"scaron":   "\u0161",
	"Yuml":     "\u0178",
	"circ":     "\u02C6",
	"tilde":    "\u02DC",
	"ensp":     "\u2002",
	"emsp":     "\u2003",
	"thinsp":   "\u2009",
	"zwnj":     "\u200C",
	"zwj":      "\u200D",
	"lrm":      "\u200E",
	"rlm":      "\u200F",
	"ndash":    "\u2013",
	"mdash":    "\u2014",
	"lsquo":    "\u2018",
	"rsquo":    "\u2019",
	"sbquo":    "\u201A",
	"ldquo":    "\u201C",
	"rdquo":    "\u201D",
	"bdquo":    "\u201E",
	"dagger":   "\u2020",
	"Dagger":   "\u2021",
	"permil":   "\u2030",
	"lsaquo":   "\u2039",
	"rsaquo":   "\u203A",
	"euro":     "\u20AC",
}
// HTMLAutoClose is the set of HTML elements that
// should be considered to close automatically.
//
// See the Decoder.Strict and Decoder.Entity fields' documentation.
var HTMLAutoClose []string = htmlAutoClose

// htmlAutoClose is the list behind HTMLAutoClose. The element names are
// lower case; the command in the comment below records how the list was
// generated.
var htmlAutoClose = []string{
	/*
		hget http://www.w3.org/TR/html4/loose.dtd |
		9 sed -n 's/<!ELEMENT ([^ ]*) +- O EMPTY.+/	"\1",/p' | tr A-Z a-z
	*/
	"basefont",
	"br",
	"area",
	"link",
	"img",
	"param",
	"hr",
	"input",
	"col",
	"frame",
	"isindex",
	"base",
	"meta",
}
// Pre-encoded replacement sequences. escapeText and printer.EscapeString
// write one of these in place of each input character that must not appear
// literally in the output.
var (
	escQuot = []byte("&#34;") // shorter than "&quot;"
	escApos = []byte("&#39;") // shorter than "&apos;"
	escAmp  = []byte("&amp;")
	escLT   = []byte("&lt;")
	escGT   = []byte("&gt;")
	escTab  = []byte("&#x9;")
	escNL   = []byte("&#xA;")
	escCR   = []byte("&#xD;")
	escFFFD = []byte("\uFFFD") // Unicode replacement character
)
  1785. // EscapeText writes to w the properly escaped XML equivalent
  1786. // of the plain text data s.
  1787. func EscapeText(w io.Writer, s []byte) error {
  1788. return escapeText(w, s, true)
  1789. }
  1790. // escapeText writes to w the properly escaped XML equivalent
  1791. // of the plain text data s. If escapeNewline is true, newline
  1792. // characters will be escaped.
  1793. func escapeText(w io.Writer, s []byte, escapeNewline bool) error {
  1794. var esc []byte
  1795. last := 0
  1796. for i := 0; i < len(s); {
  1797. r, width := utf8.DecodeRune(s[i:])
  1798. i += width
  1799. switch r {
  1800. case '"':
  1801. esc = escQuot
  1802. case '\'':
  1803. esc = escApos
  1804. case '&':
  1805. esc = escAmp
  1806. case '<':
  1807. esc = escLT
  1808. case '>':
  1809. esc = escGT
  1810. case '\t':
  1811. esc = escTab
  1812. case '\n':
  1813. if !escapeNewline {
  1814. continue
  1815. }
  1816. esc = escNL
  1817. case '\r':
  1818. esc = escCR
  1819. default:
  1820. if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
  1821. esc = escFFFD
  1822. break
  1823. }
  1824. continue
  1825. }
  1826. if _, err := w.Write(s[last : i-width]); err != nil {
  1827. return err
  1828. }
  1829. if _, err := w.Write(esc); err != nil {
  1830. return err
  1831. }
  1832. last = i
  1833. }
  1834. _, err := w.Write(s[last:])
  1835. return err
  1836. }
  1837. // EscapeString writes to p the properly escaped XML equivalent
  1838. // of the plain text data s.
  1839. func (p *printer) EscapeString(s string) {
  1840. var esc []byte
  1841. last := 0
  1842. for i := 0; i < len(s); {
  1843. r, width := utf8.DecodeRuneInString(s[i:])
  1844. i += width
  1845. switch r {
  1846. case '"':
  1847. esc = escQuot
  1848. case '\'':
  1849. esc = escApos
  1850. case '&':
  1851. esc = escAmp
  1852. case '<':
  1853. esc = escLT
  1854. case '>':
  1855. esc = escGT
  1856. case '\t':
  1857. esc = escTab
  1858. case '\n':
  1859. esc = escNL
  1860. case '\r':
  1861. esc = escCR
  1862. default:
  1863. if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
  1864. esc = escFFFD
  1865. break
  1866. }
  1867. continue
  1868. }
  1869. p.WriteString(s[last : i-width])
  1870. p.Write(esc)
  1871. last = i
  1872. }
  1873. p.WriteString(s[last:])
  1874. }
  1875. // Escape is like EscapeText but omits the error return value.
  1876. // It is provided for backwards compatibility with Go 1.0.
  1877. // Code targeting Go 1.1 or later should use EscapeText.
  1878. func Escape(w io.Writer, s []byte) {
  1879. EscapeText(w, s)
  1880. }
  1881. var (
  1882. cdataStart = []byte("<![CDATA[")
  1883. cdataEnd = []byte("]]>")
  1884. cdataEscape = []byte("]]]]><![CDATA[>")
  1885. )
  1886. // emitCDATA writes to w the CDATA-wrapped plain text data s.
  1887. // It escapes CDATA directives nested in s.
  1888. func emitCDATA(w io.Writer, s []byte) error {
  1889. if len(s) == 0 {
  1890. return nil
  1891. }
  1892. if _, err := w.Write(cdataStart); err != nil {
  1893. return err
  1894. }
  1895. for {
  1896. i := bytes.Index(s, cdataEnd)
  1897. if i >= 0 && i+len(cdataEnd) <= len(s) {
  1898. // Found a nested CDATA directive end.
  1899. if _, err := w.Write(s[:i]); err != nil {
  1900. return err
  1901. }
  1902. if _, err := w.Write(cdataEscape); err != nil {
  1903. return err
  1904. }
  1905. i += len(cdataEnd)
  1906. } else {
  1907. if _, err := w.Write(s); err != nil {
  1908. return err
  1909. }
  1910. break
  1911. }
  1912. s = s[i:]
  1913. }
  1914. _, err := w.Write(cdataEnd)
  1915. return err
  1916. }
  1917. // procInst parses the `param="..."` or `param='...'`
  1918. // value out of the provided string, returning "" if not found.
  1919. func procInst(param, s string) string {
  1920. // TODO: this parsing is somewhat lame and not exact.
  1921. // It works for all actual cases, though.
  1922. param = param + "="
  1923. idx := strings.Index(s, param)
  1924. if idx == -1 {
  1925. return ""
  1926. }
  1927. v := s[idx+len(param):]
  1928. if v == "" {
  1929. return ""
  1930. }
  1931. if v[0] != '\'' && v[0] != '"' {
  1932. return ""
  1933. }
  1934. idx = strings.IndexRune(v[1:], rune(v[0]))
  1935. if idx == -1 {
  1936. return ""
  1937. }
  1938. return v[1 : idx+1]
  1939. }
// printer holds the state used while writing XML output.
// It embeds a *bufio.Writer, so the writer's methods (for example the
// Write and WriteString calls made by EscapeString) are available
// directly on the printer.
type printer struct {
	*bufio.Writer
	seq        int
	indent     string
	prefix     string
	depth      int
	indentedIn bool
	putNewline bool
	attrNS     map[string]string // map prefix -> name space
	attrPrefix map[string]string // map name space -> prefix
	prefixes   []string
	tags       []Name
}
  1953. // read.go
  1954. // BUG(rsc): Mapping between XML elements and data structures is inherently flawed:
  1955. // an XML element is an order-dependent collection of anonymous
  1956. // values, while a data structure is an order-independent collection
  1957. // of named values.
  1958. // See package json for a textual representation more suitable
  1959. // to data structures.
  1960. // Unmarshal parses the XML-encoded data and stores the result in
  1961. // the value pointed to by v, which must be an arbitrary struct,
  1962. // slice, or string. Well-formed data that does not fit into v is
  1963. // discarded.
  1964. //
  1965. // Because Unmarshal uses the reflect package, it can only assign
  1966. // to exported (upper case) fields. Unmarshal uses a case-sensitive
  1967. // comparison to match XML element names to tag values and struct
  1968. // field names.
  1969. //
  1970. // Unmarshal maps an XML element to a struct using the following rules.
  1971. // In the rules, the tag of a field refers to the value associated with the
  1972. // key 'xml' in the struct field's tag (see the example above).
  1973. //
  1974. // * If the struct has a field of type []byte or string with tag
  1975. // ",innerxml", Unmarshal accumulates the raw XML nested inside the
  1976. // element in that field. The rest of the rules still apply.
  1977. //
  1978. // * If the struct has a field named XMLName of type Name,
  1979. // Unmarshal records the element name in that field.
  1980. //
  1981. // * If the XMLName field has an associated tag of the form
  1982. // "name" or "namespace-URL name", the XML element must have
  1983. // the given name (and, optionally, name space) or else Unmarshal
  1984. // returns an error.
  1985. //
  1986. // * If the XML element has an attribute whose name matches a
  1987. // struct field name with an associated tag containing ",attr" or
  1988. // the explicit name in a struct field tag of the form "name,attr",
  1989. // Unmarshal records the attribute value in that field.
  1990. //
  1991. // * If the XML element has an attribute not handled by the previous
  1992. // rule and the struct has a field with an associated tag containing
  1993. // ",any,attr", Unmarshal records the attribute value in the first
  1994. // such field.
  1995. //
  1996. // * If the XML element contains character data, that data is
  1997. // accumulated in the first struct field that has tag ",chardata".
  1998. // The struct field may have type []byte or string.
  1999. // If there is no such field, the character data is discarded.
  2000. //
  2001. // * If the XML element contains comments, they are accumulated in
  2002. // the first struct field that has tag ",comment". The struct
  2003. // field may have type []byte or string. If there is no such
  2004. // field, the comments are discarded.
  2005. //
  2006. // * If the XML element contains a sub-element whose name matches
  2007. // the prefix of a tag formatted as "a" or "a>b>c", unmarshal
  2008. // will descend into the XML structure looking for elements with the
  2009. // given names, and will map the innermost elements to that struct
  2010. // field. A tag starting with ">" is equivalent to one starting
  2011. // with the field name followed by ">".
  2012. //
  2013. // * If the XML element contains a sub-element whose name matches
  2014. // a struct field's XMLName tag and the struct field has no
  2015. // explicit name tag as per the previous rule, unmarshal maps
  2016. // the sub-element to that struct field.
  2017. //
  2018. // * If the XML element contains a sub-element whose name matches a
  2019. // field without any mode flags (",attr", ",chardata", etc), Unmarshal
  2020. // maps the sub-element to that struct field.
  2021. //
  2022. // * If the XML element contains a sub-element that hasn't matched any
  2023. // of the above rules and the struct has a field with tag ",any",
  2024. // unmarshal maps the sub-element to that struct field.
  2025. //
  2026. // * An anonymous struct field is handled as if the fields of its
  2027. // value were part of the outer struct.
  2028. //
  2029. // * A struct field with tag "-" is never unmarshaled into.
  2030. //
  2031. // If Unmarshal encounters a field type that implements the Unmarshaler
  2032. // interface, Unmarshal calls its UnmarshalXML method to produce the value from
  2033. // the XML element. Otherwise, if the value implements
  2034. // encoding.TextUnmarshaler, Unmarshal calls that value's UnmarshalText method.
  2035. //
  2036. // Unmarshal maps an XML element to a string or []byte by saving the
  2037. // concatenation of that element's character data in the string or
  2038. // []byte. The saved []byte is never nil.
  2039. //
  2040. // Unmarshal maps an attribute value to a string or []byte by saving
  2041. // the value in the string or slice.
  2042. //
  2043. // Unmarshal maps an attribute value to an Attr by saving the attribute,
  2044. // including its name, in the Attr.
  2045. //
  2046. // Unmarshal maps an XML element or attribute value to a slice by
  2047. // extending the length of the slice and mapping the element or attribute
  2048. // to the newly created value.
  2049. //
  2050. // Unmarshal maps an XML element or attribute value to a bool by
  2051. // setting it to the boolean value represented by the string. Whitespace
  2052. // is trimmed and ignored.
  2053. //
  2054. // Unmarshal maps an XML element or attribute value to an integer or
  2055. // floating-point field by setting the field to the result of
  2056. // interpreting the string value in decimal. There is no check for
  2057. // overflow. Whitespace is trimmed and ignored.
  2058. //
  2059. // Unmarshal maps an XML element to a Name by recording the element
  2060. // name.
  2061. //
  2062. // Unmarshal maps an XML element to a pointer by setting the pointer
  2063. // to a freshly allocated value and then mapping the element to that value.
  2064. //
  2065. // A missing element or empty attribute value will be unmarshaled as a zero value.
  2066. // If the field is a slice, a zero value will be appended to the field. Otherwise, the
  2067. // field will be set to its zero value.
  2068. func Unmarshal(data []byte, v interface{}) error {
  2069. return NewDecoder(bytes.NewReader(data)).Decode(v)
  2070. }
  2071. // Decode works like Unmarshal, except it reads the decoder
  2072. // stream to find the start element.
  2073. func (d *Decoder) Decode(v interface{}) error {
  2074. return d.DecodeElement(v, nil)
  2075. }
  2076. // DecodeElement works like Unmarshal except that it takes
  2077. // a pointer to the start XML element to decode into v.
  2078. // It is useful when a client reads some raw XML tokens itself
  2079. // but also wants to defer to Unmarshal for some elements.
  2080. func (d *Decoder) DecodeElement(v interface{}, start *StartElement) error {
  2081. val := reflect.ValueOf(v)
  2082. if val.Kind() != reflect.Ptr {
  2083. return errors.New("non-pointer passed to Unmarshal")
  2084. }
  2085. return d.unmarshal(val.Elem(), start)
  2086. }
  2087. // An UnmarshalError represents an error in the unmarshaling process.
  2088. type UnmarshalError string
  2089. func (e UnmarshalError) Error() string { return string(e) }
// Unmarshaler is the interface implemented by objects that can unmarshal
// an XML element description of themselves.
//
// UnmarshalXML decodes a single XML element
// beginning with the given start element.
// If it returns an error, the outer call to Unmarshal stops and
// returns that error.
// UnmarshalXML must consume exactly one XML element; if it returns
// without consuming the entire element, the decoder reports an error.
// One common implementation strategy is to unmarshal into
// a separate value with a layout matching the expected XML
// using d.DecodeElement, and then to copy the data from
// that value into the receiver.
// Another common strategy is to use d.Token to process the
// XML object one token at a time.
// UnmarshalXML may not use d.RawToken.
type Unmarshaler interface {
	UnmarshalXML(d *Decoder, start StartElement) error
}
// UnmarshalerAttr is the interface implemented by objects that can unmarshal
// an XML attribute description of themselves.
//
// UnmarshalXMLAttr decodes a single XML attribute.
// If it returns an error, the outer call to Unmarshal stops and
// returns that error.
// UnmarshalXMLAttr is used only for struct fields with the
// "attr" option in the field tag.
// When a value implements both UnmarshalerAttr and
// encoding.TextUnmarshaler, UnmarshalXMLAttr is the one called.
type UnmarshalerAttr interface {
	UnmarshalXMLAttr(attr Attr) error
}
  2119. // receiverType returns the receiver type to use in an expression like "%s.MethodName".
  2120. func receiverType(val interface{}) string {
  2121. t := reflect.TypeOf(val)
  2122. if t.Name() != "" {
  2123. return t.String()
  2124. }
  2125. return "(" + t.String() + ")"
  2126. }
  2127. // unmarshalInterface unmarshals a single XML element into val.
  2128. // start is the opening tag of the element.
  2129. func (d *Decoder) unmarshalInterface(val Unmarshaler, start *StartElement) error {
  2130. // Record that decoder must stop at end tag corresponding to start.
  2131. d.pushEOF()
  2132. d.unmarshalDepth++
  2133. err := val.UnmarshalXML(d, *start)
  2134. d.unmarshalDepth--
  2135. if err != nil {
  2136. d.popEOF()
  2137. return err
  2138. }
  2139. if !d.popEOF() {
  2140. return fmt.Errorf("xml: %s.UnmarshalXML did not consume entire <%s> element", receiverType(val), start.Name.Local)
  2141. }
  2142. return nil
  2143. }
  2144. // unmarshalTextInterface unmarshals a single XML element into val.
  2145. // The chardata contained in the element (but not its children)
  2146. // is passed to the text unmarshaler.
  2147. func (d *Decoder) unmarshalTextInterface(val encoding.TextUnmarshaler) error {
  2148. var buf []byte
  2149. depth := 1
  2150. for depth > 0 {
  2151. t, err := d.Token()
  2152. if err != nil {
  2153. return err
  2154. }
  2155. switch t := t.(type) {
  2156. case CharData:
  2157. if depth == 1 {
  2158. buf = append(buf, t...)
  2159. }
  2160. case StartElement:
  2161. depth++
  2162. case EndElement:
  2163. depth--
  2164. }
  2165. }
  2166. return val.UnmarshalText(buf)
  2167. }
  2168. // unmarshalAttr unmarshals a single XML attribute into val.
  2169. func (d *Decoder) unmarshalAttr(val reflect.Value, attr Attr) error {
  2170. if val.Kind() == reflect.Ptr {
  2171. if val.IsNil() {
  2172. val.Set(reflect.New(val.Type().Elem()))
  2173. }
  2174. val = val.Elem()
  2175. }
  2176. if val.CanInterface() && val.Type().Implements(unmarshalerAttrType) {
  2177. // This is an unmarshaler with a non-pointer receiver,
  2178. // so it's likely to be incorrect, but we do what we're told.
  2179. return val.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr)
  2180. }
  2181. if val.CanAddr() {
  2182. pv := val.Addr()
  2183. if pv.CanInterface() && pv.Type().Implements(unmarshalerAttrType) {
  2184. return pv.Interface().(UnmarshalerAttr).UnmarshalXMLAttr(attr)
  2185. }
  2186. }
  2187. // Not an UnmarshalerAttr; try encoding.TextUnmarshaler.
  2188. if val.CanInterface() && val.Type().Implements(textUnmarshalerType) {
  2189. // This is an unmarshaler with a non-pointer receiver,
  2190. // so it's likely to be incorrect, but we do what we're told.
  2191. return val.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value))
  2192. }
  2193. if val.CanAddr() {
  2194. pv := val.Addr()
  2195. if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) {
  2196. return pv.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(attr.Value))
  2197. }
  2198. }
  2199. if val.Type().Kind() == reflect.Slice && val.Type().Elem().Kind() != reflect.Uint8 {
  2200. // Slice of element values.
  2201. // Grow slice.
  2202. n := val.Len()
  2203. val.Set(reflect.Append(val, reflect.Zero(val.Type().Elem())))
  2204. // Recur to read element into slice.
  2205. if err := d.unmarshalAttr(val.Index(n), attr); err != nil {
  2206. val.SetLen(n)
  2207. return err
  2208. }
  2209. return nil
  2210. }
  2211. if val.Type() == attrType {
  2212. val.Set(reflect.ValueOf(attr))
  2213. return nil
  2214. }
  2215. return copyValue(val, []byte(attr.Value))
  2216. }
// reflect.Type values computed once at package initialization and used
// by unmarshal and unmarshalAttr for type comparisons and Implements
// checks.
var (
	attrType            = reflect.TypeOf(Attr{})
	unmarshalerType     = reflect.TypeOf((*Unmarshaler)(nil)).Elem()
	unmarshalerAttrType = reflect.TypeOf((*UnmarshalerAttr)(nil)).Elem()
	textUnmarshalerType = reflect.TypeOf((*encoding.TextUnmarshaler)(nil)).Elem()
)
// Unmarshal a single XML element into val.
//
// If start is nil, tokens are read from the decoder until a start element
// is found. Values implementing Unmarshaler or encoding.TextUnmarshaler
// (directly or through their address) are delegated to those interfaces.
// Otherwise the element is processed according to val's kind: attributes
// and sub-elements are mapped onto struct fields, and accumulated
// character data, comments and inner XML are stored once the matching
// end element has been read.
func (d *Decoder) unmarshal(val reflect.Value, start *StartElement) error {
	// Find start element if we need it.
	if start == nil {
		for {
			tok, err := d.Token()
			if err != nil {
				return err
			}
			if t, ok := tok.(StartElement); ok {
				start = &t
				break
			}
		}
	}

	// Load value from interface, but only if the result will be
	// usefully addressable.
	if val.Kind() == reflect.Interface && !val.IsNil() {
		e := val.Elem()
		if e.Kind() == reflect.Ptr && !e.IsNil() {
			val = e
		}
	}

	// Allocate nil pointers and work on the pointed-to value.
	if val.Kind() == reflect.Ptr {
		if val.IsNil() {
			val.Set(reflect.New(val.Type().Elem()))
		}
		val = val.Elem()
	}

	if val.CanInterface() && val.Type().Implements(unmarshalerType) {
		// This is an unmarshaler with a non-pointer receiver,
		// so it's likely to be incorrect, but we do what we're told.
		return d.unmarshalInterface(val.Interface().(Unmarshaler), start)
	}

	if val.CanAddr() {
		pv := val.Addr()
		if pv.CanInterface() && pv.Type().Implements(unmarshalerType) {
			return d.unmarshalInterface(pv.Interface().(Unmarshaler), start)
		}
	}

	// Not an Unmarshaler; try encoding.TextUnmarshaler, again first on
	// the value itself and then on its address.
	if val.CanInterface() && val.Type().Implements(textUnmarshalerType) {
		return d.unmarshalTextInterface(val.Interface().(encoding.TextUnmarshaler))
	}

	if val.CanAddr() {
		pv := val.Addr()
		if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) {
			return d.unmarshalTextInterface(pv.Interface().(encoding.TextUnmarshaler))
		}
	}

	// The save* values name the destinations for data collected while
	// scanning the element; an invalid (zero) Value means "do not save".
	var (
		data         []byte
		saveData     reflect.Value
		comment      []byte
		saveComment  reflect.Value
		saveXML      reflect.Value
		saveXMLIndex int
		saveXMLData  []byte
		saveAny      reflect.Value
		sv           reflect.Value
		tinfo        *typeInfo
		err          error
	)

	switch v := val; v.Kind() {
	default:
		return errors.New("unknown type " + v.Type().String())

	case reflect.Interface:
		// TODO: For now, simply ignore the field. In the near
		// future we may choose to unmarshal the start
		// element on it, if not nil.
		return d.Skip()

	case reflect.Slice:
		typ := v.Type()
		if typ.Elem().Kind() == reflect.Uint8 {
			// []byte
			saveData = v
			break
		}

		// Slice of element values.
		// Grow slice.
		n := v.Len()
		v.Set(reflect.Append(val, reflect.Zero(v.Type().Elem())))

		// Recur to read element into slice.
		if err := d.unmarshal(v.Index(n), start); err != nil {
			// Drop the element that was just appended.
			v.SetLen(n)
			return err
		}
		return nil

	case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String:
		saveData = v

	case reflect.Struct:
		typ := v.Type()
		if typ == nameType {
			// A Name simply records the element name.
			v.Set(reflect.ValueOf(start.Name))
			break
		}

		sv = v
		tinfo, err = getTypeInfo(typ)
		if err != nil {
			return err
		}

		// Validate and assign element name.
		if tinfo.xmlname != nil {
			finfo := tinfo.xmlname
			if finfo.name != "" && finfo.name != start.Name.Local {
				return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name.Local + ">")
			}
			if finfo.xmlns != "" && finfo.xmlns != start.Name.Space {
				e := "expected element <" + finfo.name + "> in name space " + finfo.xmlns + " but have "
				if start.Name.Space == "" {
					e += "no name space"
				} else {
					e += start.Name.Space
				}
				return UnmarshalError(e)
			}
			fv := finfo.value(sv)
			if _, ok := fv.Interface().(Name); ok {
				fv.Set(reflect.ValueOf(start.Name))
			}
		}

		// Assign attributes.
		for _, a := range start.Attr {
			handled := false
			// any is the index of the first ",any,attr" field, or -1.
			any := -1
			for i := range tinfo.fields {
				finfo := &tinfo.fields[i]
				switch finfo.flags & fMode {
				case fAttr:
					strv := finfo.value(sv)
					if a.Name.Local == finfo.name /*&& (finfo.xmlns == "" || finfo.xmlns == a.Name.Space)*/ {
						if err := d.unmarshalAttr(strv, a); err != nil {
							return err
						}
						handled = true
					}

				case fAny | fAttr:
					if any == -1 {
						any = i
					}
				}
			}
			// Attributes not matched by name fall back to the catch-all field.
			if !handled && any >= 0 {
				finfo := &tinfo.fields[any]
				strv := finfo.value(sv)
				if err := d.unmarshalAttr(strv, a); err != nil {
					return err
				}
			}
		}

		// Determine whether we need to save character data or comments.
		// Only the first field of each mode is used.
		for i := range tinfo.fields {
			finfo := &tinfo.fields[i]
			switch finfo.flags & fMode {
			case fCDATA, fCharData:
				if !saveData.IsValid() {
					saveData = finfo.value(sv)
				}

			case fComment:
				if !saveComment.IsValid() {
					saveComment = finfo.value(sv)
				}

			case fAny, fAny | fElement:
				if !saveAny.IsValid() {
					saveAny = finfo.value(sv)
				}

			case fInnerXml:
				if !saveXML.IsValid() {
					saveXML = finfo.value(sv)
					// saveXMLIndex marks where this element's inner XML
					// starts within d.saved. Index 0 means this call
					// created the buffer and must release it afterwards.
					if d.saved == nil {
						saveXMLIndex = 0
						d.saved = new(bytes.Buffer)
					} else {
						saveXMLIndex = d.savedOffset()
					}
				}
			}
		}
	}

	// Find end element.
	// Process sub-elements along the way.
Loop:
	for {
		// Remember the offset before the token is read, so that the end
		// tag itself is excluded from the saved inner XML.
		var savedOffset int
		if saveXML.IsValid() {
			savedOffset = d.savedOffset()
		}
		tok, err := d.Token()
		if err != nil {
			return err
		}
		switch t := tok.(type) {
		case StartElement:
			consumed := false
			if sv.IsValid() {
				consumed, err = d.unmarshalPath(tinfo, sv, nil, &t)
				if err != nil {
					return err
				}
				if !consumed && saveAny.IsValid() {
					consumed = true
					if err := d.unmarshal(saveAny, &t); err != nil {
						return err
					}
				}
			}
			if !consumed {
				if err := d.Skip(); err != nil {
					return err
				}
			}

		case EndElement:
			if saveXML.IsValid() {
				saveXMLData = d.saved.Bytes()[saveXMLIndex:savedOffset]
				if saveXMLIndex == 0 {
					d.saved = nil
				}
			}
			break Loop

		case CharData:
			if saveData.IsValid() {
				data = append(data, t...)
			}

		case Comment:
			if saveComment.IsValid() {
				comment = append(comment, t...)
			}
		}
	}

	// If the character-data destination (or its address) implements
	// encoding.TextUnmarshaler, hand it the accumulated text and clear
	// saveData so that the copyValue call below does nothing.
	if saveData.IsValid() && saveData.CanInterface() && saveData.Type().Implements(textUnmarshalerType) {
		if err := saveData.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil {
			return err
		}
		saveData = reflect.Value{}
	}

	if saveData.IsValid() && saveData.CanAddr() {
		pv := saveData.Addr()
		if pv.CanInterface() && pv.Type().Implements(textUnmarshalerType) {
			if err := pv.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil {
				return err
			}
			saveData = reflect.Value{}
		}
	}

	if err := copyValue(saveData, data); err != nil {
		return err
	}

	// Store collected comments; destinations of other kinds are ignored.
	switch t := saveComment; t.Kind() {
	case reflect.String:
		t.SetString(string(comment))
	case reflect.Slice:
		t.Set(reflect.ValueOf(comment))
	}

	// Store the raw inner XML into a string or []byte destination.
	switch t := saveXML; t.Kind() {
	case reflect.String:
		t.SetString(string(saveXMLData))
	case reflect.Slice:
		if t.Type().Elem().Kind() == reflect.Uint8 {
			t.Set(reflect.ValueOf(saveXMLData))
		}
	}

	return nil
}
  2485. func copyValue(dst reflect.Value, src []byte) (err error) {
  2486. dst0 := dst
  2487. if dst.Kind() == reflect.Ptr {
  2488. if dst.IsNil() {
  2489. dst.Set(reflect.New(dst.Type().Elem()))
  2490. }
  2491. dst = dst.Elem()
  2492. }
  2493. // Save accumulated data.
  2494. switch dst.Kind() {
  2495. case reflect.Invalid:
  2496. // Probably a comment.
  2497. default:
  2498. return errors.New("cannot unmarshal into " + dst0.Type().String())
  2499. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  2500. if len(src) == 0 {
  2501. dst.SetInt(0)
  2502. return nil
  2503. }
  2504. itmp, err := strconv.ParseInt(strings.TrimSpace(string(src)), 10, dst.Type().Bits())
  2505. if err != nil {
  2506. return err
  2507. }
  2508. dst.SetInt(itmp)
  2509. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
  2510. if len(src) == 0 {
  2511. dst.SetUint(0)
  2512. return nil
  2513. }
  2514. utmp, err := strconv.ParseUint(strings.TrimSpace(string(src)), 10, dst.Type().Bits())
  2515. if err != nil {
  2516. return err
  2517. }
  2518. dst.SetUint(utmp)
  2519. case reflect.Float32, reflect.Float64:
  2520. if len(src) == 0 {
  2521. dst.SetFloat(0)
  2522. return nil
  2523. }
  2524. ftmp, err := strconv.ParseFloat(strings.TrimSpace(string(src)), dst.Type().Bits())
  2525. if err != nil {
  2526. return err
  2527. }
  2528. dst.SetFloat(ftmp)
  2529. case reflect.Bool:
  2530. if len(src) == 0 {
  2531. dst.SetBool(false)
  2532. return nil
  2533. }
  2534. value, err := strconv.ParseBool(strings.TrimSpace(string(src)))
  2535. if err != nil {
  2536. return err
  2537. }
  2538. dst.SetBool(value)
  2539. case reflect.String:
  2540. dst.SetString(string(src))
  2541. case reflect.Slice:
  2542. if len(src) == 0 {
  2543. // non-nil to flag presence
  2544. src = []byte{}
  2545. }
  2546. dst.SetBytes(src)
  2547. }
  2548. return nil
  2549. }
  2550. // unmarshalPath walks down an XML structure looking for wanted
  2551. // paths, and calls unmarshal on them.
  2552. // The consumed result tells whether XML elements have been consumed
  2553. // from the Decoder until start's matching end element, or if it's
  2554. // still untouched because start is uninteresting for sv's fields.
  2555. func (d *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement) (consumed bool, err error) {
  2556. recurse := false
  2557. Loop:
  2558. for i := range tinfo.fields {
  2559. finfo := &tinfo.fields[i]
  2560. if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) /*|| finfo.xmlns != "" && finfo.xmlns != start.Name.Space*/ {
  2561. continue
  2562. }
  2563. for j := range parents {
  2564. if parents[j] != finfo.parents[j] {
  2565. continue Loop
  2566. }
  2567. }
  2568. if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local {
  2569. // It's a perfect match, unmarshal the field.
  2570. return true, d.unmarshal(finfo.value(sv), start)
  2571. }
  2572. if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local {
  2573. // It's a prefix for the field. Break and recurse
  2574. // since it's not ok for one field path to be itself
  2575. // the prefix for another field path.
  2576. recurse = true
  2577. // We can reuse the same slice as long as we
  2578. // don't try to append to it.
  2579. parents = finfo.parents[:len(parents)+1]
  2580. break
  2581. }
  2582. }
  2583. if !recurse {
  2584. // We have no business with this element.
  2585. return false, nil
  2586. }
  2587. // The element is not a perfect match for any field, but one
  2588. // or more fields have the path to this element as a parent
  2589. // prefix. Recurse and attempt to match these.
  2590. for {
  2591. var tok Token
  2592. tok, err = d.Token()
  2593. if err != nil {
  2594. return true, err
  2595. }
  2596. switch t := tok.(type) {
  2597. case StartElement:
  2598. consumed2, err := d.unmarshalPath(tinfo, sv, parents, &t)
  2599. if err != nil {
  2600. return true, err
  2601. }
  2602. if !consumed2 {
  2603. if err := d.Skip(); err != nil {
  2604. return true, err
  2605. }
  2606. }
  2607. case EndElement:
  2608. return true, nil
  2609. }
  2610. }
  2611. }
  2612. // Skip reads tokens until it has consumed the end element
  2613. // matching the most recent start element already consumed.
  2614. // It recurs if it encounters a start element, so it can be used to
  2615. // skip nested structures.
  2616. // It returns nil if it finds an end element matching the start
  2617. // element; otherwise it returns an error describing the problem.
  2618. func (d *Decoder) Skip() error {
  2619. for {
  2620. tok, err := d.Token()
  2621. if err != nil {
  2622. return err
  2623. }
  2624. switch tok.(type) {
  2625. case StartElement:
  2626. if err := d.Skip(); err != nil {
  2627. return err
  2628. }
  2629. case EndElement:
  2630. return nil
  2631. }
  2632. }
  2633. }
  2634. // typeinfo.go
// typeInfo holds details for the xml representation of a type.
// It is built by getTypeInfo.
type typeInfo struct {
	// xmlname describes the type's XMLName field; nil if there is none.
	xmlname *fieldInfo
	// fields lists the remaining fields, including those contributed by
	// embedded structs.
	fields []fieldInfo
}
// fieldInfo holds details for the xml representation of a single field.
type fieldInfo struct {
	// idx is the field's index sequence. For fields coming from an
	// embedded struct, getTypeInfo prepends the embedded field's index.
	idx []int
	// name is matched against the local name of XML elements and attributes.
	name string
	// xmlns is the name space; for an XMLName field it is checked
	// against the element's name space when non-empty.
	xmlns string
	// flags holds the field's mode bits (see fMode) and options.
	flags fieldFlags
	// parents lists the enclosing element names that unmarshalPath
	// descends through before reaching the element called name.
	parents []string
}
// fieldFlags is a bit set describing how a struct field is mapped to XML.
type fieldFlags int

const (
	// Each flag below occupies its own bit (1 << iota).
	fElement fieldFlags = 1 << iota
	fAttr
	fCDATA
	fCharData
	fInnerXml
	fComment
	fAny

	fOmitEmpty

	// fMode masks the bits that select a field's mode; fOmitEmpty is
	// not part of it.
	fMode = fElement | fAttr | fCDATA | fCharData | fInnerXml | fComment | fAny

	// xmlName is the name of the struct field that getTypeInfo records
	// as typeInfo.xmlname instead of adding it to the regular fields.
	xmlName = "XMLName"
)
  2661. var tinfoMap sync.Map // map[reflect.Type]*typeInfo
  2662. var nameType = reflect.TypeOf(Name{})
  2663. // getTypeInfo returns the typeInfo structure with details necessary
  2664. // for marshaling and unmarshaling typ.
  2665. func getTypeInfo(typ reflect.Type) (*typeInfo, error) {
  2666. if ti, ok := tinfoMap.Load(typ); ok {
  2667. return ti.(*typeInfo), nil
  2668. }
  2669. tinfo := &typeInfo{}
  2670. if typ.Kind() == reflect.Struct && typ != nameType {
  2671. n := typ.NumField()
  2672. for i := 0; i < n; i++ {
  2673. f := typ.Field(i)
  2674. if (f.PkgPath != "" && !f.Anonymous) || f.Tag.Get("xml") == "-" {
  2675. continue // Private field
  2676. }
  2677. // For embedded structs, embed its fields.
  2678. if f.Anonymous {
  2679. t := f.Type
  2680. if t.Kind() == reflect.Ptr {
  2681. t = t.Elem()
  2682. }
  2683. if t.Kind() == reflect.Struct {
  2684. inner, err := getTypeInfo(t)
  2685. if err != nil {
  2686. return nil, err
  2687. }
  2688. if tinfo.xmlname == nil {
  2689. tinfo.xmlname = inner.xmlname
  2690. }
  2691. for _, finfo := range inner.fields {
  2692. finfo.idx = append([]int{i}, finfo.idx...)
  2693. if err := addFieldInfo(typ, tinfo, &finfo); err != nil {
  2694. return nil, err
  2695. }
  2696. }
  2697. continue
  2698. }
  2699. }
  2700. finfo, err := structFieldInfo(typ, &f)
  2701. if err != nil {
  2702. return nil, err
  2703. }
  2704. if f.Name == xmlName {
  2705. tinfo.xmlname = finfo
  2706. continue
  2707. }
  2708. // Add the field if it doesn't conflict with other fields.
  2709. if err := addFieldInfo(typ, tinfo, finfo); err != nil {
  2710. return nil, err
  2711. }
  2712. }
  2713. }
  2714. ti, _ := tinfoMap.LoadOrStore(typ, tinfo)
  2715. return ti.(*typeInfo), nil
  2716. }
  2717. // structFieldInfo builds and returns a fieldInfo for f.
  2718. func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, error) {
  2719. finfo := &fieldInfo{idx: f.Index}
  2720. // Split the tag from the xml namespace if necessary.
  2721. tag := f.Tag.Get("xml")
  2722. if i := strings.Index(tag, " "); i >= 0 {
  2723. finfo.xmlns, tag = tag[:i], tag[i+1:]
  2724. }
  2725. // Parse flags.
  2726. tokens := strings.Split(tag, ",")
  2727. if len(tokens) == 1 {
  2728. finfo.flags = fElement
  2729. } else {
  2730. tag = tokens[0]
  2731. for _, flag := range tokens[1:] {
  2732. switch flag {
  2733. case "attr":
  2734. finfo.flags |= fAttr
  2735. case "cdata":
  2736. finfo.flags |= fCDATA
  2737. case "chardata":
  2738. finfo.flags |= fCharData
  2739. case "innerxml":
  2740. finfo.flags |= fInnerXml
  2741. case "comment":
  2742. finfo.flags |= fComment
  2743. case "any":
  2744. finfo.flags |= fAny
  2745. case "omitempty":
  2746. finfo.flags |= fOmitEmpty
  2747. }
  2748. }
  2749. // Validate the flags used.
  2750. valid := true
  2751. switch mode := finfo.flags & fMode; mode {
  2752. case 0:
  2753. finfo.flags |= fElement
  2754. case fAttr, fCDATA, fCharData, fInnerXml, fComment, fAny, fAny | fAttr:
  2755. if f.Name == xmlName || tag != "" && mode != fAttr {
  2756. valid = false
  2757. }
  2758. default:
  2759. // This will also catch multiple modes in a single field.
  2760. valid = false
  2761. }
  2762. if finfo.flags&fMode == fAny {
  2763. finfo.flags |= fElement
  2764. }
  2765. if finfo.flags&fOmitEmpty != 0 && finfo.flags&(fElement|fAttr) == 0 {
  2766. valid = false
  2767. }
  2768. if !valid {
  2769. return nil, fmt.Errorf("xml: invalid tag in field %s of type %s: %q",
  2770. f.Name, typ, f.Tag.Get("xml"))
  2771. }
  2772. }
  2773. // Use of xmlns without a name is not allowed.
  2774. if finfo.xmlns != "" && tag == "" {
  2775. return nil, fmt.Errorf("xml: namespace without name in field %s of type %s: %q",
  2776. f.Name, typ, f.Tag.Get("xml"))
  2777. }
  2778. if f.Name == xmlName {
  2779. // The XMLName field records the XML element name. Don't
  2780. // process it as usual because its name should default to
  2781. // empty rather than to the field name.
  2782. finfo.name = tag
  2783. return finfo, nil
  2784. }
  2785. if tag == "" {
  2786. // If the name part of the tag is completely empty, get
  2787. // default from XMLName of underlying struct if feasible,
  2788. // or field name otherwise.
  2789. if xmlname := lookupXMLName(f.Type); xmlname != nil {
  2790. finfo.xmlns, finfo.name = xmlname.xmlns, xmlname.name
  2791. } else {
  2792. finfo.name = f.Name
  2793. }
  2794. return finfo, nil
  2795. }
  2796. // Prepare field name and parents.
  2797. parents := strings.Split(tag, ">")
  2798. if parents[0] == "" {
  2799. parents[0] = f.Name
  2800. }
  2801. if parents[len(parents)-1] == "" {
  2802. return nil, fmt.Errorf("xml: trailing '>' in field %s of type %s", f.Name, typ)
  2803. }
  2804. finfo.name = parents[len(parents)-1]
  2805. if len(parents) > 1 {
  2806. if (finfo.flags & fElement) == 0 {
  2807. return nil, fmt.Errorf("xml: %s chain not valid with %s flag", tag, strings.Join(tokens[1:], ","))
  2808. }
  2809. finfo.parents = parents[:len(parents)-1]
  2810. }
  2811. // If the field type has an XMLName field, the names must match
  2812. // so that the behavior of both marshaling and unmarshaling
  2813. // is straightforward and unambiguous.
  2814. if finfo.flags&fElement != 0 {
  2815. ftyp := f.Type
  2816. xmlname := lookupXMLName(ftyp)
  2817. if xmlname != nil && xmlname.name != finfo.name {
  2818. return nil, fmt.Errorf("xml: name %q in tag of %s.%s conflicts with name %q in %s.XMLName",
  2819. finfo.name, typ, f.Name, xmlname.name, ftyp)
  2820. }
  2821. }
  2822. return finfo, nil
  2823. }
  2824. // lookupXMLName returns the fieldInfo for typ's XMLName field
  2825. // in case it exists and has a valid xml field tag, otherwise
  2826. // it returns nil.
  2827. func lookupXMLName(typ reflect.Type) (xmlname *fieldInfo) {
  2828. for typ.Kind() == reflect.Ptr {
  2829. typ = typ.Elem()
  2830. }
  2831. if typ.Kind() != reflect.Struct {
  2832. return nil
  2833. }
  2834. for i, n := 0, typ.NumField(); i < n; i++ {
  2835. f := typ.Field(i)
  2836. if f.Name != xmlName {
  2837. continue
  2838. }
  2839. finfo, err := structFieldInfo(typ, &f)
  2840. if err == nil && finfo.name != "" {
  2841. return finfo
  2842. }
  2843. // Also consider errors as a non-existent field tag
  2844. // and let getTypeInfo itself report the error.
  2845. break
  2846. }
  2847. return nil
  2848. }
  // min returns the smaller of a and b.
  func min(a, b int) int {
  	if b < a {
  		return b
  	}
  	return a
  }
  2855. // addFieldInfo adds finfo to tinfo.fields if there are no
  2856. // conflicts, or if conflicts arise from previous fields that were
  2857. // obtained from deeper embedded structures than finfo. In the latter
  2858. // case, the conflicting entries are dropped.
  2859. // A conflict occurs when the path (parent + name) to a field is
  2860. // itself a prefix of another path, or when two paths match exactly.
  2861. // It is okay for field paths to share a common, shorter prefix.
  2862. func addFieldInfo(typ reflect.Type, tinfo *typeInfo, newf *fieldInfo) error {
  2863. var conflicts []int
  2864. Loop:
  2865. // First, figure all conflicts. Most working code will have none.
  2866. for i := range tinfo.fields {
  2867. oldf := &tinfo.fields[i]
  2868. if oldf.flags&fMode != newf.flags&fMode {
  2869. continue
  2870. }
  2871. if oldf.xmlns != "" && newf.xmlns != "" && oldf.xmlns != newf.xmlns {
  2872. continue
  2873. }
  2874. minl := min(len(newf.parents), len(oldf.parents))
  2875. for p := 0; p < minl; p++ {
  2876. if oldf.parents[p] != newf.parents[p] {
  2877. continue Loop
  2878. }
  2879. }
  2880. if len(oldf.parents) > len(newf.parents) {
  2881. if oldf.parents[len(newf.parents)] == newf.name {
  2882. conflicts = append(conflicts, i)
  2883. }
  2884. } else if len(oldf.parents) < len(newf.parents) {
  2885. if newf.parents[len(oldf.parents)] == oldf.name {
  2886. conflicts = append(conflicts, i)
  2887. }
  2888. } else {
  2889. if newf.name == oldf.name {
  2890. conflicts = append(conflicts, i)
  2891. }
  2892. }
  2893. }
  2894. // Without conflicts, add the new field and return.
  2895. if conflicts == nil {
  2896. tinfo.fields = append(tinfo.fields, *newf)
  2897. return nil
  2898. }
  2899. // If any conflict is shallower, ignore the new field.
  2900. // This matches the Go field resolution on embedding.
  2901. for _, i := range conflicts {
  2902. if len(tinfo.fields[i].idx) < len(newf.idx) {
  2903. return nil
  2904. }
  2905. }
  2906. // Otherwise, if any of them is at the same depth level, it's an error.
  2907. for _, i := range conflicts {
  2908. oldf := &tinfo.fields[i]
  2909. if len(oldf.idx) == len(newf.idx) {
  2910. f1 := typ.FieldByIndex(oldf.idx)
  2911. f2 := typ.FieldByIndex(newf.idx)
  2912. return &TagPathError{typ, f1.Name, f1.Tag.Get("xml"), f2.Name, f2.Tag.Get("xml")}
  2913. }
  2914. }
  2915. // Otherwise, the new field is shallower, and thus takes precedence,
  2916. // so drop the conflicting fields from tinfo and append the new one.
  2917. for c := len(conflicts) - 1; c >= 0; c-- {
  2918. i := conflicts[c]
  2919. copy(tinfo.fields[i:], tinfo.fields[i+1:])
  2920. tinfo.fields = tinfo.fields[:len(tinfo.fields)-1]
  2921. }
  2922. tinfo.fields = append(tinfo.fields, *newf)
  2923. return nil
  2924. }
  // A TagPathError reports that two fields of a struct carry xml tags
  // whose paths conflict with each other.
  type TagPathError struct {
  	// Struct is the type that declares the conflicting fields.
  	Struct reflect.Type
  	// Field1 and Tag1 are the name and xml tag of the field recorded first.
  	Field1 string
  	Tag1   string
  	// Field2 and Tag2 are the name and xml tag of the field that clashed with it.
  	Field2 string
  	Tag2   string
  }

  // Error describes the conflict, naming the struct type and both
  // fields together with their tags.
  func (e *TagPathError) Error() string {
  	const format = "%s field %q with tag %q conflicts with field %q with tag %q"
  	return fmt.Sprintf(format, e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2)
  }
  2935. // value returns v's field value corresponding to finfo.
  2936. // It's equivalent to v.FieldByIndex(finfo.idx), but initializes
  2937. // and dereferences pointers as necessary.
  2938. func (finfo *fieldInfo) value(v reflect.Value) reflect.Value {
  2939. for i, x := range finfo.idx {
  2940. if i > 0 {
  2941. t := v.Type()
  2942. if t.Kind() == reflect.Ptr && t.Elem().Kind() == reflect.Struct {
  2943. if v.IsNil() {
  2944. v.Set(reflect.New(v.Type().Elem()))
  2945. }
  2946. v = v.Elem()
  2947. }
  2948. }
  2949. v = v.Field(x)
  2950. }
  2951. return v
  2952. }