schema.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. // Copyright 2015 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "encoding/json"
  17. "errors"
  18. "fmt"
  19. "reflect"
  20. "sync"
  21. bq "google.golang.org/api/bigquery/v2"
  22. )
// Schema describes the fields in a table or query result.
// It is a slice of field descriptions; nested (RECORD) fields carry their own
// Schema in FieldSchema.Schema.
type Schema []*FieldSchema
// FieldSchema describes a single field.
type FieldSchema struct {
	// The field name.
	// Must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_),
	// and must start with a letter or underscore.
	// The maximum length is 128 characters.
	Name string

	// A description of the field. The maximum length is 16,384 characters.
	Description string

	// Whether the field may contain multiple values.
	// When true, the field is sent to BigQuery with mode "REPEATED".
	Repeated bool

	// Whether the field is required. Ignored if Repeated is true.
	// When neither Repeated nor Required is set, the mode is left empty,
	// which is interpreted as NULLABLE.
	Required bool

	// The field data type. If Type is Record, then this field contains a nested schema,
	// which is described by Schema.
	Type FieldType

	// Describes the nested schema if Type is set to Record.
	Schema Schema
}
  44. func (fs *FieldSchema) toBQ() *bq.TableFieldSchema {
  45. tfs := &bq.TableFieldSchema{
  46. Description: fs.Description,
  47. Name: fs.Name,
  48. Type: string(fs.Type),
  49. }
  50. if fs.Repeated {
  51. tfs.Mode = "REPEATED"
  52. } else if fs.Required {
  53. tfs.Mode = "REQUIRED"
  54. } // else leave as default, which is interpreted as NULLABLE.
  55. for _, f := range fs.Schema {
  56. tfs.Fields = append(tfs.Fields, f.toBQ())
  57. }
  58. return tfs
  59. }
  60. func (s Schema) toBQ() *bq.TableSchema {
  61. var fields []*bq.TableFieldSchema
  62. for _, f := range s {
  63. fields = append(fields, f.toBQ())
  64. }
  65. return &bq.TableSchema{Fields: fields}
  66. }
  67. func bqToFieldSchema(tfs *bq.TableFieldSchema) *FieldSchema {
  68. fs := &FieldSchema{
  69. Description: tfs.Description,
  70. Name: tfs.Name,
  71. Repeated: tfs.Mode == "REPEATED",
  72. Required: tfs.Mode == "REQUIRED",
  73. Type: FieldType(tfs.Type),
  74. }
  75. for _, f := range tfs.Fields {
  76. fs.Schema = append(fs.Schema, bqToFieldSchema(f))
  77. }
  78. return fs
  79. }
  80. func bqToSchema(ts *bq.TableSchema) Schema {
  81. if ts == nil {
  82. return nil
  83. }
  84. var s Schema
  85. for _, f := range ts.Fields {
  86. s = append(s, bqToFieldSchema(f))
  87. }
  88. return s
  89. }
// FieldType is the type of field.
// Its string value is the type name used on the wire (see the constants below).
type FieldType string

const (
	// StringFieldType is a string field type.
	StringFieldType FieldType = "STRING"
	// BytesFieldType is a bytes field type.
	BytesFieldType FieldType = "BYTES"
	// IntegerFieldType is an integer field type.
	IntegerFieldType FieldType = "INTEGER"
	// FloatFieldType is a float field type.
	FloatFieldType FieldType = "FLOAT"
	// BooleanFieldType is a boolean field type.
	BooleanFieldType FieldType = "BOOLEAN"
	// TimestampFieldType is a timestamp field type.
	TimestampFieldType FieldType = "TIMESTAMP"
	// RecordFieldType is a record field type. It is typically used to create columns with repeated or nested data.
	RecordFieldType FieldType = "RECORD"
	// DateFieldType is a date field type.
	DateFieldType FieldType = "DATE"
	// TimeFieldType is a time field type.
	TimeFieldType FieldType = "TIME"
	// DateTimeFieldType is a datetime field type.
	DateTimeFieldType FieldType = "DATETIME"
	// NumericFieldType is a numeric field type. Numeric types include integer types, floating point types and the
	// NUMERIC data type.
	NumericFieldType FieldType = "NUMERIC"
)
var (
	// errNoStruct is returned when schema inference is attempted on a type
	// that is neither a struct nor a pointer to a struct.
	errNoStruct = errors.New("bigquery: can only infer schema from struct or pointer to struct")
	// errUnsupportedFieldType is returned when a struct field has a Go type
	// that cannot be mapped to a BigQuery field type.
	errUnsupportedFieldType = errors.New("bigquery: unsupported type of field in struct")
	// errInvalidFieldName reports an invalid struct field name.
	// NOTE(review): not referenced in this part of the file; presumably used
	// by the field-name validation elsewhere in the package.
	errInvalidFieldName = errors.New("bigquery: invalid name of field in struct")
	// errBadNullable is returned when the "nullable" tag option is applied
	// to a field whose type does not accept it.
	errBadNullable = errors.New(`bigquery: use "nullable" only for []byte and struct pointers; for all other types, use a NullXXX type`)
	// errEmptyJSONSchema is returned by SchemaFromJSON for zero-length input.
	errEmptyJSONSchema = errors.New("bigquery: empty JSON schema")
	// fieldTypes is the set of known FieldType values; it is used to
	// validate type names read from a JSON schema definition.
	fieldTypes = map[FieldType]bool{
		StringFieldType:    true,
		BytesFieldType:     true,
		IntegerFieldType:   true,
		FloatFieldType:     true,
		BooleanFieldType:   true,
		TimestampFieldType: true,
		RecordFieldType:    true,
		DateFieldType:      true,
		TimeFieldType:      true,
		DateTimeFieldType:  true,
		NumericFieldType:   true,
	}
)

// typeOfByteSlice is the reflect.Type of []byte, used to recognize BYTES
// fields by exact type comparison.
var typeOfByteSlice = reflect.TypeOf([]byte{})
// InferSchema tries to derive a BigQuery schema from the supplied struct value.
// Each exported struct field is mapped to a field in the schema.
//
// The following BigQuery types are inferred from the corresponding Go types.
// (This is the same mapping as that used for RowIterator.Next.) Fields inferred
// from these types are marked required (non-nullable).
//
//	STRING      string
//	BOOL        bool
//	INTEGER     int, int8, int16, int32, int64, uint8, uint16, uint32
//	FLOAT       float32, float64
//	BYTES       []byte
//	TIMESTAMP   time.Time
//	DATE        civil.Date
//	TIME        civil.Time
//	DATETIME    civil.DateTime
//	NUMERIC     *big.Rat
//
// The big.Rat type supports numbers of arbitrary size and precision. Values
// will be rounded to 9 digits after the decimal point before being transmitted
// to BigQuery. See https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type
// for more on NUMERIC.
//
// A Go slice or array type is inferred to be a BigQuery repeated field of the
// element type. The element type must be one of the above listed types.
//
// Nullable fields are inferred from the NullXXX types, declared in this package:
//
//	STRING      NullString
//	BOOL        NullBool
//	INTEGER     NullInt64
//	FLOAT       NullFloat64
//	TIMESTAMP   NullTimestamp
//	DATE        NullDate
//	TIME        NullTime
//	DATETIME    NullDateTime
//
// For a nullable BYTES field, use the type []byte and tag the field "nullable" (see below).
// For a nullable NUMERIC field, use the type *big.Rat and tag the field "nullable".
//
// A struct field that is of struct type is inferred to be a required field of type
// RECORD with a schema inferred recursively. For backwards compatibility, a field of
// type pointer to struct is also inferred to be required. To get a nullable RECORD
// field, use the "nullable" tag (see below).
//
// InferSchema returns an error if any of the examined fields is of type uint,
// uint64, uintptr, map, interface, complex64, complex128, func, or chan. Future
// versions may handle these cases without error.
//
// Recursively defined structs are also disallowed.
//
// Struct fields may be tagged in a way similar to the encoding/json package.
// A tag of the form
//
//	bigquery:"name"
//
// uses "name" instead of the struct field name as the BigQuery field name.
// A tag of the form
//
//	bigquery:"-"
//
// omits the field from the inferred schema.
// The "nullable" option marks the field as nullable (not required). It is only
// needed for []byte, *big.Rat and pointer-to-struct fields, and cannot appear on other
// fields. In this example, the Go name of the field is retained:
//
//	bigquery:",nullable"
//
// Only the dynamic type of st is examined; the result for each type is cached.
func InferSchema(st interface{}) (Schema, error) {
	return inferSchemaReflectCached(reflect.TypeOf(st))
}
// schemaCache memoizes schema inference results. Keys are reflect.Type
// values and values are cacheVal.
var schemaCache sync.Map

// cacheVal is the value stored in schemaCache: the outcome of one schema
// inference, whether it succeeded or failed.
type cacheVal struct {
	schema Schema // inferred schema
	err    error  // inference error, stored alongside the schema
}
  208. func inferSchemaReflectCached(t reflect.Type) (Schema, error) {
  209. var cv cacheVal
  210. v, ok := schemaCache.Load(t)
  211. if ok {
  212. cv = v.(cacheVal)
  213. } else {
  214. s, err := inferSchemaReflect(t)
  215. cv = cacheVal{s, err}
  216. schemaCache.Store(t, cv)
  217. }
  218. return cv.schema, cv.err
  219. }
  220. func inferSchemaReflect(t reflect.Type) (Schema, error) {
  221. rec, err := hasRecursiveType(t, nil)
  222. if err != nil {
  223. return nil, err
  224. }
  225. if rec {
  226. return nil, fmt.Errorf("bigquery: schema inference for recursive type %s", t)
  227. }
  228. return inferStruct(t)
  229. }
  230. func inferStruct(t reflect.Type) (Schema, error) {
  231. switch t.Kind() {
  232. case reflect.Ptr:
  233. if t.Elem().Kind() != reflect.Struct {
  234. return nil, errNoStruct
  235. }
  236. t = t.Elem()
  237. fallthrough
  238. case reflect.Struct:
  239. return inferFields(t)
  240. default:
  241. return nil, errNoStruct
  242. }
  243. }
  244. // inferFieldSchema infers the FieldSchema for a Go type
  245. func inferFieldSchema(rt reflect.Type, nullable bool) (*FieldSchema, error) {
  246. // Only []byte and struct pointers can be tagged nullable.
  247. if nullable && !(rt == typeOfByteSlice || rt.Kind() == reflect.Ptr && rt.Elem().Kind() == reflect.Struct) {
  248. return nil, errBadNullable
  249. }
  250. switch rt {
  251. case typeOfByteSlice:
  252. return &FieldSchema{Required: !nullable, Type: BytesFieldType}, nil
  253. case typeOfGoTime:
  254. return &FieldSchema{Required: true, Type: TimestampFieldType}, nil
  255. case typeOfDate:
  256. return &FieldSchema{Required: true, Type: DateFieldType}, nil
  257. case typeOfTime:
  258. return &FieldSchema{Required: true, Type: TimeFieldType}, nil
  259. case typeOfDateTime:
  260. return &FieldSchema{Required: true, Type: DateTimeFieldType}, nil
  261. case typeOfRat:
  262. return &FieldSchema{Required: !nullable, Type: NumericFieldType}, nil
  263. }
  264. if ft := nullableFieldType(rt); ft != "" {
  265. return &FieldSchema{Required: false, Type: ft}, nil
  266. }
  267. if isSupportedIntType(rt) || isSupportedUintType(rt) {
  268. return &FieldSchema{Required: true, Type: IntegerFieldType}, nil
  269. }
  270. switch rt.Kind() {
  271. case reflect.Slice, reflect.Array:
  272. et := rt.Elem()
  273. if et != typeOfByteSlice && (et.Kind() == reflect.Slice || et.Kind() == reflect.Array) {
  274. // Multi dimensional slices/arrays are not supported by BigQuery
  275. return nil, errUnsupportedFieldType
  276. }
  277. if nullableFieldType(et) != "" {
  278. // Repeated nullable types are not supported by BigQuery.
  279. return nil, errUnsupportedFieldType
  280. }
  281. f, err := inferFieldSchema(et, false)
  282. if err != nil {
  283. return nil, err
  284. }
  285. f.Repeated = true
  286. f.Required = false
  287. return f, nil
  288. case reflect.Ptr:
  289. if rt.Elem().Kind() != reflect.Struct {
  290. return nil, errUnsupportedFieldType
  291. }
  292. fallthrough
  293. case reflect.Struct:
  294. nested, err := inferStruct(rt)
  295. if err != nil {
  296. return nil, err
  297. }
  298. return &FieldSchema{Required: !nullable, Type: RecordFieldType, Schema: nested}, nil
  299. case reflect.String:
  300. return &FieldSchema{Required: !nullable, Type: StringFieldType}, nil
  301. case reflect.Bool:
  302. return &FieldSchema{Required: !nullable, Type: BooleanFieldType}, nil
  303. case reflect.Float32, reflect.Float64:
  304. return &FieldSchema{Required: !nullable, Type: FloatFieldType}, nil
  305. default:
  306. return nil, errUnsupportedFieldType
  307. }
  308. }
  309. // inferFields extracts all exported field types from struct type.
  310. func inferFields(rt reflect.Type) (Schema, error) {
  311. var s Schema
  312. fields, err := fieldCache.Fields(rt)
  313. if err != nil {
  314. return nil, err
  315. }
  316. for _, field := range fields {
  317. var nullable bool
  318. for _, opt := range field.ParsedTag.([]string) {
  319. if opt == nullableTagOption {
  320. nullable = true
  321. break
  322. }
  323. }
  324. f, err := inferFieldSchema(field.Type, nullable)
  325. if err != nil {
  326. return nil, err
  327. }
  328. f.Name = field.Name
  329. s = append(s, f)
  330. }
  331. return s, nil
  332. }
  333. // isSupportedIntType reports whether t is an int type that can be properly
  334. // represented by the BigQuery INTEGER/INT64 type.
  335. func isSupportedIntType(t reflect.Type) bool {
  336. switch t.Kind() {
  337. case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
  338. return true
  339. default:
  340. return false
  341. }
  342. }
  343. // isSupportedIntType reports whether t is a uint type that can be properly
  344. // represented by the BigQuery INTEGER/INT64 type.
  345. func isSupportedUintType(t reflect.Type) bool {
  346. switch t.Kind() {
  347. case reflect.Uint8, reflect.Uint16, reflect.Uint32:
  348. return true
  349. default:
  350. return false
  351. }
  352. }
  353. // typeList is a linked list of reflect.Types.
  354. type typeList struct {
  355. t reflect.Type
  356. next *typeList
  357. }
  358. func (l *typeList) has(t reflect.Type) bool {
  359. for l != nil {
  360. if l.t == t {
  361. return true
  362. }
  363. l = l.next
  364. }
  365. return false
  366. }
  367. // hasRecursiveType reports whether t or any type inside t refers to itself, directly or indirectly,
  368. // via exported fields. (Schema inference ignores unexported fields.)
  369. func hasRecursiveType(t reflect.Type, seen *typeList) (bool, error) {
  370. for t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice || t.Kind() == reflect.Array {
  371. t = t.Elem()
  372. }
  373. if t.Kind() != reflect.Struct {
  374. return false, nil
  375. }
  376. if seen.has(t) {
  377. return true, nil
  378. }
  379. fields, err := fieldCache.Fields(t)
  380. if err != nil {
  381. return false, err
  382. }
  383. seen = &typeList{t, seen}
  384. // Because seen is a linked list, additions to it from one field's
  385. // recursive call will not affect the value for subsequent fields' calls.
  386. for _, field := range fields {
  387. ok, err := hasRecursiveType(field.Type, seen)
  388. if err != nil {
  389. return false, err
  390. }
  391. if ok {
  392. return true, nil
  393. }
  394. }
  395. return false, nil
  396. }
// bigQueryJSONField is an individual field in a JSON BigQuery table schema definition
// (as generated by https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema).
type bigQueryJSONField struct {
	Description string              `json:"description"`
	Fields      []bigQueryJSONField `json:"fields"` // nested fields, if any
	Mode        string              `json:"mode"`   // compared against "REQUIRED" and "REPEATED" during conversion
	Name        string              `json:"name"`
	Type        string              `json:"type"` // must name a known FieldType
}
  406. // convertSchemaFromJSON generates a Schema:
  407. func convertSchemaFromJSON(fs []bigQueryJSONField) (Schema, error) {
  408. convertedSchema := Schema{}
  409. for _, f := range fs {
  410. convertedFieldSchema := &FieldSchema{
  411. Description: f.Description,
  412. Name: f.Name,
  413. Required: f.Mode == "REQUIRED",
  414. Repeated: f.Mode == "REPEATED",
  415. }
  416. if len(f.Fields) > 0 {
  417. convertedNestedFieldSchema, err := convertSchemaFromJSON(f.Fields)
  418. if err != nil {
  419. return nil, err
  420. }
  421. convertedFieldSchema.Schema = convertedNestedFieldSchema
  422. }
  423. // Check that the field-type (string) maps to a known FieldType:
  424. if _, ok := fieldTypes[FieldType(f.Type)]; !ok {
  425. return nil, fmt.Errorf("unknown field type (%v)", f.Type)
  426. }
  427. convertedFieldSchema.Type = FieldType(f.Type)
  428. convertedSchema = append(convertedSchema, convertedFieldSchema)
  429. }
  430. return convertedSchema, nil
  431. }
  432. // SchemaFromJSON takes a JSON BigQuery table schema definition
  433. // (as generated by https://github.com/GoogleCloudPlatform/protoc-gen-bq-schema)
  434. // and returns a fully-populated Schema.
  435. func SchemaFromJSON(schemaJSON []byte) (Schema, error) {
  436. var bigQuerySchema []bigQueryJSONField
  437. // Make sure we actually have some content:
  438. if len(schemaJSON) == 0 {
  439. return nil, errEmptyJSONSchema
  440. }
  441. if err := json.Unmarshal(schemaJSON, &bigQuerySchema); err != nil {
  442. return nil, err
  443. }
  444. return convertSchemaFromJSON(bigQuerySchema)
  445. }