Skip to content

Commit

Permalink
Handle encoding binary data separately
Browse files Browse the repository at this point in the history
Binary data should be encoded as such when interpolated, for example in
bind variables. Otherwise valuable type information that the value is
actually binary is lost.

Also means that we should not mark literals in the parser as binary
either. They are strings and there's other introducers like `_binary`
that end up marking things as proper binary.

Signed-off-by: Dirkjan Bussink <[email protected]>
  • Loading branch information
dbussink committed Oct 17, 2024
1 parent 0fa7e58 commit 2c56bcf
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 55 deletions.
57 changes: 34 additions & 23 deletions go/sqltypes/value.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package sqltypes

import (
"bytes"
"encoding/base64"
"encoding/hex"
"encoding/json"
"errors"
Expand Down Expand Up @@ -441,6 +440,8 @@ func (v Value) EncodeSQL(b BinWriter) {
switch {
case v.Type() == Null:
b.Write(NullBytes)
case v.IsBinary():
encodeBinarySQL(v.val, b)
case v.IsQuoted():
encodeBytesSQL(v.val, b)
case v.Type() == Bit:
Expand All @@ -456,6 +457,8 @@ func (v Value) EncodeSQLStringBuilder(b *strings.Builder) {
switch {
case v.Type() == Null:
b.Write(NullBytes)
case v.IsBinary():
encodeBinarySQLStringBuilder(v.val, b)
case v.IsQuoted():
encodeBytesSQLStringBuilder(v.val, b)
case v.Type() == Bit:
Expand All @@ -482,6 +485,8 @@ func (v Value) EncodeSQLBytes2(b *bytes2.Buffer) {
switch {
case v.Type() == Null:
b.Write(NullBytes)
case v.IsBinary():
encodeBinarySQLBytes2(v.val, b)
case v.IsQuoted():
encodeBytesSQLBytes2(v.val, b)
case v.Type() == Bit:
Expand All @@ -491,18 +496,6 @@ func (v Value) EncodeSQLBytes2(b *bytes2.Buffer) {
}
}

// EncodeASCII encodes the value using 7-bit clean ascii bytes.
func (v Value) EncodeASCII(b BinWriter) {
switch {
case v.Type() == Null:
b.Write(NullBytes)
case v.IsQuoted() || v.Type() == Bit:
encodeBytesASCII(v.val, b)
default:
b.Write(v.val)
}
}

// IsNull returns true if Value is null.
func (v Value) IsNull() bool {
return v.Type() == Null
Expand Down Expand Up @@ -758,6 +751,34 @@ func (v Value) TinyWeight() uint32 {
return v.tinyweight
}

func encodeBinarySQL(val []byte, b BinWriter) {
buf := &bytes2.Buffer{}
encodeBinarySQLBytes2(val, buf)
b.Write(buf.Bytes())
}

const hextable = "0123456789ABCDEF"

func encodeBinarySQLBytes2(val []byte, buf *bytes2.Buffer) {
buf.WriteByte('X')
buf.WriteByte('\'')
for _, ch := range val {
buf.WriteByte(hextable[ch>>4])
buf.WriteByte(hextable[ch&0x0f])
}
buf.WriteByte('\'')
}

func encodeBinarySQLStringBuilder(val []byte, buf *strings.Builder) {
buf.WriteByte('X')
buf.WriteByte('\'')
for _, ch := range val {
buf.WriteByte(hextable[ch>>4])
buf.WriteByte(hextable[ch&0x0f])
}
buf.WriteByte('\'')
}

func encodeBytesSQL(val []byte, b BinWriter) {
buf := &bytes2.Buffer{}
encodeBytesSQLBytes2(val, buf)
Expand Down Expand Up @@ -838,16 +859,6 @@ func encodeBytesSQLBits(val []byte, b BinWriter) {
fmt.Fprint(b, "'")
}

func encodeBytesASCII(val []byte, b BinWriter) {
buf := &bytes2.Buffer{}
buf.WriteByte('\'')
encoder := base64.NewEncoder(base64.StdEncoding, buf)
encoder.Write(val)
encoder.Close()
buf.WriteByte('\'')
b.Write(buf.Bytes())
}

// SQLEncodeMap specifies how to escape binary data with '\'.
// Complies to https://dev.mysql.com/doc/refman/5.7/en/string-literals.html
// Handling escaping of % and _ is different than other characters.
Expand Down
35 changes: 16 additions & 19 deletions go/sqltypes/value_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,34 +367,28 @@ func TestEncode(t *testing.T) {
outSQL string
outASCII string
}{{
in: NULL,
outSQL: "null",
outASCII: "null",
in: NULL,
outSQL: "null",
}, {
in: TestValue(Int64, "1"),
outSQL: "1",
outASCII: "1",
in: TestValue(Int64, "1"),
outSQL: "1",
}, {
in: TestValue(VarChar, "foo"),
outSQL: "'foo'",
outASCII: "'Zm9v'",
in: TestValue(VarChar, "foo"),
outSQL: "'foo'",
}, {
in: TestValue(VarChar, "\x00'\"\b\n\r\t\x1A\\"),
outSQL: "'\\0\\'\"\\b\\n\\r\\t\\Z\\\\'",
outASCII: "'ACciCAoNCRpc'",
in: TestValue(VarChar, "\x00'\"\b\n\r\t\x1A\\"),
outSQL: "'\\0\\'\"\\b\\n\\r\\t\\Z\\\\'",
}, {
in: TestValue(VarBinary, "\x00'\"\b\n\r\t\x1A\\"),
outSQL: "X'002722080A0D091A5C'",
}, {
in: TestValue(Bit, "a"),
outSQL: "b'01100001'",
outASCII: "'YQ=='",
in: TestValue(Bit, "a"),
outSQL: "b'01100001'",
}}
for _, tcase := range testcases {
var buf strings.Builder
tcase.in.EncodeSQL(&buf)
assert.Equal(t, tcase.outSQL, buf.String())

buf.Reset()
tcase.in.EncodeASCII(&buf)
assert.Equal(t, tcase.outASCII, buf.String())
}
}

Expand Down Expand Up @@ -639,6 +633,9 @@ func TestEncodeSQLStringBuilder(t *testing.T) {
}, {
in: TestTuple(TestValue(Int64, "1"), TestValue(VarChar, "foo")),
outSQL: "(1, 'foo')",
}, {
in: TestValue(VarBinary, "foo"),
outSQL: "X'666F6F'",
}}
for _, tcase := range testcases {
var buf strings.Builder
Expand Down
2 changes: 1 addition & 1 deletion go/vt/sqlparser/ast_format.go
Original file line number Diff line number Diff line change
Expand Up @@ -1357,7 +1357,7 @@ func (node *AssignmentExpr) Format(buf *TrackedBuffer) {
func (node *Literal) Format(buf *TrackedBuffer) {
switch node.Type {
case StrVal:
sqltypes.MakeTrusted(sqltypes.VarBinary, node.Bytes()).EncodeSQL(buf)
sqltypes.MakeTrusted(sqltypes.VarChar, node.Bytes()).EncodeSQL(buf)
case IntVal, FloatVal, DecimalVal, HexNum, BitNum:
buf.astPrintf(node, "%#s", node.Val)
case HexVal:
Expand Down
2 changes: 1 addition & 1 deletion go/vt/sqlparser/ast_format_fast.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 13 additions & 4 deletions go/vt/sqlparser/encodable_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,30 @@ func TestEncodable(t *testing.T) {
in Encodable
out string
}{{
in: InsertValues{{
sqltypes.NewInt64(1),
sqltypes.NewVarChar("foo('a')"),
}, {
sqltypes.NewInt64(2),
sqltypes.NewVarChar("bar(`b`)"),
}},
out: "(1, 'foo(\\'a\\')'), (2, 'bar(`b`)')",
}, {
in: InsertValues{{
sqltypes.NewInt64(1),
sqltypes.NewVarBinary("foo('a')"),
}, {
sqltypes.NewInt64(2),
sqltypes.NewVarBinary("bar(`b`)"),
}},
out: "(1, 'foo(\\'a\\')'), (2, 'bar(`b`)')",
out: "(1, X'666F6F2827612729'), (2, X'6261722860626029')",
}, {
// Single column.
in: &TupleEqualityList{
Columns: []IdentifierCI{NewIdentifierCI("pk")},
Rows: [][]sqltypes.Value{
{sqltypes.NewInt64(1)},
{sqltypes.NewVarBinary("aa")},
{sqltypes.NewVarChar("aa")},
},
},
out: "pk in (1, 'aa')",
Expand All @@ -53,11 +62,11 @@ func TestEncodable(t *testing.T) {
Rows: [][]sqltypes.Value{
{
sqltypes.NewInt64(1),
sqltypes.NewVarBinary("aa"),
sqltypes.NewVarChar("aa"),
},
{
sqltypes.NewInt64(2),
sqltypes.NewVarBinary("bb"),
sqltypes.NewVarChar("bb"),
},
},
},
Expand Down
6 changes: 3 additions & 3 deletions go/vt/sqlparser/parsed_query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func TestGenerateQuery(t *testing.T) {
Columns: []IdentifierCI{NewIdentifierCI("pk")},
Rows: [][]sqltypes.Value{
{sqltypes.NewInt64(1)},
{sqltypes.NewVarBinary("aa")},
{sqltypes.NewVarChar("aa")},
},
},
},
Expand All @@ -132,11 +132,11 @@ func TestGenerateQuery(t *testing.T) {
Rows: [][]sqltypes.Value{
{
sqltypes.NewInt64(1),
sqltypes.NewVarBinary("aa"),
sqltypes.NewVarChar("aa"),
},
{
sqltypes.NewInt64(2),
sqltypes.NewVarBinary("bb"),
sqltypes.NewVarChar("bb"),
},
},
},
Expand Down
4 changes: 2 additions & 2 deletions go/vt/vtgate/evalengine/translate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ func TestTranslateSimplification(t *testing.T) {
{"coalesce(NULL, 2, NULL, 4)", ok("coalesce(null, 2, null, 4)"), ok("2")},
{"coalesce(NULL, NULL)", ok("coalesce(null, null)"), ok("null")},
{"coalesce(NULL)", ok("coalesce(null)"), ok("null")},
{"weight_string('foobar')", ok(`weight_string('foobar')`), ok("'\x1c\xe5\x1d\xdd\x1d\xdd\x1c`\x1cG\x1e3'")},
{"weight_string('foobar' as char(12))", ok(`weight_string('foobar' as char(12))`), ok("'\x1c\xe5\x1d\xdd\x1d\xdd\x1c`\x1cG\x1e3'")},
{"weight_string('foobar')", ok(`weight_string('foobar')`), ok("X'1CE51DDD1DDD1C601C471E33'")},
{"weight_string('foobar' as char(12))", ok(`weight_string('foobar' as char(12))`), ok("X'1CE51DDD1DDD1C601C471E33'")},
{"case when 1 = 1 then 2 else 3 end", ok("case when 1 = 1 then 2 else 3"), ok("2")},
{"case when null then 2 when 12 = 4 then 'ohnoes' else 42 end", ok(`case when null then 2 when 12 = 4 then 'ohnoes' else 42`), ok(`'42'`)},
{"convert('a', char(2) character set utf8mb4)", ok(`convert('a', CHAR(2) character set utf8mb4_0900_ai_ci)`), ok(`'a'`)},
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtgate/planbuilder/testdata/dml_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -2823,7 +2823,7 @@
"Query": "delete from `user` where `name` = _binary 'abc'",
"Table": "user",
"Values": [
"'abc'"
"X'616263'"
],
"Vindex": "name_user_map"
},
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtgate/planbuilder/testdata/select_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,7 @@
"Query": "select * from `user` where id = 0x04",
"Table": "`user`",
"Values": [
"'\u0004'"
"X'04'"
],
"Vindex": "user_index"
},
Expand Down

0 comments on commit 2c56bcf

Please sign in to comment.