2007-10-21 Paolo Bonzini * Sets.st: Fix all sorts of off-by-one errors. * iconvtests.st: Add regression tests. --- orig/packages/iconv/Sets.st +++ mod/packages/iconv/Sets.st @@ -996,10 +996,10 @@ Iconv is skipped altogether and only Sma ifTrue: [readBuffer replaceFrom: 1 - to: readEnd - readPos + to: readEnd - readPos + 1 with: readBuffer startingAt: readPos. - readEnd := readEnd - readPos. + readEnd := readEnd - readPos + 1. readPos := 1]. [readEnd >= self bufferSize or: [self atEndOfInput]] whileFalse: [readEnd := readEnd + 1. @@ -1044,7 +1044,7 @@ Iconv is skipped altogether and only Sma convertMore [ - | bytesLeft fine | + | oldReadPos bytesLeft fine | recodedBuffer isNil ifTrue: [self initBuffers]. readBuffer isNil ifTrue: [^true]. iconvHandle isNil ifTrue: [self iconvOpen]. @@ -1058,15 +1058,18 @@ Iconv is skipped altogether and only Sma to: recodedBuffer size: self bufferSize state: bytesLeft. - readPos := self bufferSize - (bytesLeft at: 1). + oldReadPos := readPos. "Remember the read position so that an unchanged value can be detected below. NOTE(review): the next statement presumably treats (bytesLeft at: 1) as the count of input bytes left unconverted; confirm against the code that fills bytesLeft." + readPos := readEnd + 1 - (bytesLeft at: 1). recodedEnd := self bufferSize - (bytesLeft at: 2). recodedPos := 1. fine ifFalse: [InvalidSequenceError signal. ^true]. + readPos > readEnd ifFalse: [ "readPos has not passed readEnd: when readPos did not move, signal IncompleteSequenceError and answer true; otherwise answer false." + readPos = oldReadPos ifTrue: [ IncompleteSequenceError signal ]. + ^readPos = oldReadPos ]. self atEndOfInput ifFalse: [^false]. "At end of input, check whether the last character was complete." readBuffer := nil. - readPos <= readEnd ifTrue: [IncompleteSequenceError signal. ^true]. ^recodedEnd = 0 ] ] --- orig/packages/iconv/iconvtests.st +++ mod/packages/iconv/iconvtests.st @@ -180,5 +180,53 @@ TestCase subclass: IconvTest [ str := UnicodeString with: $<16r10FFFF>. self assert: (str asString: 'UTF-7') asString = '+2//f/w-' ] + + testRoundTrip [ "Convert strings of 1, 1024 and 1025 elements to the other string class and back through UTF-8, asserting that each result equals the original. NOTE(review): 1024 and 1025 presumably straddle the converter buffer size; confirm against bufferSize." + + | s | + s := String new: 1 withAll: $x. + self assert: (s asUnicodeString asString: 'UTF-8') = s. + s := String new: 1024 withAll: $x. + self assert: (s asUnicodeString asString: 'UTF-8') = s. + s := String new: 1025 withAll: $x. 
+ self assert: (s asUnicodeString asString: 'UTF-8') = s. + s := UnicodeString new: 1 withAll: $x. + self assert: (s asString: 'UTF-8') asUnicodeString = s. + s := UnicodeString new: 1024 withAll: $x. + self assert: (s asString: 'UTF-8') asUnicodeString = s. + s := UnicodeString new: 1025 withAll: $x. + self assert: (s asString: 'UTF-8') asUnicodeString = s. + s := UnicodeString new: 1025 withAll: $<16r4000>. + self assert: (s asString: 'UTF-8') asUnicodeString = s. + s := UnicodeString new: 1025 withAll: $<16r4000>. "NOTE(review): this assignment and the assertion after it repeat the previous pair verbatim; a different size may have been intended." + self assert: (s asString: 'UTF-8') asUnicodeString = s + ] + + testExceptions [ "Check which error asUnicodeString signals for byte arrays that end in the middle of a multi-byte sequence or contain a byte that cannot start one." + + | b | + self should: [ #[228] asUnicodeString ] raise: IncompleteSequenceError. + self should: [ #[128] asUnicodeString ] raise: InvalidSequenceError. + self should: [ #[228 128] asUnicodeString ] raise: IncompleteSequenceError. + + "On some OSes we return IncompleteSequenceError for the following." + "self should: [ #[228 228] asUnicodeString ] raise: InvalidSequenceError." + + b := ByteArray new: 1026. "The three atAll:put: sends below fill b with 342 repetitions of the byte triple 228 128 128, which is the UTF-8 encoding of code point 16r4000." + b atAll: (1 to: 1026 by: 3) put: 228. + b atAll: (2 to: 1026 by: 3) put: 128. + b atAll: (3 to: 1026 by: 3) put: 128. + self shouldnt: [ b asUnicodeString ] raise: IncompleteSequenceError. + + b := b copyFrom: 1 to: 1025. "Drop the last byte, so the final triple is cut short to 228 128." + self should: [ b asUnicodeString ] raise: IncompleteSequenceError. + + b at: 1025 put: 228. + "On some OSes we return IncompleteSequenceError for the following." + "self should: [ b asUnicodeString ] raise: InvalidSequenceError." + + b := b copyFrom: 1 to: 1024. "Keep only the leading 228 of the final triple." + self should: [ b asUnicodeString ] raise: IncompleteSequenceError. + ] ]