|
| 1 | +// =============================================================================== |
| 2 | +// RFC5987 Decoder |
| 3 | +// |
| 4 | +// http://greenbytes.de/tech/webdav/rfc5987.html |
| 5 | +// =============================================================================== |
| 6 | +// Copyright Steven Robbins. All rights reserved. |
| 7 | +// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY |
| 8 | +// OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT |
| 9 | +// LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 10 | +// FITNESS FOR A PARTICULAR PURPOSE. |
| 11 | +// =============================================================================== |
namespace HttpMultipartParser
{
    using System;
    using System.Collections.Generic;
    using System.Globalization;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;

    /// <summary>
    /// Provides a way to decode the value of so-called "star-parameters"
    /// (for example <c>filename*=UTF-8''%E2%82%AC%20rates</c>) according to
    /// RFC 5987, which is superseded by RFC 8187.
    ///
    /// <see href="https://www.rfc-editor.org/rfc/rfc5987">RFC 5987</see>
    /// <see href="https://www.rfc-editor.org/rfc/rfc8187">RFC 8187</see>
    /// <see href="https://author-tools.ietf.org/diff?doc_1=5987&amp;doc_2=8187">Handy side-by-side comparison</see> of the two RFCs.
    /// </summary>
    /// <remarks>Taken from <see href="https://github.com/grumpydev/RFC5987-Decoder" />.</remarks>
    public static class RFC5987
    {
        /// <summary>
        /// Character set used when the one named in the input is missing or
        /// is not known to the runtime.
        /// </summary>
        private const string FallbackCharacterSet = @"UTF-8";

        /// <summary>
        /// Regex for the encoded string format detailed in
        /// http://greenbytes.de/tech/webdav/rfc5987.html:
        /// <c>charset'language'encoded-data</c>. The language part may be empty.
        /// </summary>
        private static readonly Regex EncodedStringRegex = new Regex(@"(?:(?<charset>.*?))'(?<language>.*?)?'(?<encodeddata>.*?)$", RegexOptions.Compiled);

        /// <summary>
        /// Decode a RFC5987 encoded value.
        /// </summary>
        /// <param name="inputString">Encoded input string.</param>
        /// <returns>
        /// Decoded string. Input that does not match the
        /// <c>charset'language'data</c> shape is returned unchanged.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="inputString"/> is null.</exception>
        public static string Decode(string inputString)
        {
            if (inputString == null)
            {
                throw new ArgumentNullException(nameof(inputString));
            }

            return EncodedStringRegex.Replace(
                inputString,
                m =>
                {
                    var characterSet = m.Groups["charset"].Value;
                    var encodedData = m.Groups["encodeddata"].Value;

                    if (!IsSupportedCharacterSet(characterSet))
                    {
                        // Fall back to UTF-8 if an invalid/unsupported character set is found
                        characterSet = FallbackCharacterSet;
                    }

                    var textEncoding = Encoding.GetEncoding(characterSet);

                    return textEncoding.GetString(GetDecodedBytes(encodedData, textEncoding).ToArray());
                });
        }

        /// <summary>
        /// Get the decoded bytes from the encoded data string.
        /// </summary>
        /// <param name="encodedData">Encoded data.</param>
        /// <param name="literalEncoding">
        /// Encoding used to turn non-ASCII characters that appear un-escaped
        /// in <paramref name="encodedData"/> into bytes.
        /// </param>
        /// <returns>Decoded bytes.</returns>
        private static IEnumerable<byte> GetDecodedBytes(string encodedData, Encoding literalEncoding)
        {
            for (int i = 0; i < encodedData.Length; i++)
            {
                char current = encodedData[i];

                // A percent escape needs two more characters; a '%' too close
                // to the end of the data is kept as a literal instead of
                // reading past the end of the string.
                if (current == '%' && i + 2 < encodedData.Length)
                {
                    // AllowHexSpecifier accepts hex digits only (no whitespace, no sign).
                    int characterValue;
                    if (int.TryParse(encodedData.Substring(i + 1, 2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out characterValue))
                    {
                        yield return (byte)characterValue;
                        i += 2;
                        continue;
                    }

                    // Not a valid escape: fall through and keep the '%' as-is,
                    // so the following characters are processed normally.
                }

                if (current <= 0x7F)
                {
                    yield return (byte)current;
                    continue;
                }

                // Un-escaped non-ASCII text: casting a char to byte would drop
                // its high bits, so encode the whole run with the target
                // encoding. Taking the run as a unit keeps surrogate pairs together.
                int runStart = i;
                while (i + 1 < encodedData.Length && encodedData[i + 1] > 0x7F)
                {
                    i++;
                }

                foreach (byte literalByte in literalEncoding.GetBytes(encodedData.Substring(runStart, i - runStart + 1)))
                {
                    yield return literalByte;
                }
            }
        }

        /// <summary>
        /// Determines if a character set is supported.
        /// </summary>
        /// <param name="characterSet">Character set name.</param>
        /// <returns>Bool representing whether the character set is supported.</returns>
        private static bool IsSupportedCharacterSet(string characterSet)
        {
            if (string.IsNullOrEmpty(characterSet))
            {
                return false;
            }

            // Character set names are protocol identifiers, so compare them
            // ordinally rather than with culture-aware rules.
            return Encoding.GetEncodings()
                .Any(e => string.Equals(e.Name, characterSet, StringComparison.OrdinalIgnoreCase));
        }
    }
}
0 commit comments