Answers:
您可以使用替换正则表达式。
s/[;,\t\r ]|[\n]{2}/\n/g
开头的 s/ 表示执行搜索;[ 和 ] 之间列出的是要搜索的字符(顺序不限);第二个 / 用来分隔搜索文本和替换文本。
用自然语言表述就是:“搜索 ; 或 , 或 \t 或 \r 或空格,或者恰好两个连续的 \n,然后将其替换为 \n。”
在 C# 中,您可以执行以下操作(需要先导入 System.Text.RegularExpressions):
// Matches a single ';', ',', tab, carriage return or space, or exactly two consecutive newlines.
Regex pattern = new Regex("[;,\t\r ]|[\n]{2}");
// Strings are immutable: Replace returns a new string, so the result has to be assigned back.
myString = pattern.Replace(myString, "\n");
\s 实际上相当于 [ \f\n\r\t\v],所以您的模式包含了一些原始问题中没有的字符。另外,原始问题还要求执行 Replace("\n\n", "\n"),而您的正则表达式无法处理这一点。
如果您感觉特别聪明并且不想使用正则表达式:
// Sample input containing every separator kind, including a run of consecutive newlines.
string s = "this;is,\ra\t\n\n\ntest";
// Characters that are treated as separators.
char[] separators = { ' ', ';', ',', '\r', '\t', '\n' };
// RemoveEmptyEntries drops the empty pieces produced by adjacent separators,
// so joining with "\n" leaves exactly one newline between the remaining tokens.
string[] temp = s.Split(separators, StringSplitOptions.RemoveEmptyEntries);
s = string.Join("\n", temp);
您也可以毫不费力地将其包装在扩展方法中。
编辑:或只是等待2分钟,我最终还是会写它:)
/// <summary>String helper for collapsing separator characters.</summary>
public static class ExtensionMethods
{
    /// <summary>
    /// Splits <paramref name="s"/> on any character in <paramref name="separators"/>,
    /// discards the empty pieces, and joins what is left with <paramref name="newVal"/>.
    /// </summary>
    /// <param name="s">The text to process.</param>
    /// <param name="separators">Characters to split on.</param>
    /// <param name="newVal">Text placed between the remaining pieces.</param>
    /// <returns>The re-joined string.</returns>
    public static string Replace(this string s, char[] separators, string newVal)
    {
        // Empty entries are removed, so runs of separators (and leading/trailing ones)
        // do not produce repeated or dangling newVal occurrences.
        return string.Join(
            newVal,
            s.Split(separators, StringSplitOptions.RemoveEmptyEntries));
    }
}
瞧...
// Sample input run through the Replace(char[], string) extension method on string.
string s = "this;is,\ra\t\n\n\ntest";
char[] separators = { ' ', ';', ',', '\r', '\t', '\n' };
s = s.Replace(separators, "\n");
Regex.Replace 比连续多次调用 string.Replace 慢 8 倍以上,并且比 Split + Join 慢 4 倍。参见 gist.github.com/MarcinJuraszek/c1437d925548561ba210a1c6ed144452
您可以使用Linq的Aggregate函数:
string s = "the\nquick\tbrown\rdog,jumped;over the lazy fox.";
char[] chars = { ' ', ';', ',', '\r', '\t', '\n' };
// Replace each listed character in turn, carrying the intermediate string forward.
string snew = s;
foreach (char target in chars)
{
    snew = snew.Replace(target, '\n');
}
这是扩展方法:
/// <summary>
/// Returns <paramref name="seed"/> with every occurrence of each character in
/// <paramref name="chars"/> replaced by <paramref name="replacementCharacter"/>.
/// </summary>
/// <param name="seed">The starting text.</param>
/// <param name="chars">Characters to replace.</param>
/// <param name="replacementCharacter">Character written in their place.</param>
public static string ReplaceAll(this string seed, char[] chars, char replacementCharacter)
{
    // Fold over the characters: each step replaces one character in the running result.
    return Enumerable.Aggregate(
        chars,
        seed,
        (text, target) => text.Replace(target, replacementCharacter));
}
扩展方法的用法示例:
// Calls the ReplaceAll extension on s, replacing every character listed in chars with '\n'.
string snew = s.ReplaceAll(chars, '\n');
这是最短的方法:
// Replaces each ';', ',', tab, carriage return or space, or a pair of consecutive newlines, with "\n".
myString = Regex.Replace(myString, @"[;,\t\r ]|[\n]{2}", "\n");
哦,这性能太可怕了!这个答案有点过时,但仍然……
/// <summary>String helpers that avoid allocating when nothing needs to change.</summary>
public static class StringUtils
{
    #region Private members

    // [ThreadStatic] gives every thread its own builder, which ReplaceAny reuses across calls.
    [ThreadStatic]
    private static StringBuilder m_ReplaceSB;

    /// <summary>
    /// Returns the calling thread's cached builder, emptied and with room for at least
    /// <paramref name="capacity"/> characters; the builder is created on first use.
    /// </summary>
    private static StringBuilder GetReplaceSB(int capacity)
    {
        var result = m_ReplaceSB;
        if (null == result)
        {
            result = new StringBuilder(capacity);
            m_ReplaceSB = result;
        }
        else
        {
            result.Clear();
            result.EnsureCapacity(capacity);
        }
        return result;
    }

    // The region must be closed explicitly; an unterminated #region does not compile.
    #endregion

    /// <summary>
    /// Replaces every character of <paramref name="s"/> that occurs in
    /// <paramref name="chars"/> with <paramref name="replaceWith"/>.
    /// </summary>
    /// <param name="s">The text to process; may be null.</param>
    /// <param name="replaceWith">Character written in place of each match.</param>
    /// <param name="chars">Characters to replace; may be null.</param>
    /// <returns>
    /// <paramref name="s"/> itself when <paramref name="chars"/> is null or nothing matched,
    /// null when <paramref name="s"/> is null, otherwise a new string with the replacements.
    /// </returns>
    public static string ReplaceAny(this string s, char replaceWith, params char[] chars)
    {
        if (null == chars)
        {
            return s;
        }
        if (null == s)
        {
            return null;
        }

        // The builder is only obtained once the first match is found.
        StringBuilder sb = null;
        for (int i = 0, count = s.Length; i < count; i++)
        {
            var temp = s[i];
            var replace = false;
            for (int j = 0, cc = chars.Length; j < cc; j++)
            {
                if (temp == chars[j])
                {
                    if (null == sb)
                    {
                        sb = GetReplaceSB(count);
                        // Copy the unchanged prefix that was skipped before the first match.
                        if (i > 0)
                        {
                            sb.Append(s, 0, i);
                        }
                    }
                    replace = true;
                    break;
                }
            }

            if (replace)
            {
                sb.Append(replaceWith);
            }
            else if (null != sb)
            {
                sb.Append(temp);
            }
        }

        // No builder means no match: hand back the original instance.
        return null == sb ? s : sb.ToString();
    }
}
您只需要使其可变即可:
- 使用 StringBuilder
- 进入 unsafe 世界并使用指针(虽然很危险)
并尽量减少遍历字符数组的次数。请注意此处的 HashSet,因为它避免了在循环内遍历字符序列。如果您需要更快的查找,可以用针对 char 优化的查找(基于 array[256])来代替 HashSet。
StringBuilder 的示例
/// <summary>
/// Overwrites, in place, every character of <paramref name="builder"/> that appears in
/// <paramref name="toReplace"/> with <paramref name="replacement"/>.
/// </summary>
/// <param name="builder">The builder whose contents are modified.</param>
/// <param name="toReplace">Characters to look for.</param>
/// <param name="replacement">Character written over each match.</param>
public static void MultiReplace(this StringBuilder builder,
    char[] toReplace,
    char replacement)
{
    // A hash set makes each membership check independent of how many characters are listed.
    var targets = new HashSet<char>(toReplace);
    int index = 0;
    while (index < builder.Length)
    {
        if (targets.Contains(builder[index]))
        {
            builder[index] = replacement;
        }
        index++;
    }
}
编辑-优化版本
/// <summary>
/// Overwrites, in place, every character of <paramref name="builder"/> that appears in
/// <paramref name="toReplace"/> with <paramref name="replacement"/>, using a lookup table
/// indexed by character code.
/// </summary>
/// <param name="builder">The builder whose contents are modified.</param>
/// <param name="toReplace">Characters to look for.</param>
/// <param name="replacement">Character written over each match.</param>
public static void MultiReplace(this StringBuilder builder,
    char[] toReplace,
    char replacement)
{
    // A char is a 16-bit value, so a fixed 256-entry table overflows for anything above
    // U+00FF. Size the table to the largest character actually requested instead.
    int tableSize = 0;
    foreach (var charToReplace in toReplace)
    {
        if (charToReplace >= tableSize)
        {
            tableSize = charToReplace + 1;
        }
    }

    var set = new bool[tableSize];
    foreach (var charToReplace in toReplace)
    {
        set[charToReplace] = true;
    }

    for (int i = 0; i < builder.Length; ++i)
    {
        var currentCharacter = builder[i];
        // Characters beyond the table cannot be in toReplace; the bounds check also
        // keeps the index valid for them.
        if (currentCharacter < set.Length && set[currentCharacter])
        {
            builder[i] = replacement;
        }
    }
}
然后,您可以像这样使用它:
// Turn spaces, ampersands and commas into dashes in place, then materialize the string.
char[] unwanted = { ' ', '&', ',' };
var builder = new StringBuilder("my bad,url&slugs");
builder.MultiReplace(unwanted, '-');
var result = builder.ToString();
.NET 中的 char 相当于 wchar_t(16 位),因此您只处理了所有可能字符的一个子集(要用查找表完整地优化它,您将需要 65536 个布尔值……)
您也可以简单地编写这些字符串扩展方法,然后将它们放在解决方案中的某个位置:
using System.Text;
/// <summary>Multi-target replacement helpers for strings.</summary>
public static class StringExtensions
{
    /// <summary>
    /// Replaces every character of <paramref name="original"/> that occurs in
    /// <paramref name="toBeReplaced"/> with <paramref name="newValue"/>.
    /// <paramref name="toBeReplaced"/> is treated as a set of characters, not as a substring.
    /// </summary>
    /// <returns>
    /// <paramref name="original"/> unchanged when either it or <paramref name="toBeReplaced"/>
    /// is null or empty; otherwise the rebuilt string. A null <paramref name="newValue"/>
    /// behaves like an empty string.
    /// </returns>
    public static string ReplaceAll(this string original, string toBeReplaced, string newValue)
    {
        if (string.IsNullOrEmpty(original) || string.IsNullOrEmpty(toBeReplaced))
        {
            return original;
        }

        string substitute = newValue ?? string.Empty;
        StringBuilder output = new StringBuilder();
        for (int i = 0; i < original.Length; i++)
        {
            char current = original[i];
            if (toBeReplaced.IndexOf(current) >= 0)
            {
                output.Append(substitute);
            }
            else
            {
                output.Append(current);
            }
        }
        return output.ToString();
    }

    /// <summary>
    /// Replaces each non-empty string in <paramref name="toBeReplaced"/>, in array order,
    /// with <paramref name="newValue"/>. Each replacement runs on the result of the previous one.
    /// </summary>
    /// <returns>
    /// <paramref name="original"/> unchanged when it is null or empty, or when
    /// <paramref name="toBeReplaced"/> is null or has no elements; otherwise the result of
    /// the replacements. A null <paramref name="newValue"/> behaves like an empty string.
    /// </returns>
    public static string ReplaceAll(this string original, string[] toBeReplaced, string newValue)
    {
        if (string.IsNullOrEmpty(original) || toBeReplaced == null || toBeReplaced.Length == 0)
        {
            return original;
        }

        string substitute = newValue ?? string.Empty;
        string result = original;
        for (int i = 0; i < toBeReplaced.Length; i++)
        {
            string target = toBeReplaced[i];
            if (string.IsNullOrEmpty(target))
            {
                continue;
            }
            result = result.Replace(target, substitute);
        }
        return result;
    }
}
像这样调用它们:
"ABCDE".ReplaceAll("ACE", "xy");
xyBxyDxy
还有这个:
"ABCDEF".ReplaceAll(new string[] { "AB", "DE", "EF" }, "xy");
xyCxyF
使用RegEx.Replace,如下所示:
// Sample text; the pattern below only targets ';' and ','.
string input = "This is text with far too much " +
"whitespace.";
// Character class matching a semicolon or a comma.
string pattern = "[;,]";
string replacement = "\n";
// The static helper performs the same replacement as a Regex instance built from the pattern.
string result = Regex.Replace(input, pattern, replacement);
这是有关RegEx的MSDN文档的更多信息。
就性能而言,这可能不是最佳解决方案,但它可以工作。
var str = "filename:with&bad$separators.txt";
// Characters that are swapped for an underscore.
char[] charArray = { '#', '%', '&', '{', '}', '\\', '<', '>', '*', '?', '/', ' ', '$', '!', '\'', '"', ':', '@' };
// One Replace pass per listed character; each pass works on the previous pass's result.
for (int i = 0; i < charArray.Length; i++)
{
    str = str.Replace(charArray[i], '_');
}
// Characters to strip from the file name. Inside a verbatim (@"...") literal a double
// quote has to be written as "", otherwise the literal ends early and the line does not compile.
string ToBeReplaceCharacters = @"~()@#$%&+,'""<>|;\/*?";
string fileName = "filename;with<bad:separators?";
// Remove every occurrence of each listed character by replacing it with an empty string.
foreach (var RepChar in ToBeReplaceCharacters)
{
    fileName = fileName.Replace(RepChar.ToString(), "");
}
\t 和 \r 已经包含在 \s 中。因此,您的正则表达式等效于 [;,\s]。