我需要转换one
成1
,two
成2
等。
有没有办法用库或类或其他东西做到这一点?
我需要转换one
成1
,two
成2
等。
有没有办法用库或类或其他东西做到这一点?
Answers:
此代码的大部分内容是设置数字单词dict,仅在首次调用时才完成。
def text2int(textnum, numwords={}):
if not numwords:
units = [
"zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
"nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
"sixteen", "seventeen", "eighteen", "nineteen",
]
tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
scales = ["hundred", "thousand", "million", "billion", "trillion"]
numwords["and"] = (1, 0)
for idx, word in enumerate(units): numwords[word] = (1, idx)
for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)
for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)
current = result = 0
for word in textnum.split():
if word not in numwords:
raise Exception("Illegal word: " + word)
scale, increment = numwords[word]
current = current * scale + increment
if scale > 100:
result += current
current = 0
return result + current
print text2int("seven billion one hundred million thirty one thousand three hundred thirty seven")
#7100031337
print text2int("nineteen ninety six") # 115
我刚为确切的目的向PyPI发布了一个名为word2number的python模块。https://github.com/akshaynagpal/w2n
使用以下方法安装:
pip install word2number
确保您的点子已更新为最新版本。
用法:
from word2number import w2n
print w2n.word_to_num("two million three thousand nine hundred and eighty four")
2003984
"1 million"
或"1M"
。w2n.word_to_num(“ 1百万”)引发错误。
如果有人感兴趣,我会破解一个可以保留其余字符串的版本(尽管它可能存在错误,但并没有对其进行过多测试)。
def text2int (textnum, numwords={}):
if not numwords:
units = [
"zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
"nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
"sixteen", "seventeen", "eighteen", "nineteen",
]
tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
scales = ["hundred", "thousand", "million", "billion", "trillion"]
numwords["and"] = (1, 0)
for idx, word in enumerate(units): numwords[word] = (1, idx)
for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)
for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)
ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 'eighth':8, 'ninth':9, 'twelfth':12}
ordinal_endings = [('ieth', 'y'), ('th', '')]
textnum = textnum.replace('-', ' ')
current = result = 0
curstring = ""
onnumber = False
for word in textnum.split():
if word in ordinal_words:
scale, increment = (1, ordinal_words[word])
current = current * scale + increment
if scale > 100:
result += current
current = 0
onnumber = True
else:
for ending, replacement in ordinal_endings:
if word.endswith(ending):
word = "%s%s" % (word[:-len(ending)], replacement)
if word not in numwords:
if onnumber:
curstring += repr(result + current) + " "
curstring += word + " "
result = current = 0
onnumber = False
else:
scale, increment = numwords[word]
current = current * scale + increment
if scale > 100:
result += current
current = 0
onnumber = True
if onnumber:
curstring += repr(result + current)
return curstring
例:
>>> text2int("I want fifty five hot dogs for two hundred dollars.")
I want 55 hot dogs for 200 dollars.
如果您说“ $ 200”,可能会有问题。但是,这确实很粗糙。
我需要一些不同的东西,因为我的输入是从语音到文本的转换,解决方案并不总是将数字相加。例如,“我的邮政编码是一二三四五”不应转换为“我的邮政编码是15”。
我采纳了安德鲁的答案,并对其进行了调整,以处理人们强调为错误的其他一些情况,并且还增加了对示例的支持,例如我上面提到的邮政编码。下面显示了一些基本的测试用例,但我确定仍有改进的空间。
def is_number(x):
if type(x) == str:
x = x.replace(',', '')
try:
float(x)
except:
return False
return True
def text2int (textnum, numwords={}):
units = [
'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
'sixteen', 'seventeen', 'eighteen', 'nineteen',
]
tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']
scales = ['hundred', 'thousand', 'million', 'billion', 'trillion']
ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 'eighth':8, 'ninth':9, 'twelfth':12}
ordinal_endings = [('ieth', 'y'), ('th', '')]
if not numwords:
numwords['and'] = (1, 0)
for idx, word in enumerate(units): numwords[word] = (1, idx)
for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)
for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)
textnum = textnum.replace('-', ' ')
current = result = 0
curstring = ''
onnumber = False
lastunit = False
lastscale = False
def is_numword(x):
if is_number(x):
return True
if word in numwords:
return True
return False
def from_numword(x):
if is_number(x):
scale = 0
increment = int(x.replace(',', ''))
return scale, increment
return numwords[x]
for word in textnum.split():
if word in ordinal_words:
scale, increment = (1, ordinal_words[word])
current = current * scale + increment
if scale > 100:
result += current
current = 0
onnumber = True
lastunit = False
lastscale = False
else:
for ending, replacement in ordinal_endings:
if word.endswith(ending):
word = "%s%s" % (word[:-len(ending)], replacement)
if (not is_numword(word)) or (word == 'and' and not lastscale):
if onnumber:
# Flush the current number we are building
curstring += repr(result + current) + " "
curstring += word + " "
result = current = 0
onnumber = False
lastunit = False
lastscale = False
else:
scale, increment = from_numword(word)
onnumber = True
if lastunit and (word not in scales):
# Assume this is part of a string of individual numbers to
# be flushed, such as a zipcode "one two three four five"
curstring += repr(result + current)
result = current = 0
if scale > 1:
current = max(1, current)
current = current * scale + increment
if scale > 100:
result += current
current = 0
lastscale = False
lastunit = False
if word in scales:
lastscale = True
elif word in units:
lastunit = True
if onnumber:
curstring += repr(result + current)
return curstring
一些测试...
one two three -> 123
three forty five -> 345
three and forty five -> 3 and 45
three hundred and forty five -> 345
three hundred -> 300
twenty five hundred -> 2500
three thousand and six -> 3006
three thousand six -> 3006
nineteenth -> 19
twentieth -> 20
first -> 1
my zip is one two three four five -> my zip is 12345
nineteen ninety six -> 1996
fifty-seventh -> 57
one million -> 1000000
first hundred -> 100
I will buy the first thousand -> I will buy the 1000 # probably should leave ordinal in the string
thousand -> 1000
hundred and six -> 106
1 million -> 1000000
感谢您的代码片段……为我节省了很多时间!
我需要处理几个额外的解析案例,例如序数词(“第一个”,“第二个”),带连字符的词(“一百个”)和带连字符的序数词(例如“第五十七个”),所以我添加了几行:
def text2int(textnum, numwords={}):
if not numwords:
units = [
"zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
"nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
"sixteen", "seventeen", "eighteen", "nineteen",
]
tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
scales = ["hundred", "thousand", "million", "billion", "trillion"]
numwords["and"] = (1, 0)
for idx, word in enumerate(units): numwords[word] = (1, idx)
for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)
for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)
ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 'eighth':8, 'ninth':9, 'twelfth':12}
ordinal_endings = [('ieth', 'y'), ('th', '')]
textnum = textnum.replace('-', ' ')
current = result = 0
for word in textnum.split():
if word in ordinal_words:
scale, increment = (1, ordinal_words[word])
else:
for ending, replacement in ordinal_endings:
if word.endswith(ending):
word = "%s%s" % (word[:-len(ending)], replacement)
if word not in numwords:
raise Exception("Illegal word: " + word)
scale, increment = numwords[word]
current = current * scale + increment
if scale > 100:
result += current
current = 0
return result + current`
hundredth
,thousandth
等等。使用one hundredth
得到100
!
这是第一个答案中的代码的C#实现:
public static double ConvertTextToNumber(string text)
{
string[] units = new string[] {
"zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
"nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
"sixteen", "seventeen", "eighteen", "nineteen",
};
string[] tens = new string[] {"", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"};
string[] scales = new string[] { "hundred", "thousand", "million", "billion", "trillion" };
Dictionary<string, ScaleIncrementPair> numWord = new Dictionary<string, ScaleIncrementPair>();
numWord.Add("and", new ScaleIncrementPair(1, 0));
for (int i = 0; i < units.Length; i++)
{
numWord.Add(units[i], new ScaleIncrementPair(1, i));
}
for (int i = 1; i < tens.Length; i++)
{
numWord.Add(tens[i], new ScaleIncrementPair(1, i * 10));
}
for (int i = 0; i < scales.Length; i++)
{
if(i == 0)
numWord.Add(scales[i], new ScaleIncrementPair(100, 0));
else
numWord.Add(scales[i], new ScaleIncrementPair(Math.Pow(10, (i*3)), 0));
}
double current = 0;
double result = 0;
foreach (var word in text.Split(new char[] { ' ', '-', '—'}))
{
ScaleIncrementPair scaleIncrement = numWord[word];
current = current * scaleIncrement.scale + scaleIncrement.increment;
if (scaleIncrement.scale > 100)
{
result += current;
current = 0;
}
}
return result + current;
}
public struct ScaleIncrementPair
{
public double scale;
public int increment;
public ScaleIncrementPair(double s, int i)
{
scale = s;
increment = i;
}
}
e_h的C#实现的快速而又肮脏的Java端口(如上所述)。请注意,两者都返回double,而不是int。
public class Text2Double {
public double Text2Double(String text) {
String[] units = new String[]{
"zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
"nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
"sixteen", "seventeen", "eighteen", "nineteen",
};
String[] tens = new String[]{"", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"};
String[] scales = new String[]{"hundred", "thousand", "million", "billion", "trillion"};
Map<String, ScaleIncrementPair> numWord = new LinkedHashMap<>();
numWord.put("and", new ScaleIncrementPair(1, 0));
for (int i = 0; i < units.length; i++) {
numWord.put(units[i], new ScaleIncrementPair(1, i));
}
for (int i = 1; i < tens.length; i++) {
numWord.put(tens[i], new ScaleIncrementPair(1, i * 10));
}
for (int i = 0; i < scales.length; i++) {
if (i == 0)
numWord.put(scales[i], new ScaleIncrementPair(100, 0));
else
numWord.put(scales[i], new ScaleIncrementPair(Math.pow(10, (i * 3)), 0));
}
double current = 0;
double result = 0;
for(String word : text.split("[ -]"))
{
ScaleIncrementPair scaleIncrement = numWord.get(word);
current = current * scaleIncrement.scale + scaleIncrement.increment;
if (scaleIncrement.scale > 100) {
result += current;
current = 0;
}
}
return result + current;
}
}
public class ScaleIncrementPair
{
public double scale;
public int increment;
public ScaleIncrementPair(double s, int i)
{
scale = s;
increment = i;
}
}
进行更改,以便text2int(scale)将返回正确的转换。例如,text2int(“ hundred”)=> 100。
import re
numwords = {}
def text2int(textnum):
if not numwords:
units = [ "zero", "one", "two", "three", "four", "five", "six",
"seven", "eight", "nine", "ten", "eleven", "twelve",
"thirteen", "fourteen", "fifteen", "sixteen", "seventeen",
"eighteen", "nineteen"]
tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty",
"seventy", "eighty", "ninety"]
scales = ["hundred", "thousand", "million", "billion", "trillion",
'quadrillion', 'quintillion', 'sexillion', 'septillion',
'octillion', 'nonillion', 'decillion' ]
numwords["and"] = (1, 0)
for idx, word in enumerate(units): numwords[word] = (1, idx)
for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)
for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)
ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5,
'eighth':8, 'ninth':9, 'twelfth':12}
ordinal_endings = [('ieth', 'y'), ('th', '')]
current = result = 0
tokens = re.split(r"[\s-]+", textnum)
for word in tokens:
if word in ordinal_words:
scale, increment = (1, ordinal_words[word])
else:
for ending, replacement in ordinal_endings:
if word.endswith(ending):
word = "%s%s" % (word[:-len(ending)], replacement)
if word not in numwords:
raise Exception("Illegal word: " + word)
scale, increment = numwords[word]
if scale > 1:
current = max(1, current)
current = current * scale + increment
if scale > 100:
result += current
current = 0
return result + current
马克·伯恩斯(Marc Burns)有一颗红宝石可以做到这一点。我最近叉了它以增加支持多年。您可以从python调用ruby代码。
require 'numbers_in_words'
require 'numbers_in_words/duck_punch'
nums = ["fifteen sixteen", "eighty five sixteen", "nineteen ninety six",
"one hundred and seventy nine", "thirteen hundred", "nine thousand two hundred and ninety seven"]
nums.each {|n| p n; p n.in_numbers}
结果:
"fifteen sixteen"
1516
"eighty five sixteen"
8516
"nineteen ninety six"
1996
"one hundred and seventy nine"
179
"thirteen hundred"
1300
"nine thousand two hundred and ninety seven"
9297
利用python包:WordToDigits
点安装wordtodigits
它可以找到句子中以单词形式出现的数字,然后将其转换为正确的数字格式。如果存在的话,还要照顾小数部分。数字的词表示可以在段落的任何地方。
一种快速的解决方案是使用inflect.py生成字典进行翻译。
inflect.py具有一个number_to_words()
函数,它将数字(例如2
)转换为它的单词形式(例如'two'
)。不幸的是,没有提供它的反面(这将使您避免翻译词典的路线)。都一样,您可以使用该功能来构建翻译词典:
>>> import inflect
>>> p = inflect.engine()
>>> word_to_number_mapping = {}
>>>
>>> for i in range(1, 100):
... word_form = p.number_to_words(i) # 1 -> 'one'
... word_to_number_mapping[word_form] = i
...
>>> print word_to_number_mapping['one']
1
>>> print word_to_number_mapping['eleven']
11
>>> print word_to_number_mapping['forty-three']
43
如果您愿意花一些时间,则可以检查inflect.py函数的内部number_to_words()
功能,并构建自己的代码以动态地执行此操作(我没有尝试执行此操作)。
我采用了@recursive的逻辑,并转换为Ruby。我还对查找表进行了硬编码,因此它虽然不那么酷,但可以帮助新手了解正在发生的事情。
WORDNUMS = {"zero"=> [1,0], "one"=> [1,1], "two"=> [1,2], "three"=> [1,3],
"four"=> [1,4], "five"=> [1,5], "six"=> [1,6], "seven"=> [1,7],
"eight"=> [1,8], "nine"=> [1,9], "ten"=> [1,10],
"eleven"=> [1,11], "twelve"=> [1,12], "thirteen"=> [1,13],
"fourteen"=> [1,14], "fifteen"=> [1,15], "sixteen"=> [1,16],
"seventeen"=> [1,17], "eighteen"=> [1,18], "nineteen"=> [1,19],
"twenty"=> [1,20], "thirty" => [1,30], "forty" => [1,40],
"fifty" => [1,50], "sixty" => [1,60], "seventy" => [1,70],
"eighty" => [1,80], "ninety" => [1,90],
"hundred" => [100,0], "thousand" => [1000,0],
"million" => [1000000, 0]}
def text_2_int(string)
numberWords = string.gsub('-', ' ').split(/ /) - %w{and}
current = result = 0
numberWords.each do |word|
scale, increment = WORDNUMS[word]
current = current * scale + increment
if scale > 100
result += current
current = 0
end
end
return result + current
end
我当时想处理像 two thousand one hundred and forty-six
此代码适用于系列数据:
import pandas as pd
mylist = pd.Series(['one','two','three'])
mylist1 = []
for x in range(len(mylist)):
mylist1.append(w2n.word_to_num(mylist[x]))
print(mylist1)
This code works only for numbers below 99.
both word to Int and int to word.
(for rest need to implement 10-20 lines of code and simple logic. This is just simple code for beginners)
num=input("Enter the number you want to convert : ")
mydict={'1': 'One', '2': 'Two', '3': 'Three', '4': 'Four', '5': 'Five','6': 'Six', '7': 'Seven', '8': 'Eight', '9': 'Nine', '10': 'Ten','11': 'Eleven', '12': 'Twelve', '13': 'Thirteen', '14': 'Fourteen', '15': 'Fifteen', '16': 'Sixteen', '17': 'Seventeen', '18': 'Eighteen', '19': 'Nineteen'}
mydict2=['','','Twenty','Thirty','Fourty','fifty','sixty','Seventy','Eighty','Ninty']
if num.isdigit():
if(int(num)<20):
print(" :---> "+mydict[num])
else:
var1=int(num)%10
var2=int(num)/10
print(" :---> "+mydict2[int(var2)]+mydict[str(var1)])
else:
num=num.lower();
dict_w={'one':1,'two':2,'three':3,'four':4,'five':5,'six':6,'seven':7,'eight':8,'nine':9,'ten':10,'eleven':11,'twelve':12,'thirteen':13,'fourteen':14,'fifteen':15,'sixteen':16,'seventeen':'17','eighteen':'18','nineteen':'19'}
mydict2=['','','twenty','thirty','fourty','fifty','sixty','seventy','eighty','ninty']
divide=num[num.find("ty")+2:]
if num:
if(num in dict_w.keys()):
print(" :---> "+str(dict_w[num]))
elif divide=='' :
for i in range(0, len(mydict2)-1):
if mydict2[i] == num:
print(" :---> "+str(i*10))
else :
str3=0
str1=num[num.find("ty")+2:]
str2=num[:-len(str1)]
for i in range(0, len(mydict2) ):
if mydict2[i] == str2:
str3=i;
if str2 not in mydict2:
print("----->Invalid Input<-----")
else:
try:
print(" :---> "+str((str3*10)+dict_w[str1]))
except:
print("----->Invalid Input<-----")
else:
print("----->Please Enter Input<-----")