使用python和python-benedict
库完全可以轻松完成所需的操作(在Github上是开源的:
安装 pip install python-benedict
from benedict import benedict as bdict
# data source can be a filepath or an url
data_source = """
RecordID,kingdom,phylum,class,order,family,genus,species
1,Animalia,Chordata,Mammalia,Primates,Hominidae,Homo,Homo sapiens
2,Animalia,Chordata,Mammalia,Carnivora,Canidae,Canis,Canis
3,Plantae,nan,Magnoliopsida,Brassicales,Brassicaceae,Arabidopsis,Arabidopsis thaliana
4,Plantae,nan,Magnoliopsida,Fabales,Fabaceae,Phaseoulus,Phaseolus vulgaris
"""
data_input = bdict.from_csv(data_source)
data_output = bdict()
ancestors_hierarchy = ['kingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']
for value in data_input['values']:
data_output['.'.join([value[ancestor] for ancestor in ancestors_hierarchy])] = bdict()
print(data_output.dump())
# if this output is ok for your needs, you don't need the following code
keypaths = sorted(data_output.keypaths(), key=lambda item: len(item.split('.')), reverse=True)
data_output['children'] = []
def transform_data(d, key, value):
if isinstance(value, dict):
value.update({ 'name':key, 'children':[] })
data_output.traverse(transform_data)
for keypath in keypaths:
target_keypath = '.'.join(keypath.split('.')[:-1] + ['children'])
data_output[target_keypath].append(data_output.pop(keypath))
print(data_output.dump())
第一个打印输出将是:
{
"Animalia": {
"Chordata": {
"Mammalia": {
"Carnivora": {
"Canidae": {
"Canis": {
"Canis": {}
}
}
},
"Primates": {
"Hominidae": {
"Homo": {
"Homo sapiens": {}
}
}
}
}
}
},
"Plantae": {
"nan": {
"Magnoliopsida": {
"Brassicales": {
"Brassicaceae": {
"Arabidopsis": {
"Arabidopsis thaliana": {}
}
}
},
"Fabales": {
"Fabaceae": {
"Phaseoulus": {
"Phaseolus vulgaris": {}
}
}
}
}
}
}
}
第二个打印输出将是:
{
"children": [
{
"name": "Animalia",
"children": [
{
"name": "Chordata",
"children": [
{
"name": "Mammalia",
"children": [
{
"name": "Carnivora",
"children": [
{
"name": "Canidae",
"children": [
{
"name": "Canis",
"children": [
{
"name": "Canis",
"children": []
}
]
}
]
}
]
},
{
"name": "Primates",
"children": [
{
"name": "Hominidae",
"children": [
{
"name": "Homo",
"children": [
{
"name": "Homo sapiens",
"children": []
}
]
}
]
}
]
}
]
}
]
}
]
},
{
"name": "Plantae",
"children": [
{
"name": "nan",
"children": [
{
"name": "Magnoliopsida",
"children": [
{
"name": "Brassicales",
"children": [
{
"name": "Brassicaceae",
"children": [
{
"name": "Arabidopsis",
"children": [
{
"name": "Arabidopsis thaliana",
"children": []
}
]
}
]
}
]
},
{
"name": "Fabales",
"children": [
{
"name": "Fabaceae",
"children": [
{
"name": "Phaseoulus",
"children": [
{
"name": "Phaseolus vulgaris",
"children": []
}
]
}
]
}
]
}
]
}
]
}
]
}
]
}
nan
含有Magnoliopsida的Phylum的。那是nan
什么 Phylum是Anphyphyta,或木兰(它是旧的Phylum Angiospermae)。