-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompound.py
192 lines (139 loc) · 5.76 KB
/
compound.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
from copy import deepcopy
import numpy as N
import pymatgen
import sys, os
from extracting_oxidation.parse_html import extract_oxidation_dictionary
# Useful global objects from pymatgen
# PT holds the element symbols exposed by pymatgen; Compound.parse_element
# uses it to validate the symbol parsed from the input string.
PT = pymatgen.periodic_table.ALL_ELEMENT_SYMBOLS
# Build the dictionary of all possible oxidation states for all the elements
# NOTE: the triple-quoted string below is disabled code (a no-op string
# expression), kept for reference only. It shows the alternative of taking the
# oxidation states directly from pymatgen, which is not used for the reason
# stated on its first line.
"""
# ---> PYMATGEN GIVES MANY UNLIKELY OXIDATION STATES <----
# use the values provided by pymatgen
oxidation_dictionary = {}
for symbol in PT:
El = pymatgen.Element(symbol)
oxidation_dictionary[symbol] = El.oxidation_states
"""
# Active source of the oxidation states: the project-local helper. The result
# is indexed by element symbol in Compound.find_oxidation_states.
# NOTE(review): judging by the module name the helper parses an HTML page;
# confirm the exact shape of the returned values against parse_html.
oxidation_dictionary = extract_oxidation_dictionary()
class Compound(object):
    """
    Contains the formula for a compound, and computes the likely redox states for transition
    metals within this compound.

    Attributes set by the constructor:
        - string: the raw input string.
        - composition_dict: maps element symbol -> occupation (float).
        - is_disordered: True if at least one occupation is not an integer.
        - list_elements, list_numbers: elements and occupations, in the order
          used consistently by the oxidation state search and the formula.
        - oxidation_states_dict: maps element symbol -> oxidation state for a
          charge neutral combination, or None if there is no such combination.
          If several combinations are neutral, the last one enumerated is kept.
        - multiple_redox_solutions: True if more than one neutral combination exists.
        - formula: nicely formatted formula string.
    """

    # Single tolerance used for every floating point comparison in the class.
    _TOL = 1e-8

    def __init__(self, compound_string):
        """
        input:
            - compound_string: should be of the form 'Am Bn Cp ...', where A,B,C,... are element symbol, and
                               m,n,p, ... are real numbers.

        raises:
            - ValueError if the string is empty or cannot be parsed.
        """
        self.string = compound_string
        self.composition_dict = {}

        # Order matters: each step relies on attributes set by the previous one.
        self.extract_composition()
        self.find_oxidation_states()
        self.get_nice_formatted_formula()

    def extract_composition(self):
        """
        Take the input string and decompose it into elements/composition.
        Make sure the format of the string is correct.

        Fills self.composition_dict and sets self.is_disordered. If the same
        symbol appears more than once, only its last occupation is kept.

        raises:
            - ValueError if the string holds no element, or if one of its
              whitespace separated pieces is not of the form 'Ax' / 'ABx'.
        """
        split_formula_string = self.string.split()

        # An empty compound cannot be analysed; fail here with a clear message
        # instead of failing later in the oxidation state search.
        if not split_formula_string:
            raise ValueError('Compound string does not contain any element')

        # This could be implemented as a @property.
        # I may do that later...
        self.is_disordered = False

        for piece in split_formula_string:
            element, number = self.parse_element(piece)
            self.composition_dict[element] = number

            # a non-integer occupation signals a disordered compound
            if abs(number - N.round(number)) > self._TOL:
                self.is_disordered = True

    def parse_element(self, str):
        """
        Extract the element and number from a string.

        The expected format is either 'Ax', or 'ABx', where A (AB) represents
        an element, and x is a number. Any other format indicates an erroneous input.
        A missing number means an occupation of 1.

        input:
            - str: a single piece of the compound string, e.g. 'Fe2' or 'O'.

        returns:
            - (element_symbol, number), with number a float.

        raises:
            - ValueError if the symbol is not an element or the occupation is
              not a number.
        """
        # The parameter name shadows the builtin, but it is part of the method
        # signature; work on a local alias instead of renaming it.
        token = str

        if not token:
            raise ValueError('Parsed symbol not recognized as an element')

        # Extract what should be an element symbol: two characters if the
        # second one is a letter, a single character otherwise.
        if len(token) > 1 and token[1].isalpha():
            element_symbol = token[0:2]
        else:
            element_symbol = token[0]

        # Check that this is indeed an element
        if element_symbol not in PT:
            raise ValueError('Parsed symbol not recognized as an element')

        # process what is left of the string
        rest = token[len(element_symbol):]
        if len(rest) == 0:
            rest = '1'

        # Only a failed conversion is an input error; anything else (e.g. a
        # keyboard interrupt) must not be swallowed.
        try:
            number = float(rest)
        except ValueError:
            raise ValueError('Parsed occupation not recognized as a number')

        return element_symbol, number

    def find_oxidation_states(self):
        """
        This routine will build all redox states for the compound, in order
        to identify possible redox states of elements in compound.

        Every combination of one oxidation state per element is enumerated,
        and the charge neutral ones are retained. The combinations are
        generated lazily, so the full set is never stored in memory.

        Sets self.list_elements, self.list_numbers, self.oxidation_states_dict
        and self.multiple_redox_solutions.
        """
        # Local import: keeps this block self-contained.
        from itertools import product

        # let's keep track of the order in which the elements
        # appear in the loop below. The actual order is not important,
        # but we must consistently use the same order when treating the data.
        self.list_elements = []
        self.list_numbers = []

        # for each element, the charges it can contribute to the compound
        charges_per_element = []

        # .items() works with both python 2 and python 3
        for element, number in self.composition_dict.items():
            self.list_elements.append(element)
            self.list_numbers.append(number)

            # extract the possible oxidation states from global dictionary
            oxidations = N.array(oxidation_dictionary[element])

            # convert these elemental states to the actual charge of this element in the compound
            charges_per_element.append(number * oxidations)

        # Without any element there is nothing to balance: report no solution.
        if not charges_per_element:
            self.oxidation_states_dict = None
            self.multiple_redox_solutions = False
            return

        numbers = N.array(self.list_numbers, dtype=float)

        # We must now find the physical combinations, namely the charge zero ones.
        # product() varies the last element fastest, which is the same order
        # as extending every partial combination with each charge in turn.
        self.oxidation_states_dict = {}
        number_of_solutions = 0

        for combination in product(*charges_per_element):
            branch = N.array(combination, dtype=float)

            if N.abs(N.sum(branch)) < self._TOL:
                number_of_solutions += 1

                # back from charge in the compound to oxidation state per atom
                oxidation_states = branch / numbers

                # a later solution overwrites an earlier one
                for el, ox in zip(self.list_elements, oxidation_states):
                    self.oxidation_states_dict[el] = ox

        if number_of_solutions == 0:
            self.oxidation_states_dict = None

        # More than one neutral combination is flagged, not treated as an error.
        self.multiple_redox_solutions = number_of_solutions > 1

    def get_nice_formatted_formula(self):
        """
        Return the name of the compound in a nice format

        Each element is followed by its occupation and a blank: nothing for an
        occupation of 1, an integer for integer occupations, three decimals
        otherwise. The result is also stored in self.formula.

        Requires self.list_elements and self.list_numbers.
        """
        pieces = []

        for el, n in zip(self.list_elements, self.list_numbers):
            if N.abs(n - 1.) < self._TOL:
                pieces.append('%s ' % el)
            elif N.abs(n - N.round(n)) < self._TOL:
                # '%i' truncates: round first so that 2.9999999999 yields 3
                pieces.append('%s%i ' % (el, int(N.round(n))))
            else:
                pieces.append('%s%4.3f ' % (el, n))

        self.formula = ''.join(pieces)
        return self.formula