-
Notifications
You must be signed in to change notification settings - Fork 1
/
cryptotax.py
431 lines (355 loc) · 13.6 KB
/
cryptotax.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
# Big Picture:
# collect csvs
# make list of addresses that I own
# make list of transactions
# sort transactions by date
# ins are income, outs are spends, unless they co-occur, in which case, like kind or self transfer
# keep track of the lots as they dissolve
import copy
import json
from config import *
import os
from prices import prices
from hashlib import sha3_256 as sha
from config import logger
from wrangler import Wrangler
# Groups of transactions that could not be booked because the lot queue they
# needed to draw from was empty (appended to by handle_spend/handle_likekind).
anomalies = []
# Currencies whose balance updates are echoed to stdout by update_balance.
target_currencies = ['ZET']
# Shared Wrangler instance used to collect, sort and write out transaction data.
wrangler = Wrangler()
# Earlier module-level defaults kept for reference; these globals are now
# created by initialize().
#queues = { 'BTC': [], 'ETH': [], 'DASH': [], 'BCH': [] }
# queues = {}
# balances = {}
#
# my_addresses = []
def initialize():
    """Reset the module-level bookkeeping to its empty starting state.

    Rebinds the globals ``queues``, ``spend_queues``, ``balances``,
    ``cost_bases``, ``my_addresses`` and ``tx_id``. ``data_hash`` is declared
    global but is not assigned here.
    """
    global queues, balances, cost_bases, my_addresses
    global tx_id, spend_queues, data_hash

    # Each container gets its own fresh object; nothing is shared between them.
    queues, spend_queues = {}, {}
    balances, cost_bases = {}, {}
    my_addresses = []
    tx_id = 0
def likekind_eligible(tx):
    """Return True when ``tx`` is dated before ``likekind_cutoff_year``.

    The text in front of the first ``/`` of ``tx['date']`` is parsed as an
    integer and compared against the configured cutoff.
    """
    leading_field, _, _ = tx['date'].partition('/')
    return int(leading_field) < likekind_cutoff_year
def process_exchange_order(txs, i):
    """Pair the exchange order at ``txs[i]`` with its opposite leg and book it.

    Scans forward from ``i + 1`` for the first transaction whose ``notes``
    equal those of ``txs[i]`` and whose ``direction`` differs. The pair is
    booked with ``handle_purchase_sale`` when either leg is USD or the
    outgoing leg is not like-kind eligible, otherwise with
    ``handle_likekind``. The matched transaction is then deleted from ``txs``
    in place so the caller's scan does not visit it again.

    Args:
        txs: list of transaction dicts; mutated in place.
        i: index of the order to process.

    Raises:
        Exception: if no later transaction pairs with ``txs[i]``.
    """
    tx1 = txs[i]
    for j in range(i + 1, len(txs)):
        candidate = txs[j]
        if candidate['notes'] == tx1['notes'] and candidate['direction'] != tx1['direction']:
            break
    else:
        # Loop ran to completion (or never ran): nothing to pair with.
        raise Exception('no order found to pair with %s' % tx1)

    tx2 = candidate
    logger.debug('pairing order %d with %d' % (tx1['index'], tx2['index']))
    out_tx = tx1 if tx1['direction'] == 'out' else tx2
    in_tx = tx1 if tx1['direction'] == 'in' else tx2
    if out_tx['currency'] == 'USD' or in_tx['currency'] == 'USD' or not likekind_eligible(out_tx):
        handle_purchase_sale(out_tx, in_tx)
    else:
        handle_likekind(out_tx, in_tx)
    # Delete by position: list.remove() would rescan the list comparing dicts
    # by value and drop the first *equal* element, which is both slower and
    # not guaranteed to be the element that was matched.
    del txs[j]
def process_off_exchange(txs, i):
    """Book the non-order transaction at ``txs[i]``, pairing it when possible.

    Looks ahead for the first later transaction that is in a different
    currency, within ``one_day`` of ``txs[i]``, flows in the opposite
    direction, is not an exchange order, and whose dollar value differs by
    less than ``dollar_pct`` (relative to the larger of the two values, with
    a floor of 1). The scan stops at the first different-currency
    transaction that is too far away in time.

    A matched pair is booked as a purchase/sale or a like-kind swap and the
    partner is deleted from ``txs`` in place. An unmatched non-USD
    transaction is booked on its own with ``handle_single``; an unmatched USD
    transaction is ignored.

    Args:
        txs: list of transaction dicts; mutated in place.
        i: index of the transaction to process.
    """
    tx1 = txs[i]
    match_at = None
    for j in range(i + 1, len(txs)):
        tx2 = txs[j]
        # is this a different currency we are looking at?
        different_currency = tx2['currency'] != tx1['currency']
        if not different_currency:
            continue
        # are we close enough to tx1 in time?
        close_time = abs(tx2['timestamp'] - tx1['timestamp']) < one_day
        if not close_time:
            break
        # must be different directions, neither is exchange order and close
        # enough dollar amounts
        different_direction = tx2['direction'] != tx1['direction']
        if not different_direction:
            continue
        close_dollar = abs(tx2['dollar'] - tx1['dollar']) / max(tx2['dollar'], tx1['dollar'], 1) < dollar_pct
        not_order = 'order' not in tx2['notes']
        if not_order and close_dollar:
            match_at = j
            break

    if match_at is None:
        if tx1['currency'] != 'USD':
            handle_single(tx1)
        return

    # if close in time and dollar amount, probably likekind or self transfer
    if tx1['currency'] == 'USD' or tx2['currency'] == 'USD' or not likekind_eligible(tx2):
        handle_purchase_sale(tx1, tx2)
    elif different_currency:
        handle_likekind(tx1, tx2)
    else:
        # NOTE(review): unreachable as written -- same-currency candidates are
        # skipped in the scan above, so `different_currency` is always True
        # for a match. Kept so the intended three-way split stays visible.
        handle_self_transfer(tx1, tx2)
    # Delete by position rather than list.remove(), which rescans the list
    # comparing dicts by value and drops the first equal element.
    del txs[match_at]
def process_txs(data):
    """Walk the transactions in date order and book each one.

    Works on a deep copy of ``wrangler.sort_txs_by_date(data)``. Exchange
    orders (``'order'`` in ``notes``) go to ``process_exchange_order``,
    everything else to ``process_off_exchange``; both may delete a paired
    transaction from the working list, which is why the loop re-checks
    ``len(txs)`` on every pass. Processing stops at the first transaction
    whose timestamp is past ``cutoff_year``. Finally the running balances and
    cost bases are dumped through the wrangler.

    Args:
        data: transactions to process, in whatever form
            ``wrangler.sort_txs_by_date`` accepts.
    """
    global tx_id
    txs = copy.deepcopy(wrangler.sort_txs_by_date(data))
    # Seed the id counter with the largest existing index so ids handed out by
    # get_new_tx_id() can never collide with a transaction's own index, even
    # if the latest-dated transaction does not carry the largest index. An
    # empty input leaves the counter at 0 instead of raising IndexError.
    tx_id = max((tx['index'] for tx in txs), default=0)
    cutoff_timestamp = cutoff_year.timestamp()
    i = 0
    while i < len(txs):
        tx1 = txs[i]
        if tx1['timestamp'] > cutoff_timestamp:
            logger.info('reached tx past cutoff year %s. Stopping' % cutoff_year)
            break
        # handle exchange orders
        if 'order' in tx1['notes']:
            process_exchange_order(txs, i)
        else:
            process_off_exchange(txs, i)
        i += 1
    wrangler.dump_balances_cost_basis(balances, cost_bases)
def handle_purchase_sale(tx1, tx2):
    """Book a paired in/out transaction as a sale, a purchase, or both.

    * incoming leg is USD  -> the outgoing leg is a spend priced by the USD leg
    * outgoing leg is USD  -> the incoming leg is a purchase
    * neither leg is USD   -> the outgoing leg is a spend and the incoming
      leg is income
    """
    if tx1['direction'] == 'in':
        incoming = tx1
    else:
        incoming = tx2
    if tx2['direction'] == 'out':
        outgoing = tx2
    else:
        outgoing = tx1

    if incoming['currency'] == 'USD':
        handle_spend(outgoing, incoming)
        return
    if outgoing['currency'] == 'USD':
        handle_purchase(usd=outgoing, crypto=incoming)
        return
    handle_spend(outgoing)
    handle_income(incoming)
def handle_single(tx):
    """Book an unpaired transaction according to its direction.

    ``'in'`` is treated as income and ``'out'`` as a spend.

    Raises:
        Exception: if ``tx['direction']`` is neither ``'in'`` nor ``'out'``.
    """
    direction = tx['direction']
    if direction == 'in':
        handle_income(tx)
        return
    if direction == 'out':
        handle_spend(tx)
        return
    raise Exception('Unknown direction type %s' % tx)
def handle_purchase(usd, crypto):
    """Book crypto that was bought with USD as a new lot.

    Both legs are tagged as a ``'purchase'`` and cross-referenced through
    ``paired``. The crypto leg becomes a lot whose cost basis is the dollar
    value of the USD leg; it is placed at the front of its currency queue,
    the running balance and cost basis are updated, and the lot is written
    out through the wrangler.

    Args:
        usd: the outgoing USD transaction dict (mutated).
        crypto: the incoming crypto transaction dict (mutated).
    """
    for leg, counterpart in ((crypto, usd), (usd, crypto)):
        leg['category'] = 'purchase'
        leg['paired'] = counterpart['index']

    # A purchase starts a new lot: it is its own origin and has no predecessor.
    crypto.update(
        cost_basis=usd['dollar'],
        origin_date=crypto['timestamp'],
        id=crypto['index'],
        previous_id=-1,
        origin_id=crypto['index'],
    )

    symbol = crypto['currency']
    get_queue_for_currency(symbol).insert(0, crypto)
    update_balance(symbol, crypto['amount'])
    update_cost_basis(symbol, crypto['cost_basis'])
    wrangler.write_income_spend(crypto)
def handle_income(income):
    """Book received crypto as income and open a new lot for it.

    The lot's cost basis is the transaction's own dollar value. The lot is
    placed at the front of its currency queue, the running balance and cost
    basis are updated, and the lot is written out through the wrangler.

    Args:
        income: the incoming transaction dict (mutated).
    """
    # Logged before the dict is annotated, so the message shows the raw tx.
    logger.info('Handling income tx %s' % income)

    # Income starts a new lot: it is its own origin and has no predecessor.
    income.update(
        category='income',
        cost_basis=income['dollar'],
        origin_date=income['timestamp'],
        id=income['index'],
        previous_id=-1,
        origin_id=income['index'],
    )

    symbol = income['currency']
    get_queue_for_currency(symbol).insert(0, income)
    update_balance(symbol, income['amount'])
    update_cost_basis(symbol, income['cost_basis'])
    wrangler.write_income_spend(income)
def handle_spend(spend, incoming=None):
    """Consume lots from the currency's queue to cover an outgoing transaction.

    Lots are taken from the back of the queue (new lots are inserted at the
    front, so the oldest-inserted lot is used first). A lot that is larger
    than what is still needed is reduced in place, with its cost basis split
    proportionally. One ``'spend'`` record is written per lot touched.

    If the queue runs dry before the spend is covered, the spend is recorded
    in ``anomalies`` and processing stops; records already written stay.

    Args:
        spend: the outgoing transaction dict.
        incoming: optional paired incoming transaction. When it is a USD leg
            with a positive amount, the unit price is derived from it instead
            of from ``spend['price']``.
    """
    sold_for_usd = (
        incoming is not None
        and incoming['currency'] == 'USD'
        and incoming['amount'] > 0
    )
    price = incoming['amount'] / spend['amount'] if sold_for_usd else spend['price']

    remaining = spend['amount']
    timestamp = spend['timestamp']
    currency = spend['currency']
    lots = get_queue_for_currency(currency)
    update_balance(currency, -1 * spend['amount'])

    while remaining > 0:
        if not lots:
            maybe_print("Empty queue for currency %s, tried to spend %s" % (currency, remaining))
            anomalies.append([spend])
            return

        lot = lots[-1]
        if remaining >= lot['amount']:
            # The whole lot is used up.
            piece_amount = lot['amount']
            piece_basis = lot['cost_basis']
            lots.pop()
        else:
            # Only part of the lot is needed; shrink it in place.
            piece_amount = remaining
            piece_basis = lot['cost_basis'] * piece_amount / lot['amount']
            lot['cost_basis'] -= piece_basis
            lot['amount'] -= piece_amount

        remaining -= piece_amount
        update_cost_basis(currency, -1 * piece_basis)
        # Treat sub-1e-8 leftovers as float dust so the loop terminates.
        if remaining < 1e-8:
            remaining = 0

        wrangler.write_income_spend({
            'id': get_new_tx_id(),
            'previous_id': lot['id'],
            'currency': currency,
            'category': 'spend',
            'amount': piece_amount,
            'cost_basis': piece_basis,
            'price': price,
            'timestamp': timestamp,
            'direction': 'out',
            'origin_date': lot['origin_date'],
            'origin_id': lot['origin_id'],
        })
def handle_self_transfer(spend, income):
    """Tag both legs as a self transfer and cross-reference them.

    Each dict gets ``category = 'self transfer'`` and a ``paired`` entry
    holding the other leg's ``index``. Nothing else is touched.
    """
    for leg, counterpart in ((spend, income), (income, spend)):
        leg['category'] = 'self transfer'
        leg['paired'] = counterpart['index']
def handle_likekind(tx1, tx2):
    """Book a crypto-for-crypto swap as a like-kind exchange.

    The relinquished amount is drawn from the back of the outgoing currency's
    queue. For every lot touched, a new lot in the received currency is
    created (a deep copy of the incoming transaction) that inherits the
    consumed lot's cost basis, origin date and origin id, and a like-kind
    record is written through the wrangler.

    If the outgoing queue runs dry, the pair is recorded in ``anomalies`` and
    processing stops; lots and records already produced stay.

    Args:
        tx1: one leg of the swap.
        tx2: the other leg of the swap.
    """
    if tx1['direction'] == 'out':
        spend = tx1
    else:
        spend = tx2
    if tx1['direction'] == 'in':
        income = tx1
    else:
        income = tx2

    out_currency = spend['currency']
    in_currency = income['currency']
    # The receiving queue is looked up first; a missing queue is created on
    # lookup, so this order decides the order of entries in `queues`.
    received_lots = get_queue_for_currency(in_currency)
    relinquished_lots = get_queue_for_currency(out_currency)
    update_balance(out_currency, -1 * spend['amount'])
    update_balance(in_currency, income['amount'])

    out_amount, in_amount = spend['amount'], income['amount']
    out_price, in_price = spend['price'], income['price']
    spend_left = out_amount
    income_left = in_amount

    # deplete items from the out queue until spend is accounted for
    while spend_left > 0:
        if not relinquished_lots:
            maybe_print("Empty queue for currency %s, tried to pull %s" % (out_currency, spend_left))
            anomalies.append([tx1, tx2])
            return

        lot = relinquished_lots[-1]
        if spend_left > lot['amount']:
            # Lot is fully consumed and more is still owed: the received
            # amount is shared out pro rata.
            spent_piece = lot['amount']
            received_piece = in_amount * spent_piece / out_amount
            basis_piece = lot['cost_basis']
            relinquished_lots.pop()
        elif spend_left == lot['amount']:
            # Lot exactly covers the rest: it takes all remaining income.
            spent_piece = lot['amount']
            received_piece = income_left
            basis_piece = lot['cost_basis']
            relinquished_lots.pop()
        else:
            # Lot is larger than needed: shrink it in place and carry over a
            # proportional share of its cost basis.
            spent_piece = spend_left
            received_piece = income_left
            basis_piece = lot['cost_basis'] * spent_piece / lot['amount']
            lot['amount'] -= spent_piece
            lot['cost_basis'] -= basis_piece

        income_left -= received_piece
        spend_left -= spent_piece
        # The basis moves from the relinquished currency to the received one.
        update_cost_basis(out_currency, -1 * basis_piece)
        update_cost_basis(in_currency, basis_piece)
        # Treat leftovers below 5e-6 as fully spent so the loop terminates.
        if spend_left < 5e-6:
            spend_left = 0

        new_lot = copy.deepcopy(income)
        new_lot['id'] = get_new_tx_id()
        new_lot['amount'] = received_piece
        new_lot['dollar'] = new_lot['amount'] * new_lot['price']
        new_lot['origin_date'] = lot['origin_date']
        new_lot['origin_id'] = lot['origin_id']
        new_lot['cost_basis'] = basis_piece
        received_lots.insert(0, new_lot)

        wrangler.write_likekind({
            'id': new_lot['id'],
            'previous_id': lot['id'],
            'received': in_currency,
            'received_amount': received_piece,
            'received_price': in_price,
            'relinquished': out_currency,
            'relinquished_amount': spent_piece,
            'relinquished_price': out_price,
            'swap_date': new_lot['timestamp'],
            'last_trade_date': spend['timestamp'],
            'origin_date': new_lot['origin_date'],
            'origin_id': new_lot['origin_id'],
            'cost_basis': new_lot['cost_basis'],
        })
def get_queue_for_currency(currency):
    """Return the lot queue for ``currency``, creating an empty one if needed.

    The returned list is the one stored in the global ``queues`` dict, so
    callers mutate the shared queue directly.
    """
    return queues.setdefault(currency, [])
def update_balance(currency, amount):
    """Add ``amount`` to the running balance of ``currency``.

    The stored balance is rounded to 7 decimal places after every update. A
    currency seen for the first time starts at ``amount``. Updates to any
    currency listed in ``target_currencies`` are echoed to stdout, and a
    warning is logged whenever the resulting balance is negative.
    """
    if currency in target_currencies:
        print('balance[%s] = %s + %s' % (currency, balances.get(currency,0), amount))

    if currency in balances:
        running = balances[currency] + amount
    else:
        running = amount
    balances[currency] = round(running, 7)

    if balances[currency] < 0:
        logger.warning('Negative balance %s: %f' % (currency, balances[currency]))
def update_cost_basis(currency, amount):
    """Add ``amount`` to the running cost basis of ``currency``.

    The stored total is rounded to 7 decimal places after every update. A
    currency seen for the first time starts at ``amount``. A warning is
    logged whenever the resulting total is negative.
    """
    if currency in cost_bases:
        running = cost_bases[currency] + amount
    else:
        running = amount
    cost_bases[currency] = round(running, 7)

    if cost_bases[currency] < 0:
        logger.warning('Negative cost basis %s: %f' % (currency, cost_bases[currency]))
def process_remainder():
    """Write out every lot still sitting in the queues.

    For each remaining lot a record holding its amount, currency symbol,
    cost basis and origin date is passed to ``wrangler.write_cost_basis``,
    in queue order.
    """
    leftover_records = (
        {
            'amount': lot['amount'],
            'symbol': symbol,
            'cost_basis': lot['cost_basis'],
            'origin_date': lot['origin_date'],
        }
        for symbol, lots in queues.items()
        for lot in lots
    )
    # The generator is lazy, so each record is built right before it is written.
    for record in leftover_records:
        wrangler.write_cost_basis(record)
def get_new_tx_id():
    """Advance the global ``tx_id`` counter by one and return the new value."""
    global tx_id
    tx_id = tx_id + 1
    return tx_id
def hash_path(path):
    """Return the SHA3-256 hex digest of the text file at ``path``.

    The file is read in text mode and the resulting string is re-encoded
    with ``str.encode()`` before hashing.
    """
    with open(path, 'r') as handle:
        contents = handle.read()
    digest = sha(contents.encode())
    return digest.hexdigest()
# def assert_q_sorted(q):
# sorted_q = sorted(q, key=lambda x: x['timestamp'], reverse=True)
# if sorted_q != q:
# print('badbad')
def start_to_finish():
    """Run the whole pipeline: load or collect transactions, then process them.

    Transactions are read from ``final_file`` when it exists and
    ``reload_data`` is falsy; otherwise they are collected through the
    wrangler and dumped to disk together with ``prices``. The module state
    is then reset, every transaction is processed, and the lots left in the
    queues are written out.
    """
    if os.path.isfile(final_file) and not reload_data:
        maybe_print('Loading previously collected txs from disk')
        # Context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(final_file, 'r') as cached:
            txs = json.load(cached)
    else:
        maybe_print('Collecting transactions')
        txs = wrangler.collect_transactions()
        # NOTE(review): the source had lost its indentation; the two dumps
        # are assumed to belong to the fresh-collection branch -- confirm.
        wrangler.dump_txs(txs)
        wrangler.dump_prices(prices)
    initialize()
    process_txs(txs)
    process_remainder()
# Script entry point: run the full pipeline, then print a completion marker.
if __name__ == '__main__':
    start_to_finish()
    print('Dun')