如何解决数据丢失:校验和不匹配
我正在使用 gpt-2-simple Python 库。我微调了 355M 的 GPT-2 模型,包含该模型的文件夹中有以下文件。
checkpoint
counter
model-139000.data-00000-of-00001
encoder.json
model-139000.index
model-139000.meta
events.out.tfevents.1600912770.b7r6ru36
hparams.json
vocab.bpe
我执行了 TensorFlow Lite 量化,使用以下代码将模型 "model-139000.data-00000-of-00001" 的大小从 1.32GB 减小到 700MB 左右。
import tensorflow as tf
import os
from keras_gpt_2 import load_trained_model_from_checkpoint

# Paths into the fine-tuned GPT-2 checkpoint directory written by gpt-2-simple.
model_folder = '/content/checkpoint/run1/'
config_path = os.path.join(model_folder, 'hparams.json')
checkpoint_path = os.path.join(model_folder, 'model-139000')
encoder_path = os.path.join(model_folder, 'encoder.json')
vocab_path = os.path.join(model_folder, 'vocab.bpe')

# Rebuild the GPT-2 network as a Keras model from the TF checkpoint weights.
old_model = load_trained_model_from_checkpoint(config_path, checkpoint_path)

# Convert to TFLite with float16 post-training quantization (roughly halves size).
tflite_converter = tf.lite.TFLiteConverter.from_keras_model(old_model)
tflite_converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_converter.target_spec.supported_types = [tf.float16]
tflite_model = tflite_converter.convert()

# Use a context manager so the output file is flushed and closed even on error
# (the original `open(...).write(...)` leaked the file handle).
# NOTE(review): the resulting .tflite flatbuffer is NOT a TF checkpoint shard.
# Renaming it to model-*.data-00000-of-00001 and restoring it with
# tf.train.Saver will fail with a "Checksum does not match" DataLossError —
# a converted model must be loaded with tf.lite.Interpreter instead.
with open("tf_lite_model.tflite", "wb") as out_file:
    out_file.write(tflite_model)
我将压缩后的 "tf_lite_model.tflite" 文件重命名为 "model-139000.data-00000-of-00001",并用它覆盖了训练好的模型文件夹中的原始文件。
现在,当我尝试使用gpt2_simple库中的以下代码加载模型时
# Clear any previously built graph so gpt-2-simple can build a fresh one.
tf.reset_default_graph()
# Open a new TensorFlow session via gpt-2-simple.
sess = gpt2.start_tf_sess()
# Restore the fine-tuned checkpoint from checkpoint/run1 into the session.
# This is where the DataLossError below is raised, because the checkpoint
# shard file was overwritten with a TFLite flatbuffer.
gpt2.load_gpt2(sess,run_name='run1')
我收到以下错误
Loading checkpoint checkpoint/run1/model-139000
INFO:tensorflow:Restoring parameters from checkpoint/run1/model-139000
---------------------------------------------------------------------------
DataLossError Traceback (most recent call last)
/tensorflow-1.15.2/python3.6/tensorflow_core/python/client/session.py in _do_call(self,fn,*args)
1364 try:
-> 1365 return fn(*args)
1366 except errors.OpError as e:
8 frames
DataLossError: 2 root error(s) found.
(0) Data loss: Checksum does not match: stored 1169152242 vs. calculated on the restored bytes 3893622755
[[{{node save/RestoreV2}}]]
[[save/RestoreV2/_301]]
(1) Data loss: Checksum does not match: stored 1169152242 vs. calculated on the restored bytes 3893622755
[[{{node save/RestoreV2}}]]
0 successful operations.
0 derived errors ignored.
During handling of the above exception,another exception occurred:
DataLossError Traceback (most recent call last)
/tensorflow-1.15.2/python3.6/tensorflow_core/python/client/session.py in _do_call(self,*args)
1382 '\nsession_config.graph_options.rewrite_options.'
1383 'disable_meta_optimizer = True')
-> 1384 raise type(e)(node_def,op,message)
1385
1386 def _extend_graph(self):
DataLossError: 2 root error(s) found.
(0) Data loss: Checksum does not match: stored 1169152242 vs. calculated on the restored bytes 3893622755
[[node save/RestoreV2 (defined at /tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py:1748) ]]
[[save/RestoreV2/_301]]
(1) Data loss: Checksum does not match: stored 1169152242 vs. calculated on the restored bytes 3893622755
[[node save/RestoreV2 (defined at /tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py:1748) ]]
0 successful operations.
0 derived errors ignored.
Original stack trace for 'save/RestoreV2':
File "/usr/lib/python3.6/runpy.py",line 193,in _run_module_as_main
"__main__",mod_spec)
File "/usr/lib/python3.6/runpy.py",line 85,in _run_code
exec(code,run_globals)
File "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py",line 16,in <module>
app.launch_new_instance()
File "/usr/local/lib/python3.6/dist-packages/traitlets/config/application.py",line 664,in launch_instance
app.start()
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelapp.py",line 499,in start
self.io_loop.start()
File "/usr/local/lib/python3.6/dist-packages/tornado/platform/asyncio.py",line 132,in start
self.asyncio_loop.run_forever()
File "/usr/lib/python3.6/asyncio/base_events.py",line 438,in run_forever
self._run_once()
File "/usr/lib/python3.6/asyncio/base_events.py",line 1451,in _run_once
handle._run()
File "/usr/lib/python3.6/asyncio/events.py",line 145,in _run
self._callback(*self._args)
File "/usr/local/lib/python3.6/dist-packages/tornado/platform/asyncio.py",line 122,in _handle_events
handler_func(fileobj,events)
File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py",line 300,in null_wrapper
return fn(*args,**kwargs)
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py",line 462,in _handle_events
self._handle_recv()
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py",line 492,in _handle_recv
self._run_callback(callback,msg)
File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py",line 444,in _run_callback
callback(*args,**kwargs)
File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py",line 300,in null_wrapper
return fn(*args,**kwargs)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py",line 283,in dispatcher
return self.dispatch_shell(stream,msg)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py",line 233,in dispatch_shell
handler(stream,idents,msg)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py",line 399,in execute_request
user_expressions,allow_stdin)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/ipkernel.py",line 208,in do_execute
res = shell.run_cell(code,store_history=store_history,silent=silent)
File "/usr/local/lib/python3.6/dist-packages/ipykernel/zmqshell.py",line 537,in run_cell
return super(ZMQInteractiveShell,self).run_cell(*args,**kwargs)
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py",line 2718,in run_cell
interactivity=interactivity,compiler=compiler,result=result)
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py",line 2822,in run_ast_nodes
if self.run_code(code,result):
File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py",line 2882,in run_code
exec(code_obj,self.user_global_ns,self.user_ns)
File "<ipython-input-4-6bec7b5b56a0>",line 34,in <module>
gpt2.load_gpt2(sess,run_name='run1')
File "/usr/local/lib/python3.6/dist-packages/gpt_2_simple/gpt_2.py",line 392,in load_gpt2
saver = tf.compat.v1.train.Saver(allow_empty=True)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/training/saver.py",line 828,in __init__
self.build()
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/training/saver.py",line 840,in build
self._build(self._filename,build_save=True,build_restore=True)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/training/saver.py",line 878,in _build
build_restore=build_restore)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/training/saver.py",line 508,in _build_internal
restore_sequentially,reshape)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/training/saver.py",line 328,in _AddRestoreOps
restore_sequentially)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/training/saver.py",line 575,in bulk_restore
return io_ops.restore_v2(filename_tensor,names,slices,dtypes)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/ops/gen_io_ops.py",line 1696,in restore_v2
name=name)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/op_def_library.py",line 794,in _apply_op_helper
op_def=op_def)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/util/deprecation.py",line 507,in new_func
return func(*args,**kwargs)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py",line 3357,in create_op
attrs,op_def,compute_device)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py",line 3426,in _create_op_internal
op_def=op_def)
File "/tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py",line 1748,in __init__
self._traceback = tf_stack.extract_stack()
其他信息: 这是gpt2_simple库内部内部的load_gpt2()函数
def load_gpt2(sess,checkpoint='latest',run_name="run1",checkpoint_dir="checkpoint",model_name=None,model_dir='models',multi_gpu=False):
    """Restore a GPT-2 model into *sess* for repeated predictions.

    Loads either a pretrained model (when ``model_name`` is given, looked up
    under ``model_dir``) or a fine-tuned run (under ``checkpoint_dir/run_name``).
    """
    # Pretrained models and fine-tuned runs live in different directory trees.
    base_dir = (os.path.join(model_dir, model_name) if model_name
                else os.path.join(checkpoint_dir, run_name))

    # Merge the saved hyperparameters over the library defaults.
    hparams = model.default_hparams()
    with open(os.path.join(base_dir, 'hparams.json')) as hp_file:
        hparams.override_from_dict(json.load(hp_file))

    # Build the graph: a [1, seq_len] token placeholder feeding the model,
    # optionally spread across all available GPUs.
    context = tf.compat.v1.placeholder(tf.int32, [1, None])
    device_list = get_available_gpus() if multi_gpu else []
    output = model.model(hparams=hparams, X=context, gpus=device_list)

    # Resolve which checkpoint file to restore.
    if checkpoint == 'latest':
        ckpt = tf.train.latest_checkpoint(base_dir)
    else:
        ckpt = os.path.join(base_dir, checkpoint)

    saver = tf.compat.v1.train.Saver(allow_empty=True)
    sess.run(tf.compat.v1.global_variables_initializer())

    label = 'Loading pretrained model' if model_name else 'Loading checkpoint'
    print(label, ckpt)
    saver.restore(sess, ckpt)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。