Skip to content

Commit

Permalink
Fix wrong usage of token ids. Remove logging.
Browse files Browse the repository at this point in the history
  • Loading branch information
zxybazh committed Jan 29, 2024
1 parent 9a29650 commit 012388d
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 10 deletions.
1 change: 0 additions & 1 deletion serve/mlc_serve/engine/engine_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ def logprob_detokenize(
logprob_info.previous_tokens = []
for top_token, top_logprob in top_tokens:
detokenized = tokenizer.convert_ids_to_tokens(logprob_info.previous_tokens + [top_token])[-1]
- LOG.info(f"detokenized: {detokenized}")
top_logprobs.append(TopLogprobs(
token=detokenized,
logprob=float(top_logprob),
Expand Down
17 changes: 8 additions & 9 deletions serve/mlc_serve/model/tvm_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,6 @@ def get_tvm_model(config, dev):
return load_disco_module(config.model_artifact_path, lib_path, config.num_shards)

def attach_detokenization_info(logprob_info:RawLogprobsInfo, token_ids: List[int]):
- if logprob_info is None:
-     return None
logprob_info.previous_tokens = token_ids
return logprob_info

Expand Down Expand Up @@ -332,11 +330,10 @@ def generate(

try:
next_tokens, logprob_infos = sample(logits, sampling_params, self.vocab_size)
- current_ids = list(input_ids.numpy())
assert next_tokens is not None
outputs = []
- for i, (sequence_id, new_token) in enumerate(
-     zip(sequence_ids, next_tokens)
+ for i, (sequence_id, new_token, token_ids) in enumerate(
+     zip(sequence_ids, next_tokens, all_token_ids)
):
if not new_token in requests[i].sampling_params.appeared_tokens_freq:
requests[i].sampling_params.appeared_tokens_freq[new_token] = 0
Expand All @@ -348,20 +345,22 @@ def generate(
sequence_id=SequenceId(sequence_id.request_id, seq_id),
generated_tokens=[new_token],
error=None,
- logprob_info=[attach_detokenization_info(logprob_infos[i], current_ids)],
+ logprob_info=[attach_detokenization_info(logprob_infos[i], token_ids) if logprob_infos[i] else None],
)
)
- current_ids.append(new_token)
+ if logprob_infos[i]:
+     token_ids.append(new_token)
else:
outputs.append(
TextGenerationResult(
sequence_id=sequence_id,
generated_tokens=[new_token],
error=None,
- logprob_info=[logprob_infos[i]],
+ logprob_info=[attach_detokenization_info(logprob_infos[i], token_ids) if logprob_infos[i] else None],
)
)
- current_ids.append(new_token)
+ if logprob_infos[i]:
+     token_ids.append(new_token)

return outputs
except RuntimeError:
Expand Down

0 comments on commit 012388d

Please sign in to comment.