Skip to content

Commit 43dfdf2

Browse files
authored
Merge branch 'main' into jean/default_max_tokens
2 parents 96a3889 + cc75611 commit 43dfdf2

File tree

6 files changed

+8
-10
lines changed

6 files changed

+8
-10
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -428,9 +428,9 @@ If you find Evalchemy useful, please consider citing us!
428428

429429
```
430430
@software{Evalchemy: Automatic evals for LLMs,
431-
author = {Guha, Etash and Raoof, Negin and Mercat, Jean and Marten, Ryan and Frankel, Eric and Keh, Sedrick and Grover, Sachin and Smyrnis, George and Vu, Trung and Saad-Falcon, Jon and Choi, Caroline and Arora, Kushal and Merrill, Mike and Deng, Yichuan and Suvarna, Ashima and Bansal, Hritik and Nezhurina, Marianna and Choi, Yejin and Heckel, Reinhard and Oh, Seewong and Hashimoto, Tatsunori and Jitsev, Jenia and Shankar, Vaishaal and Dimakis, Alex and Sathiamoorthy, Mahesh and Schmidt, Ludwig},
432-
month = nov,
431+
author = {Raoof, Negin and Guha, Etash Kumar and Marten, Ryan and Mercat, Jean and Frankel, Eric and Keh, Sedrick and Bansal, Hritik and Smyrnis, Georgios and Nezhurina, Marianna and Vu, Trung and Sprague, Zayne Rea and Merrill, Mike A and Chen, Liangyu and Choi, Caroline and Khan, Zaid and Grover, Sachin and Feuer, Benjamin and Suvarna, Ashima and Su, Shiye and Zhao, Wanjia and Sharma, Kartik and Ji, Charlie Cheng-Jie and Arora, Kushal and Li, Jeffrey and Gokaslan, Aaron and Pratt, Sarah M and Muennighoff, Niklas and Saad-Falcon, Jon and Yang, John and Aali, Asad and Pimpalgaonkar, Shreyas and Albalak, Alon and Dave, Achal and Pouransari, Hadi and Durrett, Greg and Oh, Sewoong and Hashimoto, Tatsunori and Shankar, Vaishaal and Choi, Yejin and Bansal, Mohit and Hegde, Chinmay and Heckel, Reinhard and Jitsev, Jenia and Sathiamoorthy, Maheswaran and Dimakis, Alex and Schmidt, Ludwig},
432+
month = jun,
433433
title = {{Evalchemy}},
434-
year = {2024}
434+
year = {2025}
435435
}
436436
```

eval/chat_benchmarks/CodeForces/eval_instruct.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ def __init__(
6363
super().__init__(logger=logger, system_instruction=system_instruction)
6464
self.debug = debug
6565
self.max_new_tokens = max_tokens
66-
self.seed = seed
6766
self.n_repeat = 3
6867
self.filter_interaction_questions = True
6968

eval/chat_benchmarks/MTBench/eval_instruct.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ def get_model_answers(self, model: LM, model_id: str, questions: List[Dict[str,
116116
max_turns = max(len(q["turns"]) for q in questions)
117117
answer_file = self.answer_dir / f"{model_id}.jsonl"
118118

119-
self.config.max_new_token = self.max_new_token
120119
# Process each turn
121120
for turn_num in range(max_turns):
122121
self.logger.info(f"Processing Turn {turn_num + 1}")

eval/chat_benchmarks/MultiPLE/eval_instruct.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def __init__(
115115
super().__init__(logger)
116116
self.languages = languages
117117
self.data_dir = data_dir
118-
self.max_tokens = max_tokens
118+
self.max_tokens = max_tokens or 1024
119119
self.num_workers = num_workers
120120
self.timeout = timeout
121121
self.debug = debug

eval/chat_benchmarks/alpaca_eval/eval_instruct.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def __init__(
3131
dataset_name: str = "tatsu-lab/alpaca_eval",
3232
subset: str = "alpaca_eval",
3333
split: str = "eval",
34-
max_tokens: int = 1024,
34+
max_tokens: Optional[int] = 1024,
3535
temperature: float = 0.5,
3636
do_sample: bool = True,
3737
debug: bool = False,
@@ -57,7 +57,7 @@ def __init__(
5757
self.dataset_name = dataset_name
5858
self.subset = subset
5959
self.split = split
60-
self.max_tokens = max_tokens
60+
self.max_tokens = max_tokens if max_tokens is not None else 1024
6161
self.temperature = temperature
6262
self.do_sample = do_sample
6363
self.debug = debug

eval/chat_benchmarks/zeroeval/eval_instruct.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,15 @@ def __init__(
3333
self,
3434
tasks: List[str] = ["zebra-grid", "numersense-v2", "crux", "math-l5"],
3535
config: Optional[ZeroEvalConfig] = None,
36-
max_tokens: int = 4096,
36+
max_tokens: Optional[int] = 4096,
3737
debug: bool = False,
3838
logger: Optional[logging.Logger] = None,
3939
system_instruction: Optional[str] = None,
4040
):
4141
super().__init__(logger, system_instruction=system_instruction)
4242
self.tasks = tasks
4343
self.config = config or ZeroEvalConfig()
44-
self.max_new_tokens = int(max_tokens)
44+
self.max_new_tokens = int(max_tokens) if max_tokens is not None else 4096
4545
self.debug = debug
4646

4747
def load_dataset(self, data_name: str) -> Tuple[List[str], List[str], List[Dict[str, Any]], Dict[str, Any]]:

0 commit comments

Comments
 (0)