Bỏ qua

Vietnamese AI Framework

Evaluation

phonghhd/vietnamese-ai

Evaluation API Reference¶

ModelConfig¶

class ModelConfig:
    """Cấu hình Vietnamese LLM từ 125M đến 7B."""

    def __init__(
        d_model=768,
        so_dau=12,
        d_ff=3072,
        so_block=12,
        so_tu_vung=32000,
        do_dai_toi_da=2048,
        dropout=0.1,
        ten="custom",
        mo_ta="",
    )

    def from_preset(preset) -> ModelConfig       # classmethod
    def danh_sach_presets() -> dict               # classmethod
    def to_dict() -> dict

    @property
    def so_tham_so(self) -> int
    @property
    def so_tham_so_str(self) -> str               # "125M", "1.3B", ...

Presets¶

Preset	d_model	so_dau	so_block	so_tu_vung	Params
`vnlm-tiny`	64	2	2	1,000	~10M
`vnlm-small`	768	12	12	32,000	~125M
`vnlm-medium`	1,024	16	24	32,000	~350M
`vnlm-large`	2,048	32	24	32,000	~1.3B
`vnlm-xl`	2,560	32	32	32,000	~2.7B
`vnlm-7b`	4,096	32	32	32,000	~6.8B

LMEvalHarness¶

class LMEvalHarness:
    """LM Evaluation Framework."""

    def __init__()

    def dang_ky_task(task: EvalTask) -> None
    def danh_sach_tasks() -> dict
    def danh_gia(model, cac_task=None, so_shot=0, limit=None) -> dict
    def bao_cao() -> str
    def thong_ke() -> dict

`danh_gia()` Returns¶

{
    "so_tasks": int,
    "so_shot": int,
    "tong_thoi_gian": float,
    "ket_qua": {
        "task_name": {
            "accuracy": float,
            "perplexity": float,
            # ... metrics tùy task type
        },
    },
    "tong_hop": dict,  # Aggregated metrics
}

EvalTask¶

class EvalTask:
    """Định nghĩa một evaluation task."""

    def __init__(
        ten: str,
        loai: str = "text_generation",  # text_generation, classification, perplexity, qa, cloze
        du_lieu: list = None,           # [{text, label}, ...]
        metrics: list = None,           # ["accuracy", "f1", ...]
        mo_ta: str = "",
    )
    def to_dict() -> dict

BenchmarkRunner¶

class BenchmarkRunner:
    """Benchmark Runner cho Vietnamese LLM."""

    def __init__()

    def chay(model, benchmarks=None, so_shot=0) -> dict
    def bao_cao() -> str
    def thong_ke() -> dict

Benchmarks¶

Tên	Mô tả	Metrics
`perplexity`	Perplexity trên corpus tiếng Việt	perplexity_mean, perplexity_std
`generation`	Sinh văn bản	do_dai_trung_binh, thoi_gian
`sentiment`	Phân loại cảm xúc	accuracy
`speed`	Tốc độ inference	latency_mean_ms, latency_p95_ms
`qa`	Hỏi đáp	accuracy

`chay()` Returns¶

{
    "perplexity": {
        "perplexity_mean": float,
        "perplexity_std": float,
        "perplexity_min": float,
        "perplexity_max": float,
        "so_mau": int,
    },
    "generation": {
        "so_prompts": int,
        "do_dai_trung_binh": float,
        "chi_tiet": list,
    },
    "sentiment": {
        "accuracy": float,
        "so_mau": int,
    },
    "speed": {
        "latency_mean_ms": float,
        "latency_p50_ms": float,
        "latency_p95_ms": float,
        "latency_p99_ms": float,
        "so_lan_chay": int,
    },
    "qa": {
        "accuracy": float,
    },
    "tong_thoi_gian": float,
    "benchmarks": list,
}