dataset-viewer
Backend that powers the dataset viewer on Hugging Face dataset pages through a public API.
Language: yaml
Author: Danielle Lawrence (@danielle.lawrence)
16 stars · 363 views
Files
- test_61_worker_metrics.py (py)
- _podmonitor.yaml (yaml)
- ingress.yaml (yaml)
- ingress-internal.yaml (yaml)
- service.yaml (yaml)
- webhook (txt)
- servicemonitor.yaml (yaml)
- ingress.yaml (yaml)
- ingress-internal.yaml (yaml)
- service.yaml (yaml)
- admin (txt)
- servicemonitor.yaml (yaml)
- openapi-spec.yml (yml)
- doc-pr-upload.yml (yml)
- s-rows.yml (yml)
- _e2e_tests.yml (yml)
- s-sse-api.yml (yml)
- s-api.yml (yml)
- e2e.yml (yml)
- j-migration.yml (yml)
- l-libcommon.yml (yml)
- j-cache-maintenance.yml (yml)
- l-libviewer.yml (yml)
- cd.yml (yml)
- l-libapi.yml (yml)
- s-worker.yml (yml)
- s-webhook.yml (yml)
- .spectral.yml (yml)
- architecture.png (image)
- Dockerfile (txt)
- libs (txt)
- libviewer (txt)
- Makefile (txt)
- poetry.toml (toml)
- README.md (md)
- pyproject.toml (toml)
- libviewer (txt)
- __init__.py (py)
- dataset.py (py)
- tests (txt)
- test_dataset.py (py)
- test_20230622131500_lock_add_owner.py (py)
- migration.py (py)
- Makefile (txt)
- poetry.toml (toml)
- README.md (md)
- pyproject.toml (toml)
- tests (txt)
- pyproject.toml (toml)
- tests (txt)
- constants.py (py)
- SECURITY.md (md)
- .vscode (vscode)
- e2e (txt)
- test_55_first_rows.py (py)
- test_20_api_healthcheck.py (py)
- __init__.py (py)
- jobs (txt)
- mongodb_migration (txt)
- constants.py (py)
- test_drop_migrations.py (py)
- test_renaming_migration.py (py)
- __init__.py (py)
- test_resources.py (py)
- test_migration.py (py)
- test_deletion_migrations.py (py)
- conftest.py (py)
- test_collector.py (py)
- migrations (txt)
- test_20230825170200_lock_add_ttl.py (py)
- test_20240703160300_cache_add_duration.py (py)
- test_20231106193200_cache_add_partial_field_in_split_duckdb_index.py (py)
- test_20240626095000_cache_add_stemmer_field_in_split_duckdb_index.py (py)
- test_20230309141600_cache_add_job_runner_version.py (py)
- test_20230516101600_queue_delete_index_without_revision.py (py)
- test_20240206153000_cache_add_tags_in_hub_cache.py (py)
- test_20240221103200_cache_merge_config_split_names.py (py)
- test_20230824154900_cache_add_features_field_in_split_duckdb_index.py (py)
- __init__.py (py)
- test_20230511100700_queue_delete_indexes_with_force.py (py)
- test_20240731143600_queue_add_dataset_status_to_queue_metrics.py (py)
- test_20230703110100_cache_add_partial_field_in_config_parquet_and_info.py (py)
- test_20240221160700_cache_merge_split_first_rows.py (py)
- test_20240112164500_cache_add_partial_field_in_split_descriptive_statistics.py (py)
- test_20240109160700_cache_add_failed_runs.py (py)
- test_20230926095900_cache_add_has_fts_field_in_split_duckdb_index.py (py)
- test_20230516101500_queue_job_add_revision.py (py)
- test_20240104085000_cache_add_retries.py (py)
- test_20240624144000_cache_add_estimated_num_rows_in_size.py (py)
- test_20230705160600_queue_job_add_difficulty.py (py)
- test_20240626151600_cache_remove_has_fts_field_in_split_duckdb_index.py (py)
- test_20240221160800_cache_set_updated_at_to_root_step.py (py)
- test_plan.py (py)
- src (txt)
- mongodb_migration (txt)
- config.py (py)
- resources.py (py)
- collector.py (py)
- __init__.py (py)
- main.py (py)
- database_migrations.py (py)
- deletion_migrations.py (py)
- check.py (py)
- renaming_migrations.py (py)
- plan.py (py)
- drop_migrations.py (py)
- migrations (txt)
- _20240624144000_cache_add_estimated_num_rows_field_in_size.py (py)
- _20230824154900_cache_add_features_field_in_split_duckdb_index.py (py)
- _20240112164500_cache_add_partial_field_in_split_descriptive_statistics.py (py)
- _20240731143600_queue_add_dataset_status_to_queue_metrics.py (py)
- _20221110230400_example.py (py)
- _20240626095000_cache_add_stemmer_in_split_duckdb_index.py (py)
- _20230511100700_queue_delete_indexes_with_force.py (py)
- _20230309123100_cache_add_progress.py (py)
- _20230703110100_cache_add_partial_field_in_config_parquet_and_info.py (py)
- __init__.py (py)
- _20230309141600_cache_add_job_runner_version.py (py)
- _20240221160700_cache_merge_split_first_rows.py (py)
- _20230622131500_lock_add_owner.py (py)
- _20230926095900_cache_add_has_fts_field_in_split_duckdb_index.py (py)
- _20240626151600_cache_remove_has_fts_field_in_split_duckdb_index.py (py)
- _20240221103200_cache_merge_config_split_names.py (py)
- _20221116133500_queue_job_add_force.py (py)
- _20231106193200_cache_add_partial_field_in_split_duckdb_index.py (py)
- _20240109160700_cache_add_failed_runs.py (py)
- _20221117223000_cache_generic_response.py (py)
- _20240703160300_cache_add_duration.py (py)
- _20240221160800_cache_set_updated_at_to_root_step.py (py)
- _20230705160600_queue_job_add_difficulty.py (py)
- _20230126164900_queue_job_add_priority.py (py)
- _20240206153000_cache_add_tags_in_hub_cache.py (py)
- _20230825170200_lock_add_ttl.py (py)
- README.md (md)
- _20230516101600_queue_delete_index_without_revision.py (py)
- _20240104085000_cache_add_retries.py (py)
- _20230516101500_queue_job_add_revision.py (py)
- _20240619124500_cache_add_estimated_dataset_info_field_parquet_and_info.py (py)
- cache_maintenance (txt)
- Makefile (txt)
- poetry.toml (toml)
- test_collect_cache_metrics.py (py)
- __init__.py (py)
- conftest.py (py)
- test_backfill.py (py)
- test_collect_queue_metrics.py (py)
- test_discussions.py (py)
- utils.py (py)
- src (txt)
- cache_maintenance (txt)
- config.py (py)
- discussions.py (py)
- cache_metrics.py (py)
- __init__.py (py)
- main.py (py)
- queue_metrics.py (py)
- mlcroissant.md (md)
- backfill.py (py)
- docs (txt)
- Makefile (txt)
- source (txt)
- filter.md (md)
- server.md (md)
- valid.md (md)
- pyspark.md (md)
- statistics.md (md)
- first_rows.md (md)
- pandas.md (md)
- clickhouse.md (md)
- parquet_process.md (md)
- cudf.md (md)
- duckdb.md (md)
- data_types.md (md)
- parquet.md (md)
- splits.md (md)
- size.md (md)
- _toctree.yml (yml)
- polars.md (md)
- index.md (md)
- croissant.md (md)
- info.md (md)
- quick_start.md (md)
- configs_and_splits.md (md)
- rows.md (md)
- analyze_data.md (md)
- postgresql.md (md)
- search.md (md)
- poetry.toml (toml)
- README.md (md)
- pyproject.toml (toml)
- Makefile (txt)
- front (txt)
- admin_ui (txt)
- packages.txt (txt)
- app.py (py)
- poetry.toml (toml)
- README.md (md)
- pyproject.toml (toml)
- requirements.txt (txt)
- Makefile (txt)
- poetry.toml (toml)
- README.md (md)
- pyproject.toml (toml)
- tests (txt)
- test_50_search_healthcheck.py (py)
- test_53_filter.py (py)
- constants.py (py)
- test_13_first_rows.py (py)
- README.md (md)
- test_51_search_metrics.py (py)
- test_12_splits.py (py)
- test_14_statistics.py (py)
- test_30_admin_healthcheck.py (py)
- test_21_api_metrics.py (py)
- env (txt)
- prod.yaml (yaml)
- staging.yaml (yaml)
- test_31_admin_metrics.py (py)
- conftest.py (py)
- test_11_api.py (py)
- test_54_rows.py (py)
- data (txt)
- audios (txt)
- images (txt)
- 1.jpg (image)
- 2.jpg (image)
- pdfs (txt)
- test_40_rows_healthcheck.py (py)
- test_60_worker_healthcheck.py (py)
- charts (txt)
- templates (txt)
- worker (txt)
- hpa.yaml (yaml)
- deployment.yaml (yaml)
- test_52_search.py (py)
- utils.py (py)
- test_41_rows_metrics.py (py)
- test_10_healthcheck.py (py)
- DEVELOPER_GUIDE.md (md)
- README.md (md)
- docker-compose.yml (yml)
- chart (txt)
- Makefile (txt)
- values.yaml (yaml)
- pdb.yaml (yaml)
- _deployment.yaml (yaml)
- _hpa.yaml (yaml)
- podmonitor.yaml (yaml)
- jobs (txt)
- mongodb-migration (txt)
- job.yaml (yaml)
- _volumeMounts (txt)
- cron-jobs (txt)
- queue-metrics-collector (txt)
- job.yaml (yaml)
- backfill-retryable-errors (txt)
- pdb.yaml (yaml)
- job.yaml (yaml)
- post-messages (txt)
- job.yaml (yaml)
- cache-metrics-collector (txt)
- job.yaml (yaml)
- backfill (txt)
- job.yaml (yaml)
- _initContainers (txt)
- _volumes (txt)
- _env (txt)
- secrets.yaml (yaml)
- _common (txt)
- services (txt)
- search (txt)
- servicemonitor.yaml (yaml)
- test_rows.py (py)
- deployment.yaml (yaml)
- pdb.yaml (yaml)
- ingress.yaml (yaml)
- ingress-internal.yaml (yaml)
- service.yaml (yaml)
- sse-api (txt)
- servicemonitor.yaml (yaml)
- deployment.yaml (yaml)
- test_features.py (py)
- deployment.yaml (yaml)
- pdb.yaml (yaml)
- ingress.yaml (yaml)
- ingress-internal.yaml (yaml)
- service.yaml (yaml)
- rows (txt)
- servicemonitor.yaml (yaml)
- deployment.yaml (yaml)
- pdb.yaml (yaml)
- test_truncate_rows.py (py)
- data (txt)
- deployment.yaml (yaml)
- pdb.yaml (yaml)
- ingress.yaml (yaml)
- ingress-internal.yaml (yaml)
- service.yaml (yaml)
- api (txt)
- servicemonitor.yaml (yaml)
- deployment.yaml (yaml)
- test_processing_graph.py (py)
- test_parquet_utils.py (py)
- ingress.yaml (yaml)
- ingress-internal.yaml (yaml)
- service.yaml (yaml)
- Chart.yaml (yaml)
- tools (txt)
- docker-compose-mongo.yml (yml)
- LICENSE (txt)
- .github (github)
- workflows (txt)
- trufflehog.yml (yml)
- test_operations.py (py)
- _quality-python.yml (yml)
- s-search.yml (yml)
- doc-pr-build.yml (yml)
- chart-pr.yml (yml)
- s-admin.yml (yml)
- _unit-tests-python.yml (yml)
- doc-build.yml (yml)
- test_state.py (py)
- cli.py (py)
- src (txt)
- lib.rs (rs)
- dataset.rs (rs)
- parquet.rs (rs)
- Cargo.toml (toml)
- libapi (txt)
- Makefile (txt)
- poetry.toml (toml)
- README.md (md)
- pyproject.toml (toml)
- tests (txt)
- test_utils.py (py)
- __init__.py (py)
- test_jwt_token.py (py)
- test_duckdb.py (py)
- test_request.py (py)
- conftest.py (py)
- data (txt)
- test_image_rgb.jpg (image)
- test_response.py (py)
- test_authentication.py (py)
- src (txt)
- libapi (txt)
- config.py (py)
- request.py (py)
- routes (txt)
- __init__.py (py)
- healthcheck.py (py)
- metrics.py (py)
- __init__.py (py)
- duckdb.py (py)
- jwt_token.py (py)
- rows_utils.py (py)
- response.py (py)
- exceptions.py (py)
- authentication.py (py)
- utils.py (py)
- libcommon (txt)
- Makefile (txt)
- poetry.toml (toml)
- README.md (md)
- pyproject.toml (toml)
- tests (txt)
- constants.py (py)
- types.py (py)
- test_utils.py (py)
- test_orchestrator_smart_update.py (py)
- test_cloudfront.py (py)
- test_prometheus.py (py)
- __init__.py (py)
- test_orchestrator.py (py)
- test_resources.py (py)
- test_simple_cache.py (py)
- fixtures (txt)
- datasets.py (py)
- data (txt)
- test_image_rgb.jpg (image)
- fsspec.py (py)
- test_duckdb_utils.py (py)
- test_integration_s3_cloudfront.py (py)
- test_croissant_utils.py (py)
- test_config.py (py)
- conftest.py (py)
- test_backfill.py (py)
- viewer_utils (txt)
- test_assets.py (py)
- __init__.py (py)
- test_storage.py (py)
- test_backfill_on_real_graph.py (py)
- test_url_preparator.py (py)
- utils.py (py)
- queue (txt)
- test_dataset_blockages.py (py)
- __init__.py (py)
- test_jobs.py (py)
- test_past_jobs.py (py)
- test_lock.py (py)
- ATTRIBUTION.md (markdown)