diff --git a/.ci/pytorch/perf_test/compare_with_baseline.py b/.ci/pytorch/perf_test/compare_with_baseline.py
index f7b962632cd79f..c756df378729bf 100644
--- a/.ci/pytorch/perf_test/compare_with_baseline.py
+++ b/.ci/pytorch/perf_test/compare_with_baseline.py
@@ -59,12 +59,12 @@
 print("z-value: ", z_value)
 
 if z_value >= 3:
-    raise Exception('''\n
+    raise Exception(f'''\n
 z-value >= 3, there is high chance of perf regression.\n
 To reproduce this regression, run
-`cd .ci/pytorch/perf_test/ && bash {}.sh` on your local machine
+`cd .ci/pytorch/perf_test/ && bash {test_name}.sh` on your local machine
 and compare the runtime before/after your code change.
-'''.format(test_name))
+''')
 else:
     print("z-value < 3, no perf regression detected.")
     if args.update:
diff --git a/.github/scripts/trymerge.py b/.github/scripts/trymerge.py
index cc253f36cbd1b7..b1026ac420c945 100755
--- a/.github/scripts/trymerge.py
+++ b/.github/scripts/trymerge.py
@@ -620,7 +620,7 @@ def get_ghstack_prs(repo: GitRepo, pr: "GitHubPR") -> List[Tuple["GitHubPR", str
     Get the open PRs in the stack that are below this PR. Throws error if any of the PRs are out of sync.
     """
     assert pr.is_ghstack_pr()
-    entire_stack: List[Tuple["GitHubPR", str]] = []
+    entire_stack: List[Tuple[GitHubPR, str]] = []
     # For ghstack, cherry-pick commits based from origin
     orig_ref = f"{repo.remote}/{re.sub(r'/head$', '/orig', pr.head_ref())}"
     rev_list = repo.revlist(f"{pr.default_branch()}..{orig_ref}")
diff --git a/.lintrunner.toml b/.lintrunner.toml
index 0160314749cb1a..a6c14e5f82a760 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -3086,6 +3086,6 @@ init_command = [
     'python3',
     'tools/linter/adapters/pip_init.py',
     '--dry-run={{DRYRUN}}',
-    'ruff==0.0.277',
+    'ruff==0.0.280',
 ]
 is_formatter = true
diff --git a/pyproject.toml b/pyproject.toml
index 5cfea348cae5aa..2b16f472f612f9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,7 +31,7 @@ ignore = [
     "B019", "B020", "B023", "B024", "B026",
     "B028", # No explicit `stacklevel` keyword argument found
-    "B904", "B905",
+    "B904",
     "E402",
     "C408", # C408 ignored because we like the dict keyword argument syntax
     "E501", # E501 is not flexible enough, we're using B950 instead
@@ -70,6 +70,7 @@ select = [
     # Not included in flake8
     "UP",
     "PERF",
+    "PGH004",
     "PLE",
     "TRY302",
 ]
diff --git a/test/cpp_api_parity/functional_impl_check.py b/test/cpp_api_parity/functional_impl_check.py
index c09aaccd8f76b6..828f57e7e69812 100644
--- a/test/cpp_api_parity/functional_impl_check.py
+++ b/test/cpp_api_parity/functional_impl_check.py
@@ -225,7 +225,7 @@ def build_cpp_tests(unit_test_class, print_cpp_source=False):
     assert len(unit_test_class.functional_test_params_map) > 0
     cpp_sources = TORCH_NN_COMMON_TEST_HARNESS + SAMPLE_FUNCTIONAL_CPP_SOURCE
     functions = []
-    for test_name, test_params in unit_test_class.functional_test_params_map.items():
+    for test_params in unit_test_class.functional_test_params_map.values():
         cpp_sources += generate_test_cpp_sources(test_params=test_params, template=TORCH_NN_FUNCTIONAL_TEST_FORWARD)
         functions.append(f'{test_params.functional_variant_name}_test_forward')
     if print_cpp_source:
diff --git a/test/cpp_api_parity/module_impl_check.py b/test/cpp_api_parity/module_impl_check.py
index 8ede07b59034a4..aa18798940ae28 100644
--- a/test/cpp_api_parity/module_impl_check.py
+++ b/test/cpp_api_parity/module_impl_check.py
@@ -292,7 +292,7 @@ def build_cpp_tests(unit_test_class, print_cpp_source=False):
     assert len(unit_test_class.module_test_params_map) > 0
     cpp_sources = TORCH_NN_COMMON_TEST_HARNESS + SAMPLE_MODULE_CPP_SOURCE
     functions = []
-    for test_name, test_params in unit_test_class.module_test_params_map.items():
+    for test_params in unit_test_class.module_test_params_map.values():
         cpp_sources += generate_test_cpp_sources(
             test_params=test_params, template=TORCH_NN_MODULE_TEST_FORWARD_BACKWARD)
         functions.append(f'{test_params.module_variant_name}_test_forward_backward')
diff --git a/test/distributed/fsdp/test_fsdp_optim_state.py b/test/distributed/fsdp/test_fsdp_optim_state.py
index 21b2cd129d6e2a..f324d1dbc9d894 100644
--- a/test/distributed/fsdp/test_fsdp_optim_state.py
+++ b/test/distributed/fsdp/test_fsdp_optim_state.py
@@ -1870,14 +1870,14 @@ def step():
         step()
         original_osd = deepcopy(optim.state_dict())
-        for param_id, state in original_osd["state"].items():
+        for state in original_osd["state"].values():
             # Add customized value
             state["value1"] = 2.74
             state["value2"] = None
 
         osd = FSDP.optim_state_dict(model, optim, optim_state_dict=original_osd)
         osd_to_load = FSDP.optim_state_dict_to_load(model, optim, osd)
-        for param_id, state in osd_to_load["state"].items():
+        for state in osd_to_load["state"].values():
             self.assertEqual(state["value1"], 2.74)
             self.assertEqual(state["value2"], None)
diff --git a/test/distributions/test_distributions.py b/test/distributions/test_distributions.py
index 2e5197f3b5bb43..ca971a23fa2dc5 100644
--- a/test/distributions/test_distributions.py
+++ b/test/distributions/test_distributions.py
@@ -1898,8 +1898,8 @@ def rvs(self, n_sample):
         self._check_sampler_sampler(
             MixtureSameFamily(Categorical(probs=probs), Normal(loc, scale)),
             ScipyMixtureNormal(probs.numpy(), loc.numpy(), scale.numpy()),
-            '''MixtureSameFamily(Categorical(probs={}),
-            Normal(loc={}, scale={}))'''.format(probs, loc, scale))
+            f'''MixtureSameFamily(Categorical(probs={probs}),
+            Normal(loc={loc}, scale={scale}))''')
 
     def test_normal(self):
         loc = torch.randn(5, 5, requires_grad=True)
diff --git a/test/dynamo/test_modules.py b/test/dynamo/test_modules.py
index 3cf250e6e5cfba..8c6830b0f4a45b 100644
--- a/test/dynamo/test_modules.py
+++ b/test/dynamo/test_modules.py
@@ -504,7 +504,7 @@ def __init__(
 
     def forward(self, init_features):
         features = [init_features]
-        for name, layer in self.items():
+        for layer in self.values():
             new_features = layer(features)
             features.append(new_features)
         return torch.cat(features, 1)
diff --git a/test/functorch/discover_coverage.py b/test/functorch/discover_coverage.py
index 0926783cff9c35..868f1694974be2 100644
--- a/test/functorch/discover_coverage.py
+++ b/test/functorch/discover_coverage.py
@@ -321,7 +321,7 @@ def get_all_tested_ops():
     overridable_outplace_we_care_about = get_public_overridable_outplace_we_care_about()
     op_to_opinfo = get_ops_covered_by_opinfos()
     result = set({})
-    for name, op in get_covered_ops(overridable_outplace_we_care_about).items():
+    for op in get_covered_ops(overridable_outplace_we_care_about).values():
         opinfos = op_to_opinfo[op]
         for opinfo in opinfos:
             result.add(opinfo.name)
@@ -332,7 +332,7 @@ def get_skipped_or_xfailed_ops_for(test_name):
     overridable_outplace_we_care_about = get_public_overridable_outplace_we_care_about()
     op_to_opinfo = get_ops_covered_by_opinfos()
     result = set({})
-    for name, op in get_covered_ops(overridable_outplace_we_care_about).items():
+    for op in get_covered_ops(overridable_outplace_we_care_about).values():
         opinfos = op_to_opinfo[op]
         for opinfo in opinfos:
             for decorator in opinfo.decorators:
diff --git a/test/inductor/test_kernel_benchmark.py b/test/inductor/test_kernel_benchmark.py
index 105b3180b2331e..93d6670d576fd2 100644
--- a/test/inductor/test_kernel_benchmark.py
+++ b/test/inductor/test_kernel_benchmark.py
@@ -27,7 +27,7 @@ def setUp(self):
 
     def get_compiled_module(self):
         compiled_module = None
-        for k, v in PyCodeCache.cache.items():
+        for v in PyCodeCache.cache.values():
             if hasattr(v, "benchmark_compiled_module"):
                 self.assertTrue(
                     compiled_module is None, "Found multiple compiled modules"
diff --git a/test/inductor/test_triton_wrapper.py b/test/inductor/test_triton_wrapper.py
index afac34a84fa5b5..ae0725a3de3cf8 100644
--- a/test/inductor/test_triton_wrapper.py
+++ b/test/inductor/test_triton_wrapper.py
@@ -12,7 +12,7 @@ class TestTritonWrapper(TestCase):
     def get_compiled_module(self):
         compiled_module = None
-        for k, v in PyCodeCache.cache.items():
+        for v in PyCodeCache.cache.values():
             if hasattr(v, "benchmark_compiled_module"):
                 self.assertTrue(
                     compiled_module is None, "Found multiple compiled modules"
diff --git a/test/jit/fixtures_srcs/test_upgrader_models_generation.py b/test/jit/fixtures_srcs/test_upgrader_models_generation.py
index ae11a49bd2373d..58267c1e0ea320 100644
--- a/test/jit/fixtures_srcs/test_upgrader_models_generation.py
+++ b/test/jit/fixtures_srcs/test_upgrader_models_generation.py
@@ -7,7 +7,7 @@ class TestUpgraderModelGeneration(TestCase):
     def test_all_modules(self):
-        for a_module, expect_operator in ALL_MODULES.items():
+        for a_module in ALL_MODULES.keys():
             module_name = type(a_module).__name__
             self.assertTrue(
                 isinstance(a_module, torch.nn.Module),
diff --git a/test/jit/test_list_dict.py b/test/jit/test_list_dict.py
index 57b2281070b3f1..85082b494e01c6 100644
--- a/test/jit/test_list_dict.py
+++ b/test/jit/test_list_dict.py
@@ -1964,7 +1964,7 @@ def __init__(self, configs):
                 self.configs = configs
 
             def forward(self, x):
-                for _id, config in self.configs.items():
+                for config in self.configs.values():
                     x += config.size
                 return x
diff --git a/test/jit/test_tracer.py b/test/jit/test_tracer.py
index f7fd1f22a68a7a..1d241172762a22 100644
--- a/test/jit/test_tracer.py
+++ b/test/jit/test_tracer.py
@@ -2383,7 +2383,7 @@ def __init__(self):
             def forward(self, feature_map: Dict[str, List[Tensor]]) -> Tensor:
                 output = []
-                for i, j in feature_map.items():
+                for j in feature_map.values():
                     output.append(self.linear(j[0]))
                 return torch.stack(output)
diff --git a/test/quantization/eager/test_numeric_suite_eager.py b/test/quantization/eager/test_numeric_suite_eager.py
index 128f7cb96a06f2..a798745d6537d4 100644
--- a/test/quantization/eager/test_numeric_suite_eager.py
+++ b/test/quantization/eager/test_numeric_suite_eager.py
@@ -104,7 +104,7 @@ def compare_and_validate_results(float_model, q_model):
                 float_model.state_dict(), q_model.state_dict()
             )
             self.assertEqual(len(weight_dict), 1)
-            for k, v in weight_dict.items():
+            for v in weight_dict.values():
                 self.assertTrue(v["float"].shape == v["quantized"].shape)
 
         model_list = [AnnotatedConvModel(qengine), AnnotatedConvBnReLUModel(qengine)]
@@ -126,7 +126,7 @@ def compare_and_validate_results(float_model, q_model):
                 float_model.state_dict(), q_model.state_dict()
             )
            self.assertEqual(len(weight_dict), 1)
-            for k, v in weight_dict.items():
+            for v in weight_dict.values():
                 self.assertTrue(v["float"].shape == v["quantized"].shape)
 
         model_list = [AnnotatedSingleLayerLinearModel(qengine)]
@@ -148,7 +148,7 @@ def compare_and_validate_results(float_model, q_model):
                 float_model.state_dict(), q_model.state_dict()
             )
             self.assertEqual(len(weight_dict), 1)
-            for k, v in weight_dict.items():
+            for v in weight_dict.values():
                 self.assertTrue(len(v["float"]) == len(v["quantized"]))
                 for i, val in enumerate(v["quantized"]):
                     self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
@@ -172,7 +172,7 @@ def compare_and_validate_results(float_model, q_model):
                 float_model.state_dict(), q_model.state_dict()
             )
             self.assertEqual(len(weight_dict), 1)
-            for k, v in weight_dict.items():
+            for v in weight_dict.values():
                 self.assertTrue(len(v["float"]) == len(v["quantized"]))
                 for i, val in enumerate(v["quantized"]):
                     self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
@@ -194,7 +194,7 @@ def test_compare_model_stub_conv_static(self):
         def compare_and_validate_results(float_model, q_model, module_swap_list, data):
             ob_dict = compare_model_stub(float_model, q_model, module_swap_list, data)
             self.assertEqual(len(ob_dict), 1)
-            for k, v in ob_dict.items():
+            for v in ob_dict.values():
                 self.assertTrue(len(v["float"]) == len(v["quantized"]))
                 for i, val in enumerate(v["quantized"]):
                     self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
@@ -221,7 +221,7 @@ def test_compare_model_stub_linear_static(self):
         def compare_and_validate_results(float_model, q_model, module_swap_list, data):
             ob_dict = compare_model_stub(float_model, q_model, module_swap_list, data)
             self.assertEqual(len(ob_dict), 1)
-            for k, v in ob_dict.items():
+            for v in ob_dict.values():
                 self.assertTrue(len(v["float"]) == len(v["quantized"]))
                 for i, val in enumerate(v["quantized"]):
                     self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
@@ -246,7 +246,7 @@ def test_compare_model_stub_partial(self):
         def compare_and_validate_results(float_model, q_model, module_swap_list, data):
             ob_dict = compare_model_stub(float_model, q_model, module_swap_list, data)
             self.assertEqual(len(ob_dict), 1)
-            for k, v in ob_dict.items():
+            for v in ob_dict.values():
                 self.assertTrue(len(v["float"]) == len(v["quantized"]))
                 for i, val in enumerate(v["quantized"]):
                     self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
@@ -301,7 +301,7 @@ def test_compare_model_stub_functional_static(self):
         self.assertTrue(isinstance(q_model.myadd_relu, Shadow))
         self.assertTrue(isinstance(q_model.my_scalar_add, Shadow))
         self.assertTrue(isinstance(q_model.my_scalar_mul, Shadow))
-        for k, v in ob_dict.items():
+        for v in ob_dict.values():
             self.assertTrue(len(v["float"]) == len(v["quantized"]))
             for i, val in enumerate(v["quantized"]):
                 self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
@@ -315,7 +315,7 @@ def test_compare_model_stub_linear_dynamic(self):
         def compare_and_validate_results(float_model, q_model, module_swap_list, data):
             ob_dict = compare_model_stub(float_model, q_model, module_swap_list, data)
             self.assertEqual(len(ob_dict), 1)
-            for k, v in ob_dict.items():
+            for v in ob_dict.values():
                 self.assertTrue(len(v["float"]) == len(v["quantized"]))
                 for i, val in enumerate(v["quantized"]):
                     self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
@@ -344,7 +344,7 @@ def compare_and_validate_results(
                 float_model, q_model, module_swap_list, input, hidden
             )
             self.assertEqual(len(ob_dict), 1)
-            for k, v in ob_dict.items():
+            for v in ob_dict.values():
                 self.assertTrue(len(v["float"]) == len(v["quantized"]))
                 for i, val in enumerate(v["quantized"]):
                     self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
@@ -375,7 +375,7 @@ def compare_and_validate_results(float_model, q_model, data):
             expected_act_compare_dict_keys = {"conv.stats", "quant.stats"}
             self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
-            for k, v in act_compare_dict.items():
+            for v in act_compare_dict.values():
                 self.assertTrue(v["float"][0].shape == v["quantized"][0].shape)
 
         model_list = [AnnotatedConvModel(qengine), AnnotatedConvBnReLUModel(qengine)]
@@ -398,7 +398,7 @@ def compare_and_validate_results(float_model, q_model, data):
             expected_act_compare_dict_keys = {"fc1.quant.stats", "fc1.module.stats"}
             self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
-            for k, v in act_compare_dict.items():
+            for v in act_compare_dict.values():
                 self.assertTrue(len(v["float"]) == len(v["quantized"]))
                 for i, val in enumerate(v["quantized"]):
                     self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
@@ -434,7 +434,7 @@ def test_compare_model_outputs_functional_static(self):
             "quant.stats",
         }
         self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
-        for k, v in act_compare_dict.items():
+        for v in act_compare_dict.values():
             self.assertTrue(len(v["float"]) == len(v["quantized"]))
             for i, val in enumerate(v["quantized"]):
                 self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
@@ -451,7 +451,7 @@ def compare_and_validate_results(float_model, q_model, data):
             expected_act_compare_dict_keys = {"fc1.stats"}
             self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
-            for k, v in act_compare_dict.items():
+            for v in act_compare_dict.values():
                 self.assertTrue(len(v["float"]) == len(v["quantized"]))
                 for i, val in enumerate(v["quantized"]):
                     self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
@@ -480,7 +480,7 @@ def compare_and_validate_results(float_model, q_model, input, hidden):
             expected_act_compare_dict_keys = {"lstm.stats"}
             self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
-            for k, v in act_compare_dict.items():
+            for v in act_compare_dict.values():
                 self.assertTrue(len(v["float"]) == len(v["quantized"]))
                 for i, val in enumerate(v["quantized"]):
                     self.assertTrue(len(v["float"][i]) == len(v["quantized"][i]))
diff --git a/test/quantization/fx/test_numeric_suite_fx.py b/test/quantization/fx/test_numeric_suite_fx.py
index f84e2048775346..191f927c7b217f 100644
--- a/test/quantization/fx/test_numeric_suite_fx.py
+++ b/test/quantization/fx/test_numeric_suite_fx.py
@@ -637,7 +637,7 @@ def test_op_relationship_mapping(self):
         # 4. go through the ops mapped to each QuantizeHandler type, and verify
         # correctness.
         def _op_in_base_sets_of_related_ops(op):
-            for name, ops in base_name_to_sets_of_related_ops.items():
+            for ops in base_name_to_sets_of_related_ops.values():
                 if op in ops:
                     return True
             return False
@@ -1829,7 +1829,7 @@ def test_extend_logger_results_with_comparison(self):
             results, 'fp32', 'int8',
             compute_cosine_similarity, 'cosine_similarity_int8_vs_fp32')
 
-        for layer_name, layer_results in results.items():
+        for layer_results in results.values():
             assert 'sqnr_int8_vs_fp32' in \
                 layer_results['weight']['int8'][0].keys()
             assert 'l2_error_int8_vs_fp32' in \
diff --git a/test/run_test.py b/test/run_test.py
index b81544d8b4d634..f676a0f4782c08 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -846,7 +846,7 @@ def run_doctests(test_module, test_directory, options):
     if enabled["qengine"] == "auto":
         try:
             # Is there a better check if quantization is enabled?
-            import torch.ao.nn.quantized as nnq  # NOQA
+            import torch.ao.nn.quantized as nnq  # NOQA: F401
 
             torch.backends.quantized.engine = "qnnpack"
             torch.backends.quantized.engine = "fbgemm"
@@ -857,9 +857,9 @@ def run_doctests(test_module, test_directory, options):
     if enabled["onnx"] == "auto":
         try:
-            import onnx  # NOQA
-            import onnxruntime  # NOQA
-            import onnxscript  # NOQA
+            import onnx  # NOQA: F401
+            import onnxruntime  # NOQA: F401
+            import onnxscript  # NOQA: F401
         except ImportError:
             exclude_module_list.append("torch.onnx.*")
             enabled["onnx"] = False
diff --git a/test/test_dispatch.py b/test/test_dispatch.py
index e98385a8ce3a25..cb485bda7af49c 100644
--- a/test/test_dispatch.py
+++ b/test/test_dispatch.py
@@ -782,11 +782,11 @@ def test_find_dangling_impls_ext(self):
         impls = C._dispatch_find_dangling_impls()
         self.assertEqual(1, len(impls))
         self.assertEqual(
-            '''\
+            f'''\
 name: __test::foo
 schema: (none)
-CPU: registered at {}:5 :: () -> () [ boxed unboxed ]
-'''.format(extension_path),
+CPU: registered at {extension_path}:5 :: () -> () [ boxed unboxed ]
+''',
             impls[0])
 
     def test_dispatch_print_registrations_for_dispatch_key_invalid(self):
diff --git a/test/test_fx.py b/test/test_fx.py
index 15daf07ac2435f..e7480dc4282b1f 100644
--- a/test/test_fx.py
+++ b/test/test_fx.py
@@ -3490,7 +3490,7 @@ def f_sum(x):
 
         def f_sum_dict(x):
             out = 0
-            for k, v in x.items():
+            for v in x.values():
                 out += v
             return out
@@ -4302,7 +4302,7 @@ def _get_functional(cls):
             try:
                 sig = inspect.signature(fn)
                 has_tensor_arg = False
-                for arg, param in sig.parameters.items():
+                for param in sig.parameters.values():
                     if isinstance(param.annotation, type) and issubclass(param.annotation, torch.Tensor):
                         has_tensor_arg = True
                 if not has_tensor_arg:
diff --git a/test/test_jit.py b/test/test_jit.py
index cd7312ad92ec0c..cfc5aa986f43b8 100644
--- a/test/test_jit.py
+++ b/test/test_jit.py
@@ -3497,12 +3497,12 @@ def test_sequence_parsing(self):
         ]
         for exp, result in tests:
             cu = torch.jit.CompilationUnit()
-            full = """
+            full = f"""
 def bar(x, y):
     return x + y
 def foo(x):
-    {}
-    """.format(exp)
+    {exp}
+    """
             if isinstance(result, str):
                 with self.assertRaisesRegex(RuntimeError, result):
                     cu.define(full)
@@ -4006,7 +4006,7 @@ def replace(e):
                 return e.getattr('name')
             return e
 
-        for k, v in result.items():
+        for v in result.values():
             for i in range(len(v)):
                 if isinstance(v[i], tuple):
                     n, v2 = v[i]
@@ -13065,10 +13065,10 @@ def test_method_casts_script(self):
         ]
 
         for cast_type in cast_types:
-            cu = torch.jit.CompilationUnit('''
+            cu = torch.jit.CompilationUnit(f'''
             def cast_to(x):
                 return x.{cast_type}()
-            '''.format(cast_type=cast_type))
+            ''')
 
             x = torch.rand(3, 4, 5) * 128
             cu_result = cu.cast_to(x)
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 9c85af4fbd6eaf..eb4f156182e978 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -5328,16 +5328,14 @@ def tracker(worker):
             elapsed_scipy_ms = 1000.0 * elapsed_scipy / repeat
             elapsed_general_scipy_ms = 1000.0 * elapsed_general_scipy / repeat
 
-            print('''
+            print(f'''
 CPU timings: torch.lobpcg vs scipy.sparse.linalg.lobpcg
 -------------------------------------------------------
               | standard | generalized | method
-torch.lobpcg  | {:10.2f}  | {:10.2f}  | ortho
-scipy_lobpcg  | {:10.2f}  | {:10.2f}  | N/A
--(input size: {:4}, eigenpairs:{:2}, units: ms per call)-
-            '''.format(elapsed_ortho_ms, elapsed_ortho_general_ms,
-                       elapsed_scipy_ms, elapsed_general_scipy_ms,
-                       m, k))
+torch.lobpcg  | {elapsed_ortho_ms:10.2f}  | {elapsed_ortho_general_ms:10.2f}  | ortho
+scipy_lobpcg  | {elapsed_scipy_ms:10.2f}  | {elapsed_general_scipy_ms:10.2f}  | N/A
+-(input size: {m:4}, eigenpairs:{k:2}, units: ms per call)-
+            ''')
 
             # Handling of very small tolerence
             tol = 1e-100
@@ -5378,14 +5376,14 @@ def tracker(worker):
                 iters2_general = -1
                 eq_err_general_scipy = -1
 
-            print('''\
-Handling of small tol={:6.0e}: torch.lobpcg vs scipy.sparse.linalg.lobpcg
+            print(f'''\
+Handling of small tol={tol:6.0e}: torch.lobpcg vs scipy.sparse.linalg.lobpcg
 ----------------------------------------------------------------------------
               | standard | generalized |  niter | method
-torch.lobpcg  | {:10.2e}  | {:10.2e}  | {:6} | ortho
-scipy_lobpcg  | {:10.2e}  | {:10.2e}  | {:6} | N/A
----(input size: {:4}, eigenpairs:{:2}, units: relative error, maxiter={:4})---
-'''.format(tol, eq_err, eq_err_general, iters1, eq_err_scipy, eq_err_general_scipy, iters2, m, k, niter))
+torch.lobpcg  | {eq_err:10.2e}  | {eq_err_general:10.2e}  | {iters1:6} | ortho
+scipy_lobpcg  | {eq_err_scipy:10.2e}  | {eq_err_general_scipy:10.2e}  | {iters2:6} | N/A
+---(input size: {m:4}, eigenpairs:{k:2}, units: relative error, maxiter={niter:4})---
+''')
 
     def _test_addmm_addmv(self, f, t, m, v, *, alpha=None, beta=None,
                           transpose_out=False, activation=None):
         dtype = t.dtype
diff --git a/test/test_mkldnn_fusion.py b/test/test_mkldnn_fusion.py
index 59c6af1c56a61d..4858a27dec9145 100644
--- a/test/test_mkldnn_fusion.py
+++ b/test/test_mkldnn_fusion.py
@@ -204,7 +204,7 @@ def forward(self, x):
                 x = self.unary(x)
                 return x
 
-        for pointwise_name, pointwise_info in self._unary_list().items():
+        for pointwise_info in self._unary_list().values():
            options = itertools.product([[2, 3, 10], [2, 10]], [True, False])
            for input_shape, bias in options:
                with torch.no_grad():
@@ -233,7 +233,7 @@ def forward(self, x):
                 return x
 
         input_shapes = {2: (112, 112), 3: (55, 55, 55)}
-        for pointwise_name, pointwise_info in self._unary_list().items():
+        for pointwise_info in self._unary_list().values():
             for dim in [2, 3]:
                 channels_last = torch.channels_last if dim == 2 else torch.channels_last_3d
                 options = itertools.product([True, False], [1, 2], [1, 4], [torch.contiguous_format, channels_last])
@@ -347,7 +347,7 @@ def forward(self, x):
 
         input_shapes = {2: (28, 28)}
         kernel_size = 3
-        for pointwise_name, pointwise_info in self._unary_list().items():
+        for pointwise_info in self._unary_list().values():
             for dim in [2]:
                 channels_last = torch.channels_last if dim == 2 else torch.channels_last_3d
                 options = itertools.product([True, False], [1, 2], [1, 4], [torch.contiguous_format, channels_last], [False, True])
diff --git a/test/test_nn.py b/test/test_nn.py
index c29a031e4d3e7e..e7ac3aacacb38c 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -2890,7 +2890,7 @@ def _test_loss_equal_input_target_shape(self, cast):
         input = cast(torch.randn(3, 5))
         target = cast(torch.randn(5, 3))
 
-        for _name, fn in losses.items():
+        for fn in losses.values():
             self.assertRaises(Exception, lambda: fn(input, target))
 
     def test_loss_equal_input_target_shape(self):
@@ -5522,7 +5522,7 @@ def test_pointwise_loss_broadcast(self):
         }
 
         input = torch.randn(2, 1, requires_grad=True)
-        for _name, fn in losses.items():
+        for fn in losses.values():
             for requires_grad in [True, False]:
                 # When target.requires_grad=True, its impl is in Python, while the other is in TH.
                 target = torch.randn(2, 10, requires_grad=requires_grad)
diff --git a/test/test_ops.py b/test/test_ops.py
index 3b43a56bc4c36b..ca93ab161a9c95 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -1325,7 +1325,7 @@
         # one or more tensors requiring grad
         def _tensor_requires_grad(x):
             if isinstance(x, dict):
-                for k, v in x.items():
+                for v in x.values():
                     if _tensor_requires_grad(v):
                         return True
             if isinstance(x, (list, tuple)):
diff --git a/test/test_serialization.py b/test/test_serialization.py
index 76739ed5c62493..82640442769ce2 100644
--- a/test/test_serialization.py
+++ b/test/test_serialization.py
@@ -3908,7 +3908,7 @@ def forward(self, input):
             state_dict = m.state_dict()
             torch.save(state_dict, f)
             result = torch.load(f, mmap=True)
-            for k, v in result.items():
+            for v in result.values():
                 self.assertTrue(v.is_cuda)
 
     def run(self, *args, **kwargs):
diff --git a/tools/code_analyzer/gen_oplist.py b/tools/code_analyzer/gen_oplist.py
index 0a9e2a1539b6a7..ec130ca40ce1f1 100644
--- a/tools/code_analyzer/gen_oplist.py
+++ b/tools/code_analyzer/gen_oplist.py
@@ -18,7 +18,7 @@
 def extract_all_operators(selective_builder: SelectiveBuilder) -> Set[str]:
     ops = []
-    for op_name, op in selective_builder.operators.items():
+    for op_name in selective_builder.operators.keys():
         ops.append(op_name)
     return set(ops)
@@ -74,7 +74,7 @@ def gen_supported_mobile_models(model_dicts: List[Any], output_dir: str) -> None
         if "debug_info" in model_dict:
             debug_info = json.loads(model_dict["debug_info"][0])
             if debug_info["is_new_style_rule"]:
-                for asset, asset_info in debug_info["asset_info"].items():
+                for asset_info in debug_info["asset_info"].values():
                     md5_hashes.update(asset_info["md5_hash"])
 
     supported_hashes = ""
diff --git a/torch/_decomp/decompositions.py b/torch/_decomp/decompositions.py
index 1b2c31e9d2b997..4f894b044f3445 100644
--- a/torch/_decomp/decompositions.py
+++ b/torch/_decomp/decompositions.py
@@ -2285,7 +2285,7 @@ def one_layer_rnn_data(
     hh_bias = params[3] if has_biases else None
 
     step_output = []
-    hiddens: List["torch.Tensor"] = []
+    hiddens: List[torch.Tensor] = []
 
     last_batch_size = batch_sizes[-1] if reverse else batch_sizes[0]
     cur_hidden = hidden.narrow(0, 0, last_batch_size)
diff --git a/torch/_dynamo/guards.py b/torch/_dynamo/guards.py
index 5df8adae09cecb..8e67d7949715dc 100644
--- a/torch/_dynamo/guards.py
+++ b/torch/_dynamo/guards.py
@@ -832,7 +832,7 @@ def __init__(
     ):
         guards = output_graph.guards if output_graph else None
         self.valid = True
-        self._weakrefs: List["ReferenceType[object]"] = []
+        self._weakrefs: List[ReferenceType[object]] = []
         self._seen_ids: Set[int] = set()
         self.output_graph = output_graph
diff --git a/torch/_dynamo/utils.py b/torch/_dynamo/utils.py
index e4cd65d7de1a5f..08312cbb535c02 100644
--- a/torch/_dynamo/utils.py
+++ b/torch/_dynamo/utils.py
@@ -151,7 +151,7 @@ def increment_op_count(cnt):
 def print_time_report():
     total = 0
     total_by_key = {}
-    for frame, timings in frame_phase_timing.items():
+    for timings in frame_phase_timing.values():
         for key, timing in timings.items():
             total += timing
             if key not in total_by_key:
diff --git a/torch/_dynamo/variables/higher_order_ops.py b/torch/_dynamo/variables/higher_order_ops.py
index 95250fe0f39ac8..9827556bd8a5b5 100644
--- a/torch/_dynamo/variables/higher_order_ops.py
+++ b/torch/_dynamo/variables/higher_order_ops.py
@@ -312,7 +312,7 @@ def call_function(
         ):
             raise UserError(
                 UserErrorType.DYNAMIC_CONTROL_FLOW,
-                "Expected a list of tensors but got {actual_args}".format(
+                "Expected a list of tensors but got {actual_args}".format(  # noqa: UP032
                     actual_args=[
                         str(operand.python_type())
                         if isinstance(operand, VariableTracker)
diff --git a/torch/_export/__init__.py b/torch/_export/__init__.py
index ce5abf100db03a..771b18e64cf1c2 100644
--- a/torch/_export/__init__.py
+++ b/torch/_export/__init__.py
@@ -162,7 +162,7 @@ def export(
         **kwargs,
     )
 
-    params_buffers: "OrderedDict[str, Union[torch.Tensor, torch.nn.Parameter]]" = OrderedDict()
+    params_buffers: OrderedDict[str, Union[torch.Tensor, torch.nn.Parameter]] = OrderedDict()
     for name, param in gm_torch_level.named_parameters(recurse=True, remove_duplicate=False):
         params_buffers[name] = param
diff --git a/torch/_inductor/codegen/wrapper.py b/torch/_inductor/codegen/wrapper.py
index 92b10455708020..ae40e3a12a812d 100644
--- a/torch/_inductor/codegen/wrapper.py
+++ b/torch/_inductor/codegen/wrapper.py
@@ -122,9 +122,9 @@ def __str__(self):
 class MemoryPlanningState:
     def __init__(self):
         super().__init__()
-        self.reuse_pool: Dict[
-            Any, List["FreeIfNotReusedLine"]
-        ] = collections.defaultdict(list)
+        self.reuse_pool: Dict[Any, List[FreeIfNotReusedLine]] = collections.defaultdict(
+            list
+        )
 
     def __contains__(self, key):
         return bool(self.reuse_pool.get(key, None))
diff --git a/torch/_inductor/graph.py b/torch/_inductor/graph.py
index e57e040ce616b5..20ca00603d17c2 100644
--- a/torch/_inductor/graph.py
+++ b/torch/_inductor/graph.py
@@ -452,7 +452,7 @@ def visit(value):
                 value.realize()
             return value
 
-        for key, value in self.env.items():
+        for value in self.env.values():
             try:
                 visit(value)
             except Exception:
diff --git a/torch/_inductor/scheduler.py b/torch/_inductor/scheduler.py
index 3ecc2ff00d63b1..20f7a286964f46 100644
--- a/torch/_inductor/scheduler.py
+++ b/torch/_inductor/scheduler.py
@@ -58,7 +58,7 @@ def fuse(node1: "BaseSchedulerNode", node2: "BaseSchedulerNode"):
 
 class BaseSchedulerNode:
     def __init__(self, scheduler: "Scheduler", node: ir.Buffer):
-        self.scheduler: "Scheduler" = scheduler
+        self.scheduler: Scheduler = scheduler
         self.node: ir.Buffer = node
         self.users: Optional[List[NodeUser]] = None
         self.inverse_users: List[BaseSchedulerNode] = []
diff --git a/torch/_torch_docs.py b/torch/_torch_docs.py
index 4b93d25f547306..880c1902a9925c 100644
--- a/torch/_torch_docs.py
+++ b/torch/_torch_docs.py
@@ -14053,23 +14053,19 @@ def merge_dicts(*dicts):
     if hasattr(torch, unary_foreach_func_name):
         add_docstr(
             getattr(torch, unary_foreach_func_name),
-            r"""
-{}(self: List[Tensor]) -> List[Tensor]
+            rf"""
+{unary_foreach_func_name}(self: List[Tensor]) -> List[Tensor]
 
-Apply :func:`torch.{}` to each Tensor of the input list.
-            """.format(
-                unary_foreach_func_name, unary_base_func_name
-            ),
+Apply :func:`torch.{unary_base_func_name}` to each Tensor of the input list.
+            """,
         )
     unary_inplace_foreach_func_name = f"{unary_foreach_func_name}_"
     if hasattr(torch, unary_inplace_foreach_func_name):
         add_docstr(
             getattr(torch, unary_inplace_foreach_func_name),
-            r"""
-{}(self: List[Tensor]) -> None
+            rf"""
+{unary_inplace_foreach_func_name}(self: List[Tensor]) -> None
 
-Apply :func:`torch.{}` to each Tensor of the input list.
-            """.format(
-                unary_inplace_foreach_func_name, unary_base_func_name
-            ),
+Apply :func:`torch.{unary_base_func_name}` to each Tensor of the input list.
+            """,
         )
diff --git a/torch/ao/ns/fx/graph_matcher.py b/torch/ao/ns/fx/graph_matcher.py
index ed90f530ba2694..8db946ec707a71 100644
--- a/torch/ao/ns/fx/graph_matcher.py
+++ b/torch/ao/ns/fx/graph_matcher.py
@@ -134,7 +134,7 @@ def _recursively_add_node_arg_to_stack(self, arg: Any) -> None:
             for inner_arg in arg:
                 self._recursively_add_node_arg_to_stack(inner_arg)
         elif isinstance(arg, torch.fx.immutable_collections.immutable_dict):
-            for key, value in arg.items():
+            for value in arg.values():
                 self._recursively_add_node_arg_to_stack(value)
 
     def _is_matchable(self, node: Node) -> bool:
diff --git a/torch/ao/ns/fx/graph_passes.py b/torch/ao/ns/fx/graph_passes.py
index 3f4e156859024b..edd5284cf6eb6f 100644
--- a/torch/ao/ns/fx/graph_passes.py
+++ b/torch/ao/ns/fx/graph_passes.py
@@ -424,7 +424,7 @@ def _can_insert(node_a_arg, gm_a):
                     return False
             cur_idx += 1
 
-    for kwarg_name, kwarg_val in norm_kwargs.items():
+    for kwarg_val in norm_kwargs.values():
         # stitch the inputs from base graph
         if cur_idx == 0:
             pass
diff --git a/torch/ao/ns/fx/mappings.py b/torch/ao/ns/fx/mappings.py
index dbc9097cb0556c..b1a872056d16cf 100644
--- a/torch/ao/ns/fx/mappings.py
+++ b/torch/ao/ns/fx/mappings.py
@@ -471,7 +471,7 @@ def add_op_to_sets_of_related_ops(
     related_op: Optional[NSNodeTargetType],
 ) -> None:
     if related_op is not None:
-        for base_name, set_of_related_ops in base_name_to_sets_of_related_ops.items():
+        for set_of_related_ops in base_name_to_sets_of_related_ops.values():
             if related_op in set_of_related_ops:
                 set_of_related_ops.add(op)
                 return
diff --git a/torch/ao/ns/fx/n_shadows_utils.py b/torch/ao/ns/fx/n_shadows_utils.py
index fa328deb0f592a..dba4e133e66071 100644
--- a/torch/ao/ns/fx/n_shadows_utils.py
+++ b/torch/ao/ns/fx/n_shadows_utils.py
@@ -704,7 +704,7 @@ def create_add_loggers_graph(
     from torch.ao.ns._numeric_suite_fx import OutputLogger, OutputComparisonLogger
 
     def _get_subgraph_containing_node(node, subgraphs_dedup):
-        for name, subgraph in subgraphs_dedup.items():
+        for subgraph in subgraphs_dedup.values():
             if node in subgraph:
                 return subgraph
         return None
@@ -1289,7 +1289,7 @@ def print_n_shadows_summary(
         return
 
     results = []
-    for subgraph_name, subgraph_data in results_comparison.items():
+    for subgraph_data in results_comparison.values():
         mean_all_candidates = [
             candidate['cmp_mean']
             for candidate_name, candidate in subgraph_data['candidates'].items()
diff --git a/torch/ao/ns/fx/pattern_utils.py b/torch/ao/ns/fx/pattern_utils.py
index bb6d3f14431a3b..2925dfe012125f 100644
--- a/torch/ao/ns/fx/pattern_utils.py
+++ b/torch/ao/ns/fx/pattern_utils.py
@@ -26,7 +26,7 @@ def get_type_a_related_to_b(
     # TODO(future PR): add the rest of modules and ops here
     type_a_related_to_b: Set[Tuple[NSNodeTargetType, NSNodeTargetType]] = set()
 
-    for base_name, s in base_name_to_sets_of_related_ops.items():
+    for s in base_name_to_sets_of_related_ops.values():
         s_list = list(s)
         # add every bidirectional pair
         for idx_0 in range(0, len(s_list)):
@@ -70,7 +70,7 @@ def get_reversed_fusions() -> List[Tuple[NSFusionType, int]]:
     all_quant_patterns = _get_pattern_to_quantize_handlers(get_native_backend_config())
 
     default_base_op_idx = 0
-    for quant_pattern, _quant_handler in all_quant_patterns.items():
+    for quant_pattern in all_quant_patterns.keys():
         # TODO: this is a temporary hack to flatten the patterns from quantization so
         # that it works with the ns matcher function, maybe we should use `_is_match`
         # in torch.ao.quantization.fx.match_utils to match the patterns
diff --git a/torch/ao/ns/fx/utils.py b/torch/ao/ns/fx/utils.py
index 8d6f54ef9c148f..bf35a7e531e1ab 100644
--- a/torch/ao/ns/fx/utils.py
+++ b/torch/ao/ns/fx/utils.py
@@ -363,7 +363,7 @@ def rekey_logger_info_on_node_name_of_model(
     new_results = {}
     for old_layer_name, result_type_to_results in results.items():
         new_layer_name = None
-        for _result_type, model_name_to_results in result_type_to_results.items():
+        for model_name_to_results in result_type_to_results.values():
             for cur_model_name, list_of_results in model_name_to_results.items():
                 if cur_model_name == model_name:
                     assert len(list_of_results)
@@ -389,8 +389,8 @@ def maybe_add_missing_fqns(results: NSResultsType) -> None:
     # Check in the first result to find any model with fqn entries defined.
     model_name_with_fqns = None
-    for layer_name, result_type_to_results in results.items():
-        for result_type, model_name_to_results in result_type_to_results.items():
+    for result_type_to_results in results.values():
+        for model_name_to_results in result_type_to_results.values():
             for model_name, model_results in model_name_to_results.items():
                 if len(model_results) > 0:
                     if model_results[0]["fqn"] is not None:
@@ -400,8 +400,8 @@
                         break
 
     if model_name_with_fqns:
-        for layer_name, result_type_to_results in results.items():
-            for result_type, model_name_to_results in result_type_to_results.items():
+        for result_type_to_results in results.values():
+            for model_name_to_results in result_type_to_results.values():
                 ref_model_results = model_name_to_results[model_name_with_fqns]
                 for model_name, model_results in model_name_to_results.items():
                     if model_name == model_name_with_fqns:
diff --git a/torch/ao/quantization/fuse_modules.py b/torch/ao/quantization/fuse_modules.py
index 7c7ef1a88e83a7..77a95b7e0873b5 100644
--- a/torch/ao/quantization/fuse_modules.py
+++ b/torch/ao/quantization/fuse_modules.py
@@ -56,11 +56,11 @@ def fuse_known_modules(mod_list, is_qat, additional_fuser_method_mapping=None):
         fused = fuser_method(is_qat, *mod_list)
         # NOTE: forward hooks not processed in the two following for loops will be lost after the fusion
         # Move pre forward hooks of the base module to resulting fused module
-        for handle_id, pre_hook_fn in mod_list[0]._forward_pre_hooks.items():
+        for pre_hook_fn in mod_list[0]._forward_pre_hooks.values():
             fused.register_forward_pre_hook(pre_hook_fn)
         mod_list[0]._forward_pre_hooks.clear()
         # Move post forward hooks of the last module to resulting fused module
-        for handle_id, hook_fn in mod_list[-1]._forward_hooks.items():
+        for hook_fn in mod_list[-1]._forward_hooks.values():
             fused.register_forward_hook(hook_fn)
         mod_list[-1]._forward_hooks.clear()
         new_mod[0] = fused
diff --git a/torch/ao/quantization/fx/prepare.py b/torch/ao/quantization/fx/prepare.py
index aa9f1f7467f932..803a52c4b570c5 100644
--- a/torch/ao/quantization/fx/prepare.py
+++ b/torch/ao/quantization/fx/prepare.py
@@ -1370,7 +1370,7 @@ def insert_observers_for_model(
 
     # Step 1, set the observer or fake quantize module constructor for each node in the
     # matched_node_pattern
-    for node_name, match_res_with_qconfig in node_name_to_match_result_with_qconfig.items():
+    for match_res_with_qconfig in node_name_to_match_result_with_qconfig.values():
         last_node, matched_node_pattern, pattern, qhandler, qconfig = match_res_with_qconfig
         assert qhandler is not None
         _set_target_dtype_info_for_matched_node_pattern(
@@ -1425,7 +1425,7 @@ def insert_observers_for_model(
     # reset the counters and set of processed_nodes
     processed_nodes: Set[Node] = set()
-    for node_name, match_res_with_qconfig in node_name_to_match_result_with_qconfig.items():
+    for match_res_with_qconfig in node_name_to_match_result_with_qconfig.values():
         last_node, matched_node_pattern, pattern, qhandler, qconfig = match_res_with_qconfig
         is_supported_by_backend = _is_pattern_dtype_config_and_qconfig_supported_by_backend(
             pattern, matched_node_pattern, qconfig, backend_config)
@@ -1654,10 +1654,7 @@ def _run_prepare_fx_on_standalone_modules(
     not modify the graph, it just replaces the unobserved modules with their
     observed versions.
     """
-    for (
-        node_name,
-        (root_node, _, pattern, qhandler, qconfig),
-    ) in node_name_to_match_result_with_qconfig.items():
+    for (root_node, _, pattern, qhandler, qconfig) in node_name_to_match_result_with_qconfig.values():
         if qhandler is None:
             continue
         elif not qhandler.is_standalone_module():
diff --git a/torch/ao/quantization/fx/qconfig_mapping_utils.py b/torch/ao/quantization/fx/qconfig_mapping_utils.py
index d2e11cb2fd6f81..0b906a1777de01 100644
--- a/torch/ao/quantization/fx/qconfig_mapping_utils.py
+++ b/torch/ao/quantization/fx/qconfig_mapping_utils.py
@@ -217,13 +217,13 @@ def _compare_prepare_convert_qconfig_mappings(
     ]
     dict_names = [_OBJECT_TYPE_DICT_KEY, _MODULE_NAME_DICT_KEY, _MODULE_NAME_REGEX_DICT_KEY]
     for i in range(len(prepare_dicts)):
-        for name, qconfig in prepare_dicts[i].items():
-            assert name in convert_dicts[i], "Missing key {} {} in convert QConfigMapping \
-                when it was present in prepare".format(dict_names[i], name)
+        for name in prepare_dicts[i].keys():
+            assert name in convert_dicts[i], f"Missing key {dict_names[i]} {name} in convert QConfigMapping \
+                when it was present in prepare"
             assert convert_dicts[i][name] is None \
                 or qconfig_equals(prepare_dicts[i][name], convert_dicts[i][name]), \
-                "Expected convert QConfigMapping to have the same qconfig as prepare for key {} {}; \
-                prepare: {}; convert: {}".format(dict_names[i], name, prepare_dicts[i][name], convert_dicts[i][name])
+                f"Expected convert QConfigMapping to have the same qconfig as prepare for key {dict_names[i]} {name}; \
+                prepare: {prepare_dicts[i][name]}; convert: {convert_dicts[i][name]}"
 
 def _is_qconfig_supported_by_dtype_configs(qconfig: QConfig, dtype_configs: List[DTypeConfig]):
     for dtype_config in dtype_configs:
diff --git a/torch/ao/quantization/pt2e/qat_utils.py b/torch/ao/quantization/pt2e/qat_utils.py
index dae5e9092c4ffb..c4665bb24a2a57 100644
--- a/torch/ao/quantization/pt2e/qat_utils.py
+++ b/torch/ao/quantization/pt2e/qat_utils.py
@@ -556,7 +556,7 @@ def _fuse_conv_bn_qat(m: GraphModule) -> GraphModule:
             _get_conv_bn_getitem_nodes(r.replacements)
 
         # Step (3a): Copy over metadata for all three nodes in [conv - bn - getitem]
-        for match_pattern_node, original_node in _filter_nodes_map(r.nodes_map).items():
+        for original_node in _filter_nodes_map(r.nodes_map).values():
             if original_node.target == torch.ops.aten.convolution.default:
                 replacement_conv_node.meta = original_node.meta
                 original_to_replacement_node[original_node] = replacement_conv_node
diff --git a/torch/ao/quantization/pt2e/quantizer/x86_inductor_quantizer.py b/torch/ao/quantization/pt2e/quantizer/x86_inductor_quantizer.py
index cef306745dbb25..36f7785b162865 100644
--- a/torch/ao/quantization/pt2e/quantizer/x86_inductor_quantizer.py
+++ b/torch/ao/quantization/pt2e/quantizer/x86_inductor_quantizer.py
@@ -73,7 +73,7 @@ def _get_supported_x86_inductor_config_and_operators() -> List[OperatorConfig]:
     supported_config_and_operators: List[OperatorConfig] = []
     for quantization_config in [get_default_x86_inductor_quantization_config(), ]:
         ops = _supported_quantized_operators()
-        for op_string, pattern_list in ops.items():
+        for pattern_list in ops.values():
             supported_config_and_operators.append(
                 OperatorConfig(quantization_config, pattern_list)
             )
diff --git a/torch/ao/quantization/pt2e/quantizer/xnnpack_quantizer.py b/torch/ao/quantization/pt2e/quantizer/xnnpack_quantizer.py
index 3eeebe1a5a823e..bda553d03b01e8 100644
--- a/torch/ao/quantization/pt2e/quantizer/xnnpack_quantizer.py
+++ b/torch/ao/quantization/pt2e/quantizer/xnnpack_quantizer.py
@@ -116,7 +116,7 @@ def _get_supported_symmetric_config_and_operators() -> List[OperatorConfig]:
         get_symmetric_quantization_config(is_per_channel=True, is_qat=True),
     ]:
         ops = _supported_symmetric_quantized_operators()
-        for op_string, pattern_list in ops.items():
+        for pattern_list in ops.values():
             supported_config_and_operators.append(
                 OperatorConfig(quantization_config, pattern_list)
             )
@@ -517,7 +517,7 @@ def _annotate_linear(
     output_act_qspec = get_output_act_qspec(quantization_config)
     weight_qspec = get_weight_qspec(quantization_config)
     bias_qspec = get_bias_qspec(quantization_config)
-    for module_or_fn_type, partitions in module_partitions.items():
+    for partitions in module_partitions.values():
         for p in partitions:
             act_nodes = [
                 n
diff --git a/torch/distributed/_shard/sharded_tensor/__init__.py b/torch/distributed/_shard/sharded_tensor/__init__.py
index 18d5d513202b05..bb0271ca1826bb 100644
--- a/torch/distributed/_shard/sharded_tensor/__init__.py
+++ b/torch/distributed/_shard/sharded_tensor/__init__.py
@@ -408,7 +408,7 @@ def pre_load_state_dict_hook(module, state_dict, prefix, local_metadata, strict,
     Pre-load state dict hook to add ShardedTensor to the module.
     """
     for submodule_name, submodule in module.named_modules():
-        for attr_name, attr in submodule.__dict__.items():
+        for attr_name in submodule.__dict__.keys():
             mod_prefix = prefix + submodule_name
             key = mod_prefix + ('.' if mod_prefix else '') + attr_name
             if key in state_dict:
diff --git a/torch/distributed/fsdp/_optim_utils.py b/torch/distributed/fsdp/_optim_utils.py
index 25a07b6493900c..6bb8ed98b4684b 100644
--- a/torch/distributed/fsdp/_optim_utils.py
+++ b/torch/distributed/fsdp/_optim_utils.py
@@ -446,7 +446,7 @@ def _flatten_optim_state_dict(
         for fqn in fqns:
             if not unflat_osd_state[fqn]:
                 continue
-            for state_name, param_state in unflat_osd_state[fqn].items():
+            for state_name in unflat_osd_state[fqn].keys():
                 unflat_osd_state[fqn][state_name] = _broadcast_state(
                     fsdp_state, unflat_osd_state[fqn][state_name], group=group
                 )
diff --git a/torch/distributions/distribution.py b/torch/distributions/distribution.py
index bc6910e98c47a9..71ab76682814ab 100644
--- a/torch/distributions/distribution.py
+++ b/torch/distributions/distribution.py
@@ -314,7 +314,7 @@ def _get_checked_instance(self, cls, _instance=None):
     def __repr__(self) -> str:
         param_names = [k for k, _ in self.arg_constraints.items() if k in self.__dict__]
-        args_string = ', '.join(['{}: {}'.format(p, self.__dict__[p]
+        args_string = ', '.join(['{}: {}'.format(p, self.__dict__[p]  # noqa: UP032
                                 if self.__dict__[p].numel() == 1
                                 else self.__dict__[p].size()) for p in param_names])
         return self.__class__.__name__ + '(' + args_string + ')'
diff --git a/torch/fx/_symbolic_trace.py b/torch/fx/_symbolic_trace.py
index cef8553e56d2fa..b0cf86c8c5db64 100644
--- a/torch/fx/_symbolic_trace.py
+++ b/torch/fx/_symbolic_trace.py
@@ -739,7 +739,7 @@ def trace(
             self.root = torch.nn.Module()
             fn = root
 
-        tracer_cls: Optional[Type["Tracer"]] = getattr(self, "__class__", None)
+        tracer_cls: Optional[Type[Tracer]] = getattr(self, "__class__", None)
         self.graph = Graph(tracer_cls=tracer_cls)
         if hasattr(fn, '__code__'):
             code = fn.__code__
diff --git a/torch/fx/experimental/partitioner_utils.py b/torch/fx/experimental/partitioner_utils.py
index eb306b9581e337..b56cf0102f696d 100644
--- a/torch/fx/experimental/partitioner_utils.py
+++ b/torch/fx/experimental/partitioner_utils.py
@@ -12,8 +12,8 @@ class Partition:
     def __init__(self, partition_id: int) -> None:
         self.nodes: Set[Node] = set()
         self.partition_id = partition_id
-        self.parents: Set["Partition"] = set()
-        self.children: Set["Partition"] = set()
+        self.parents: Set[Partition] = set()
+        self.children: Set[Partition] = set()
         self.bfs_level: int = -1
         self.used_mem_bytes: int = 0
         self.logical_device_ids: List[int] = []
diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py
index 9f95c0390098df..ce57ac4856feb5 100644
--- a/torch/fx/experimental/symbolic_shapes.py
+++ b/torch/fx/experimental/symbolic_shapes.py
@@ -1927,26 +1927,26 @@ def __init__(
         self.guards: List[ShapeGuard] = []
         # Maps symbolic ints to their original concrete values
         # Currently populated from tensors
-        self.var_to_val: Dict["sympy.Symbol", "sympy.Integer"] = {}
+        self.var_to_val: Dict[sympy.Symbol, sympy.Integer] = {}
         # Maps symbolic ints to their min/max range. These ranges
         # are conservative: the int MUST fall in the range, but the
         # range may contain ints which may not actually appear in
         # practice
-        self.var_to_range: Dict["sympy.Symbol", ValueRanges] = {}
-        self.var_to_sources: Dict["sympy.Symbol", List[Source]] = {}
-        self.var_to_stack: Dict["sympy.Symbol", traceback.StackSummary] = {}
+        self.var_to_range: Dict[sympy.Symbol, ValueRanges] = {}
+        self.var_to_sources: Dict[sympy.Symbol, List[Source]] = {}
+        self.var_to_stack: Dict[sympy.Symbol, traceback.StackSummary] = {}
         # Maps symbolic ints to the guards that refine their lower/upper
         # bound. If one of them is None, it means that there are no guards
         # that refine that respective bound.
-        self.var_to_guards: Dict["sympy.Symbol", Tuple[Optional[ShapeGuard], Optional[ShapeGuard]]] = {}
+        self.var_to_guards: Dict[sympy.Symbol, Tuple[Optional[ShapeGuard], Optional[ShapeGuard]]] = {}
         # Maps from sympy ints to expressions representing them
         # Populated from equality guards (i.e. a.shape[0] == b.shape[0])
-        self.replacements: Dict["sympy.Symbol", "sympy.Expr"] = {}  #
+        self.replacements: Dict[sympy.Symbol, sympy.Expr] = {}  #
         # Set holds a % b expressions that evaluate to 0.
-        self.divisible: Set["sympy.Expr"] = set()
+        self.divisible: Set[sympy.Expr] = set()
         # Duck-shaping says that if two input tensors have the same size,
         # they get assigned the same symbolic variable
-        self.val_to_var: Dict[int, "sympy.Expr"] = {}
+        self.val_to_var: Dict[int, sympy.Expr] = {}
         if specialize_zero_one:
             self.val_to_var = {0: sympy.Integer(0), 1: sympy.Integer(1)}
         self.unbacked_symfloat_counter = itertools.count()
diff --git a/torch/fx/node.py b/torch/fx/node.py
index e7a8105da4c867..023e5761b60c0c 100644
--- a/torch/fx/node.py
+++ b/torch/fx/node.py
@@ -198,7 +198,7 @@ def __init__(self, graph: 'Graph', name: str, op: str, target: 'Target',
         # would appear once here, but represents two uses.
         #
         # Is a dict to act as an "ordered set". Keys are significant, value dont-care
-        self.users : Dict['Node', None] = {}
+        self.users : Dict[Node, None] = {}
         # Type expression representing the output value of this node.
         # This should contain the same class of Type objects that would appear
         # as type annotations for function inputs/outputs.
diff --git a/torch/fx/passes/graph_drawer.py b/torch/fx/passes/graph_drawer.py
index bcc601d0cea6ab..96c59c49e08d6e 100644
--- a/torch/fx/passes/graph_drawer.py
+++ b/torch/fx/passes/graph_drawer.py
@@ -235,7 +235,7 @@ def _tensor_meta_to_label(self, tm) -> str:
             return result
         elif isinstance(tm, dict):
             result = ""
-            for k, v in tm.items():
+            for v in tm.values():
                 result += self._tensor_meta_to_label(v)
             return result
         elif isinstance(tm, tuple):
diff --git a/torch/fx/passes/tools_common.py b/torch/fx/passes/tools_common.py
index 0af6de5508a822..42032b4b6cad1f 100644
--- a/torch/fx/passes/tools_common.py
+++ b/torch/fx/passes/tools_common.py
@@ -164,7 +164,7 @@ def __call__(self) -> Dict[torch.fx.Node, NodeSet]:
             if node not in self.acc_nodes:
                 continue
 
-            fusion_group: "FxNetAccFusionsFinder.FusionGroup" = self.FusionGroup(
+            fusion_group: FxNetAccFusionsFinder.FusionGroup = self.FusionGroup(
                 top_node_idx=self.nodes.index(node),
                 nodes={node},
                 inputs=set(node.all_input_nodes),
diff --git a/torch/jit/_recursive.py b/torch/jit/_recursive.py
index ed020332fc5597..e842f024daff01 100644
--- a/torch/jit/_recursive.py
+++ b/torch/jit/_recursive.py
@@ -112,13 +112,13 @@ def _get_valid_constant(attr, v, owner_type):
     elif isinstance(v, (tuple, list)):
         return tuple(_get_valid_constant(attr, x, owner_type) for x in v)
     constants = ", ".join(torch.typename(typ) for typ in _constant_types)
-    raise TypeError(textwrap.dedent("""
-        '{}' object in attribute '{}.{}' is not a valid constant.
+    raise TypeError(textwrap.dedent(f"""
+        '{torch.typename(type(v))}' object in attribute '{owner_type}.{attr}' is not a valid constant.
         Valid constants are:
         1. a nn.ModuleList
-        2. a value of type {{{}}}
+        2. a value of type {{{constants}}}
         3. a list or tuple of (2)
-        """.format(torch.typename(type(v)), owner_type, attr, constants)))
+        """))
 
 
 class SourceContext(torch._C._jit_tree_views.SourceRangeFactory):
@@ -509,7 +509,7 @@ def create_script_module_impl(nn_module, concrete_type, stubs_fn):
     def init_fn(script_module):
         # Initialize the ScriptModule:
         # 1. Copy the attributes/parameters/buffers from the original `nn_module` to the new ScriptModule.
-        for name, (attr_type, is_param) in concrete_type.get_attributes().items():
+        for name in concrete_type.get_attributes().keys():
             orig_value = getattr(nn_module, name)
             orig_value = orig_value.value if isinstance(orig_value, torch.jit.Attribute) else orig_value
             cpp_module.setattr(name, orig_value)
diff --git a/torch/masked/maskedtensor/core.py b/torch/masked/maskedtensor/core.py
index 42321bb959c4f5..b397af48d31c27 100644
--- a/torch/masked/maskedtensor/core.py
+++ b/torch/masked/maskedtensor/core.py
@@ -83,7 +83,7 @@ def _helper(a, map_fn):
     for a in args:
         impl_args.append(_helper(a, map_fn))
     impl_kwargs = {}
-    for k, v in kwargs.items():
+    for k in kwargs.keys():
         impl_kwargs[k] = _helper(a, map_fn)
     return impl_args, impl_kwargs
diff --git a/torch/nn/modules/module.py b/torch/nn/modules/module.py
index 2e6eaec90f4c26..d1efe56a53e4af 100644
--- a/torch/nn/modules/module.py
+++ b/torch/nn/modules/module.py
@@ -58,7 +58,7 @@ def __init__(self, hook: Callable, module: Optional["Module"] = None):
         self.with_module: bool = False
 
         if module is not None:
-            self.module: weakref.ReferenceType["Module"] = weakref.ref(module)
+            self.module: weakref.ReferenceType[Module] = weakref.ref(module)
             self.with_module = True
 
     def __call__(self, *args: Any, **kwargs: Any) -> Any:
diff --git a/torch/nn/modules/rnn.py b/torch/nn/modules/rnn.py
index 211c4e65768f40..fdaca358e8b8ed 100644
--- a/torch/nn/modules/rnn.py
+++ b/torch/nn/modules/rnn.py
@@ -59,7 +59,7 @@ def __init__(self, mode: str, input_size: int, hidden_size: int,
         self.dropout = float(dropout)
         self.bidirectional = bidirectional
         self.proj_size = proj_size
-        self._flat_weight_refs: List[Optional[weakref.ReferenceType["Parameter"]]] = []
+        self._flat_weight_refs: List[Optional[weakref.ReferenceType[Parameter]]] = []
         num_directions = 2 if bidirectional else 1
 
         if not isinstance(dropout, numbers.Number) or not 0 <= dropout <= 1 or \
diff --git a/torch/nn/utils/prune.py b/torch/nn/utils/prune.py
index 086f08f0c18b13..1e16f11b6b626e 100644
--- a/torch/nn/utils/prune.py
+++ b/torch/nn/utils/prune.py
@@ -107,10 +107,8 @@ def _get_composite_method(cls, module, name, *args, **kwargs):
             found += 1
     assert (
         found <= 1
-    ), "Avoid adding multiple pruning hooks to the\
-        same tensor {} of module {}. Use a PruningContainer.".format(
-        name, module
-    )
+    ), f"Avoid adding multiple pruning hooks to the\
+        same tensor {name} of module {module}. Use a PruningContainer."
 
     for k in hooks_to_remove:
         del module._forward_pre_hooks[k]
@@ -264,7 +262,7 @@ class PruningContainer(BasePruningMethod):
     """
 
    def __init__(self, *args):
-        self._pruning_methods: Tuple["BasePruningMethod", ...] = tuple()
+        self._pruning_methods: Tuple[BasePruningMethod, ...] = tuple()
         if not isinstance(args, Iterable):  # only 1 item
             self._tensor_name = args._tensor_name
             self.add_pruning_method(args)
diff --git a/torch/nn/utils/spectral_norm.py b/torch/nn/utils/spectral_norm.py
index b9b9dbf9b28805..ea29d095ea1c0f 100644
--- a/torch/nn/utils/spectral_norm.py
+++ b/torch/nn/utils/spectral_norm.py
@@ -115,7 +115,7 @@ def _solve_v_and_rescale(self, weight_mat, u, target_sigma):
     @staticmethod
     def apply(module: Module, name: str, n_power_iterations: int, dim: int, eps: float) -> 'SpectralNorm':
-        for k, hook in module._forward_pre_hooks.items():
+        for hook in module._forward_pre_hooks.values():
             if isinstance(hook, SpectralNorm) and hook.name == name:
                 raise RuntimeError(f"Cannot register two spectral_norm hooks on the same parameter {name}")
diff --git a/torch/nn/utils/weight_norm.py b/torch/nn/utils/weight_norm.py
index 719cf36a133836..d54c34a3e9f205 100644
--- a/torch/nn/utils/weight_norm.py
+++ b/torch/nn/utils/weight_norm.py
@@ -29,7 +29,7 @@ def compute_weight(self, module: Module) -> Any:
     def apply(module, name: str, dim: int) -> 'WeightNorm':
         warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
-        for k, hook in module._forward_pre_hooks.items():
+        for hook in module._forward_pre_hooks.values():
             if isinstance(hook, WeightNorm) and hook.name == name:
                 raise RuntimeError(f"Cannot register two weight_norm hooks on the same parameter {name}")
diff --git a/torch/optim/adadelta.py b/torch/optim/adadelta.py
index a38337426313db..cbceec8abc3f45 100644
--- a/torch/optim/adadelta.py
+++ b/torch/optim/adadelta.py
@@ -148,7 +148,7 @@ def step(self, closure=None):
        \end{aligned}

    For further details regarding the algorithm we refer to `ADADELTA: An Adaptive Learning Rate Method`_.
-    """ + r"""
+    """ + fr"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
@@ -159,14 +159,14 @@ def step(self, closure=None):
        lr (float, optional): coefficient that scale delta before it is applied
            to the parameters (default: 1.0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
-        {foreach}
-        {maximize}
-        {differentiable}
+        {_foreach_doc}
+        {_maximize_doc}
+        {_differentiable_doc}

    .. _ADADELTA\: An Adaptive Learning Rate Method:
        https://arxiv.org/abs/1212.5701

-    """.format(foreach=_foreach_doc, maximize=_maximize_doc, differentiable=_differentiable_doc)
+    """


def adadelta(
diff --git a/torch/optim/adagrad.py b/torch/optim/adagrad.py
index c1e981809c4ecf..1fd3012cdb7d1f 100644
--- a/torch/optim/adagrad.py
+++ b/torch/optim/adagrad.py
@@ -160,7 +160,7 @@ def step(self, closure=None):
    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
-    """ + r"""
+    """ + fr"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
@@ -169,14 +169,14 @@ def step(self, closure=None):
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
-        {foreach}
-        {maximize}
-        {differentiable}
+        {_foreach_doc}
+        {_maximize_doc}
+        {_differentiable_doc}

    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html

-    """.format(foreach=_foreach_doc, maximize=_maximize_doc, differentiable=_differentiable_doc)
+    """


def adagrad(
diff --git a/torch/optim/adam.py b/torch/optim/adam.py
index 687d45534ac5c2..e267974ca37ef4 100644
--- a/torch/optim/adam.py
+++ b/torch/optim/adam.py
@@ -215,7 +215,7 @@ def step(self, closure=None):
        \end{aligned}

    For further details regarding the algorithm we refer to `Adam: A Method for Stochastic Optimization`_.
-    """ + r"""
+    """ + fr"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
@@ -228,18 +228,17 @@ def step(self, closure=None):
        amsgrad (bool, optional): whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`_
            (default: False)
-        {foreach}
-        {maximize}
-        {capturable}
-        {differentiable}
-        {fused}
+        {_foreach_doc}
+        {_maximize_doc}
+        {_capturable_doc}
+        {_differentiable_doc}
+        {_fused_doc}

    .. _Adam\: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ

-    """.format(foreach=_foreach_doc, maximize=_maximize_doc, capturable=_capturable_doc,
-               differentiable=_differentiable_doc, fused=_fused_doc)
+    """


def adam(params: List[Tensor],
diff --git a/torch/optim/adamax.py b/torch/optim/adamax.py
index 1ee927274558f1..f8d1fb0178fdae 100644
--- a/torch/optim/adamax.py
+++ b/torch/optim/adamax.py
@@ -156,7 +156,7 @@ def step(self, closure=None):
        \end{aligned}

    For further details regarding the algorithm we refer to `Adam: A Method for Stochastic Optimization`_.
-    """ + r"""
+    """ + fr"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
@@ -166,14 +166,14 @@ def step(self, closure=None):
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
-        {foreach}
-        {maximize}
-        {differentiable}
+        {_foreach_doc}
+        {_maximize_doc}
+        {_differentiable_doc}

    .. _Adam\: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980

-    """.format(foreach=_foreach_doc, maximize=_maximize_doc, differentiable=_differentiable_doc)
+    """


def adamax(
diff --git a/torch/optim/adamw.py b/torch/optim/adamw.py
index 7a97e5d6a91363..73fd60ae7f7c85 100644
--- a/torch/optim/adamw.py
+++ b/torch/optim/adamw.py
@@ -245,7 +245,7 @@ def step(self, closure=None):
        \end{aligned}

    For further details regarding the algorithm we refer to `Decoupled Weight Decay Regularization`_.
-    """ + r"""
+    """ + fr"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
@@ -258,21 +258,17 @@ def step(self, closure=None):
        amsgrad (bool, optional): whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`_
            (default: False)
-        {maximize}
-        {foreach}
-        {capturable}
-        {differentiable}
-        {fused}
+        {_maximize_doc}
+        {_foreach_doc}
+        {_capturable_doc}
+        {_differentiable_doc}
+        {_fused_doc}

    .. _Decoupled Weight Decay Regularization:
        https://arxiv.org/abs/1711.05101
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ

-    """.format(maximize=_maximize_doc,
-               foreach=_foreach_doc,
-               fused=_fused_doc,
-               capturable=_capturable_doc,
-               differentiable=_differentiable_doc)
+    """


def adamw(
diff --git a/torch/optim/asgd.py b/torch/optim/asgd.py
index e483e1c31fbc7c..5e140b0ca2ad7a 100644
--- a/torch/optim/asgd.py
+++ b/torch/optim/asgd.py
@@ -136,7 +136,7 @@ def step(self, closure=None):
        return loss


-ASGD.__doc__ = r"""Implements Averaged Stochastic Gradient Descent.
+ASGD.__doc__ = fr"""Implements Averaged Stochastic Gradient Descent.

    It has been proposed in `Acceleration of stochastic approximation by
    averaging`_.
@@ -149,14 +149,14 @@ def step(self, closure=None):
        alpha (float, optional): power for eta update (default: 0.75)
        t0 (float, optional): point at which to start averaging (default: 1e6)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
-        {foreach}
-        {maximize}
-        {differentiable}
+        {_foreach_doc}
+        {_maximize_doc}
+        {_differentiable_doc}

    .. _Acceleration of stochastic approximation by averaging:
        https://dl.acm.org/citation.cfm?id=131098

-    """.format(foreach=_foreach_doc, maximize=_maximize_doc, differentiable=_differentiable_doc)
+    """


def asgd(
diff --git a/torch/optim/nadam.py b/torch/optim/nadam.py
index aeb3fc8b77dd2c..4278bb32bfd5b1 100644
--- a/torch/optim/nadam.py
+++ b/torch/optim/nadam.py
@@ -136,7 +136,7 @@ def step(self, closure=None):
        \end{aligned}

    For further details regarding the algorithm we refer to `Incorporating Nesterov Momentum into Adam`_.
-    """ + r"""
+    """ + fr"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
@@ -147,13 +147,13 @@ def step(self, closure=None):
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        momentum_decay (float, optional): momentum momentum_decay (default: 4e-3)
-        {foreach}
-        {differentiable}
+        {_foreach_doc}
+        {_differentiable_doc}

    .. _Incorporating Nesterov Momentum into Adam:
        https://openreview.net/forum?id=OM0jvwB8jIp57ZJjtNEZ

-    """.format(foreach=_foreach_doc, differentiable=_differentiable_doc)
+    """


def nadam(params: List[Tensor],
diff --git a/torch/optim/radam.py b/torch/optim/radam.py
index 120620ab949cc1..4e5742636edfea 100644
--- a/torch/optim/radam.py
+++ b/torch/optim/radam.py
@@ -159,7 +159,7 @@ def step(self, closure=None):
    This implementation uses the same weight_decay implementation as Adam (were the weight_decay is applied
    to the gradient) and not the one from AdamW (were weight_decay is applied to the update). This
    is different from the `author's implementation`_.
-    """ + r"""
+    """ + fr"""
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
@@ -169,15 +169,15 @@ def step(self, closure=None):
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
-        {foreach}
-        {differentiable}
+        {_foreach_doc}
+        {_differentiable_doc}

    .. _On the variance of the adaptive learning rate and beyond:
        https://arxiv.org/abs/1908.03265
    ..
_author's implementation: https://github.com/LiyuanLucasLiu/RAdam - """.format(foreach=_foreach_doc, differentiable=_differentiable_doc) + """ def radam( diff --git a/torch/optim/rmsprop.py b/torch/optim/rmsprop.py index cec27d95506840..df64a9b44ca993 100644 --- a/torch/optim/rmsprop.py +++ b/torch/optim/rmsprop.py @@ -176,7 +176,7 @@ def step(self, closure=None): learning rate is thus :math:`\gamma/(\sqrt{v} + \epsilon)` where :math:`\gamma` is the scheduled learning rate and :math:`v` is the weighted moving average of the squared gradient. - """ + r""" + """ + fr""" Args: params (iterable): iterable of parameters to optimize or dicts defining parameter groups @@ -188,11 +188,11 @@ def step(self, closure=None): centered (bool, optional) : if ``True``, compute the centered RMSProp, the gradient is normalized by an estimation of its variance weight_decay (float, optional): weight decay (L2 penalty) (default: 0) - {foreach} - {maximize} - {differentiable} + {_foreach_doc} + {_maximize_doc} + {_differentiable_doc} - """.format(foreach=_foreach_doc, maximize=_maximize_doc, differentiable=_differentiable_doc) + """ def rmsprop( diff --git a/torch/optim/rprop.py b/torch/optim/rprop.py index 93e7241010500a..04c70d057224b1 100644 --- a/torch/optim/rprop.py +++ b/torch/optim/rprop.py @@ -153,7 +153,7 @@ def step(self, closure=None): For further details regarding the algorithm we refer to the paper `A Direct Adaptive Method for Faster Backpropagation Learning: The RPROP Algorithm `_. - """ + r""" + """ + fr""" Args: params (iterable): iterable of parameters to optimize or dicts defining parameter groups @@ -163,11 +163,11 @@ def step(self, closure=None): (default: (0.5, 1.2)) step_sizes (Tuple[float, float], optional): a pair of minimal and maximal allowed step sizes (default: (1e-6, 50)) - {foreach} - {maximize} - {differentiable} + {_foreach_doc} + {_maximize_doc} + {_differentiable_doc} - """.format(foreach=_foreach_doc, maximize=_maximize_doc, differentiable=_differentiable_doc) + """ def rprop( params: List[Tensor], diff --git a/torch/optim/sgd.py b/torch/optim/sgd.py index 1f679ffb994f1a..326186dc5d1f17 100644 --- a/torch/optim/sgd.py +++ b/torch/optim/sgd.py @@ -127,7 +127,7 @@ def step(self, closure=None): Nesterov momentum is based on the formula from `On the importance of initialization and momentum in deep learning`__. - """ + r""" + """ + fr""" Args: params (iterable): iterable of parameters to optimize or dicts defining parameter groups @@ -136,10 +136,10 @@ def step(self, closure=None): weight_decay (float, optional): weight decay (L2 penalty) (default: 0) dampening (float, optional): dampening for momentum (default: 0) nesterov (bool, optional): enables Nesterov momentum (default: False) - {maximize} - {foreach} - {differentiable} - """.format(maximize=_maximize_doc, foreach=_foreach_doc, differentiable=_differentiable_doc) + r""" + {_maximize_doc} + {_foreach_doc} + {_differentiable_doc} + """ + r""" Example: >>> # xdoctest: +SKIP diff --git a/torch/optim/sparse_adam.py b/torch/optim/sparse_adam.py index c68441cb389c04..5f088a05b5d8ed 100644 --- a/torch/optim/sparse_adam.py +++ b/torch/optim/sparse_adam.py @@ -97,7 +97,7 @@ def step(self, closure=None): return loss -SparseAdam.__doc__ = r"""SparseAdam implements a masked version of the Adam algorithm +SparseAdam.__doc__ = fr"""SparseAdam implements a masked version of the Adam algorithm suitable for sparse gradients. 
Currently, due to implementation constraints (explained below), SparseAdam is only intended for a narrow subset of use cases, specifically parameters of a dense layout with gradients of a sparse layout. This occurs in a @@ -150,9 +150,9 @@ def step(self, closure=None): running averages of gradient and its square (default: (0.9, 0.999)) eps (float, optional): term added to the denominator to improve numerical stability (default: 1e-8) - {maximize} + {_maximize_doc} .. _Adam\: A Method for Stochastic Optimization: https://arxiv.org/abs/1412.6980 - """.format(maximize=_maximize_doc) + """ diff --git a/torch/package/package_exporter.py b/torch/package/package_exporter.py index ebd24383e0b53f..684b9f4fcbfa5d 100644 --- a/torch/package/package_exporter.py +++ b/torch/package/package_exporter.py @@ -998,7 +998,7 @@ def _write(self, filename, str_or_bytes): def _validate_dependency_graph(self): # 1. Check the graph for any errors inserted during dependency analysis. - for module_name, attrs in self.dependency_graph.nodes.items(): + for attrs in self.dependency_graph.nodes.values(): if "error" in attrs: raise PackagingError(self.dependency_graph, debug=self.debug) diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index 891c878cc5f059..91fa699a6d22a7 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -1136,7 +1136,7 @@ class precisionOverride: def __init__(self, d): assert isinstance(d, dict), "precisionOverride not given a dtype : precision dict!" - for dtype, prec in d.items(): + for dtype in d.keys(): assert isinstance(dtype, torch.dtype), f"precisionOverride given unknown dtype {dtype}" self.d = d diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py index aad7679c544611..2c3482b46700a1 100644 --- a/torch/testing/_internal/common_distributed.py +++ b/torch/testing/_internal/common_distributed.py @@ -152,7 +152,7 @@ def decorator(func): @wraps(func) def wrapper(*args, **kwargs): try: - from transformers import ( # noqa: Unused + from transformers import ( # noqa: F401 AutoModelForMaskedLM, BertConfig, ) @@ -760,7 +760,7 @@ def _join_processes(self, fn) -> None: self._check_return_codes(elapsed_time) finally: # Close all pipes - for pid, pipe in self.pid_to_pipe.items(): + for pipe in self.pid_to_pipe.values(): pipe.close() def _check_no_test_errors(self, elapsed_time) -> None: diff --git a/torch/testing/_internal/distributed/rpc/rpc_test.py b/torch/testing/_internal/distributed/rpc/rpc_test.py index f2ef164d96ff8f..bc3bbe2ebcf209 100644 --- a/torch/testing/_internal/distributed/rpc/rpc_test.py +++ b/torch/testing/_internal/distributed/rpc/rpc_test.py @@ -264,7 +264,7 @@ def my_complex_tensor_function(list_input, tensor_class_input, dict_input): res = list_input[0] for t in list_input: res += t - for k, v in dict_input.items(): + for v in dict_input.values(): res += v complex_tensors = tensor_class_input.tensors return (res, complex_tensors[0], complex_tensors[1], complex_tensors[2]) diff --git a/torch/testing/_internal/jit_utils.py b/torch/testing/_internal/jit_utils.py index bdeace15a71d3c..f96c2fb436be9d 100644 --- a/torch/testing/_internal/jit_utils.py +++ b/torch/testing/_internal/jit_utils.py @@ -868,7 +868,7 @@ def get_traced_sample_variant_pairs(device, dtype, op): return outputs for sample in samples: - for func_type, variant in variants.items(): + for variant in variants.values(): if variant is None: continue diff 
--git a/torch/utils/_pytree.py b/torch/utils/_pytree.py index 3ecdc06d9bf77d..2c7e969b829976 100644 --- a/torch/utils/_pytree.py +++ b/torch/utils/_pytree.py @@ -284,7 +284,7 @@ def tree_flatten(pytree: PyTree) -> Tuple[List[Any], TreeSpec]: # Recursively flatten the children result : List[Any] = [] - children_specs : List['TreeSpec'] = [] + children_specs : List[TreeSpec] = [] for child in child_pytrees: flat, child_spec = tree_flatten(child) result += flat diff --git a/torch/utils/benchmark/utils/common.py b/torch/utils/benchmark/utils/common.py index c1636ddb78a2bf..b8134c599d66cb 100644 --- a/torch/utils/benchmark/utils/common.py +++ b/torch/utils/benchmark/utils/common.py @@ -233,7 +233,7 @@ def merge(measurements: Iterable["Measurement"]) -> List["Measurement"]: Merge will extrapolate times to `number_per_run=1` and will not transfer any metadata. (Since it might differ between replicates) """ - grouped_measurements: DefaultDict[TaskSpec, List["Measurement"]] = collections.defaultdict(list) + grouped_measurements: DefaultDict[TaskSpec, List[Measurement]] = collections.defaultdict(list) for m in measurements: grouped_measurements[m.task_spec].append(m) diff --git a/torch/utils/data/datapipes/utils/common.py b/torch/utils/data/datapipes/utils/common.py index cfa6cd95b524dc..f1650ffb5b60fd 100644 --- a/torch/utils/data/datapipes/utils/common.py +++ b/torch/utils/data/datapipes/utils/common.py @@ -322,7 +322,7 @@ def close_streams(cls, v, depth=0): else: # Traverse only simple structures if isinstance(v, dict): - for kk, vv in v.items(): + for vv in v.values(): cls.close_streams(vv, depth=depth + 1) elif isinstance(v, (list, tuple)): for vv in v: diff --git a/torch/utils/dlpack.py b/torch/utils/dlpack.py index a987bca6dcd51b..6bfa4b9f85bd6f 100644 --- a/torch/utils/dlpack.py +++ b/torch/utils/dlpack.py @@ -107,7 +107,7 @@ def from_dlpack(ext_tensor: Any) -> 'torch.Tensor': # attribute, but it is not documented # The array API specify that the default legacy stream must be passed # with a value of 1 for CUDA - # https://data-apis.org/array-api/latest/API_specification/array_object.html?dlpack-self-stream-none#dlpack-self-stream-none # NOQA + # https://data-apis.org/array-api/latest/API_specification/array_object.html?dlpack-self-stream-none#dlpack-self-stream-none is_cuda = device[0] == DLDeviceType.kDLGPU # Since pytorch is not using PTDS by default, lets directly pass # the legacy stream diff --git a/torch/utils/tensorboard/_utils.py b/torch/utils/tensorboard/_utils.py index 3715b7504ff049..2b959726fd2622 100644 --- a/torch/utils/tensorboard/_utils.py +++ b/torch/utils/tensorboard/_utils.py @@ -98,10 +98,8 @@ def convert_to_HWC(tensor, input_format): # tensor: numpy array ), f"You can not use the same dimension shordhand twice. input_format: {input_format}" assert len(tensor.shape) == len( input_format - ), "size of input tensor and input format are different. \ - tensor shape: {}, input_format: {}".format( - tensor.shape, input_format - ) + ), f"size of input tensor and input format are different. 
\ + tensor shape: {tensor.shape}, input_format: {input_format}" input_format = input_format.upper() if len(input_format) == 4: diff --git a/torchgen/native_function_generation.py b/torchgen/native_function_generation.py index 653d7b295629cb..ea38fa3fdcb42b 100644 --- a/torchgen/native_function_generation.py +++ b/torchgen/native_function_generation.py @@ -374,7 +374,7 @@ def add_generated_native_functions( # First we group of NaitveFunctions by schema kind, # then we detect which ones are missing and generate them. pre_grouped_native_functions = pre_group_native_functions(rs) - for k, d in pre_grouped_native_functions.items(): + for d in pre_grouped_native_functions.values(): has_functional = SchemaKind.functional in d has_inplace = SchemaKind.inplace in d has_mutable = SchemaKind.mutable in d diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py index 13910db85c9871..dab15685804ea2 100644 --- a/torchgen/operator_versions/gen_mobile_upgraders.py +++ b/torchgen/operator_versions/gen_mobile_upgraders.py @@ -307,7 +307,7 @@ def get_upgrader_bytecode_function_to_index_map( upgrader_bytecode_function_to_index_map = {} index = 0 for upgrader_bytecode in upgrader_dict: - for upgrader_name, bytecode in upgrader_bytecode.items(): + for upgrader_name in upgrader_bytecode.keys(): if upgrader_name in EXCLUE_UPGRADER_SET: continue upgrader_bytecode_function_to_index_map[upgrader_name] = index
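
The torch/optim hunks above all apply the same refactor: the trailing .format(foreach=_foreach_doc, maximize=_maximize_doc, ...) call is dropped and the shared doc fragments are interpolated directly from an fr-string. A minimal sketch of that pattern follows; the fragment names mirror the _foreach_doc/_maximize_doc identifiers in the diff, but their text here is placeholder and MyOptimizer is a hypothetical stand-in.

    # Sketch only: fragment text is illustrative, not PyTorch's actual wording.
    _foreach_doc = "foreach (bool, optional): whether to use the multi-tensor implementation"
    _maximize_doc = "maximize (bool, optional): maximize the objective instead of minimizing"

    class MyOptimizer:
        pass

    # Before: a raw template plus .format(), so every fragment is named twice.
    MyOptimizer.__doc__ = r"""Implements MyOptimizer.

        Args:
            {foreach}
            {maximize}
        """.format(foreach=_foreach_doc, maximize=_maximize_doc)

    # After: an fr-string pulls in the module-level fragments by name and keeps
    # raw-string escaping for the math notation used in the real docstrings.
    MyOptimizer.__doc__ = fr"""Implements MyOptimizer.

        Args:
            {_foreach_doc}
            {_maximize_doc}
        """

The concatenated form in the hunks themselves (""" + fr""") keeps the LaTeX portion of each docstring, with its literal {aligned} braces, out of the f-string so those braces are not misread as replacement fields.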
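
Many of the other hunks (spectral_norm, weight_norm, package_exporter, common_distributed, jit_utils, the torchgen files, and so on) make one small cleanup: when a dict is iterated but only the keys or only the values are used, .items() is replaced by .keys() or .values() so no throwaway variable is bound. This is the kind of cleanup perflint-style checks report (PERF102 in ruff, if I recall the code correctly). A tiny sketch with hypothetical data:

    hooks = {1: "spectral_norm", 2: "weight_norm"}  # hypothetical sample dict

    # Before: the key is bound but never used.
    for k, hook in hooks.items():
        print(hook)

    # After: iterate the values directly.
    for hook in hooks.values():
        print(hook)

    # Symmetrically, when only the key matters (.keys() or plain iteration both work):
    for name in hooks.keys():
        print(name)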
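
A second recurring cleanup drops the quotes around names used inside typing subscripts, e.g. Tuple["BasePruningMethod", ...] becomes Tuple[BasePruningMethod, ...] and List['TreeSpec'] becomes List[TreeSpec]. The quotes only matter for true forward references; once the class is already defined by the time the annotation is evaluated (or the annotation is a local one that is never evaluated at runtime), they are just noise. A small sketch, using TreeSpec as a stand-in class:

    from typing import List, Tuple

    class TreeSpec:  # stand-in; any class defined before the annotation works
        pass

    # Before: quoted "forward reference", although TreeSpec already exists here.
    def flatten_before(spec: TreeSpec) -> List[Tuple["TreeSpec", str]]:
        return []

    # After: unquoted; the name resolves normally when the signature is evaluated.
    def flatten_after(spec: TreeSpec) -> List[Tuple[TreeSpec, str]]:
        return []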
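
The common_distributed.py hunk also fixes a suppression comment: "Unused" is not a recognized lint code, so it is replaced with the concrete flake8/ruff code F401 (imported but unused). A hedged sketch of the pattern; BertConfig matches the import in the hunk, while the HAS_TRANSFORMERS flag and surrounding scaffolding are hypothetical:

    try:
        # The import is intentionally unused; it only probes whether transformers is installed.
        from transformers import BertConfig  # noqa: F401
        HAS_TRANSFORMERS = True
    except ImportError:
        HAS_TRANSFORMERS = False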