Spaces:

dx2102
/

search-mathlib

Running

App Files Files Community

search-mathlib / extract.lean

dx2102

Upload 2 files

19cafec verified 5 months ago

raw

history blame

3.85 kB

	import Lean
	import Mathlib.Lean.Expr.Basic
	import Mathlib.Lean.CoreM
	import Batteries.Lean.Util.Path
	import Batteries.Data.String.Matcher

	open Lean
	open Lean.Meta
	open Lean.Elab
	open Lean.Elab.Command

	/-
	This file is modified from: https://github.com/semorrison/lean-training-data

	To run this, prepare a Lean project with Mathlib installed. Suppose the project is called LeanProj.

	Put this extract.lean in LeanProj/scripts/extract.lean

	In LeanProj/lakefile.lean, below
	@[default_target]
	lean_lib «LeanProj» where
	```,

	add this:
	@[default_target]
	lean_exe extract where
	root := `scripts.extract
	supportInterpreter := true
	```

	Now in the terminal, run:
	cd LeanProj
	mkdir data
	lake exe extract > data/theorems.txt

	The stdout (IO.println) will be directed to data/theorems.txt,
	The stderr (IO.eprintln) will be shown in the terminal. You will see the progress. It takes about 15 minutes.
	-/

	/--
	Helps print kinds of constants
	and distinguish between theorems, definitions, axioms, etc.
	-/
	def Lean.ConstantInfo.kind : ConstantInfo → String
	\| .axiomInfo _ => "axiom"
	\| .defnInfo _ => "def"
	\| .thmInfo _ => "theorem"
	\| .opaqueInfo _ => "opaque"
	\| .quotInfo _ => "quot" -- Not sure what this is!
	\| .inductInfo _ => "inductive"
	\| .ctorInfo _ => "constructor"
	\| .recInfo _ => "recursor"

	/-
	Turn Lean.Expr and related types into JSON.
	-/


	def truncatePrint (s : String) : IO Unit := do
	IO.println (if s.length < 1000 then s else s.take 997 ++ " ...")

	def main : IO Unit := do
	IO.eprintln "\n\n"
	IO.eprintln "importing modules"
	searchPathRef.set compile_time_search_path%
	CoreM.withImportModules #[`Mathlib] (options := {entries := [
	-- (`maxHeartbeats, .ofNat 0),
	(`smartUnfolding, .ofBool false),
	]}) do
	IO.eprintln s!"simplification timeout: {← getMaxHeartbeats}"
	let env ← getEnv

	IO.eprintln "filtering out unwanted objects"
	let mut x := 0
	let mut lst := #[]
	lst := lst ++ env.constants.map₁.toArray
	lst := lst ++ env.constants.map₂.toArray
	lst := lst.filter fun (name, info) => (
	!name.isInternalOrNum -- s.startsWith "_" \| .num
	&& !name.isInaccessibleUserName -- s.contains '✝' \|\| s == "_inaccessible"
	&& !name.isImplementationDetail -- s.startsWith "__"
	&& !(name.toString.findSubstr? ".proof_" \|>.isSome)
	&& !(name.toString.findSubstr? ".match_" \|>.isSome)
	)

	-- name.toString.startsWith "Nat.add"
	IO.eprintln s!"printing {lst.size} theorems"
	for (name, info) in lst do
	IO.println "----ast----"
	-- print module.
	-- eg. "import Mathlib.Data.Real.Basic"
	match (← findModuleOf? name) with
	\| .none => IO.println "unknown"
	\| .some mod => IO.println s!"import {mod}"
	IO.println "--ast--"

	-- print kine and decl name
	-- eg. "theorem Nat.add_comm" or "def Nat.add"
	IO.println s!"{info.kind} {name}"
	IO.println "--ast--"

	-- print decl type.
	-- eg. "∀ (n m : Nat), n + m = m + n"
	-- ppExpr is better than dbgToString
	let ppType := toString (← MetaM.run' do ppExpr info.type)
	truncatePrint ppType
	IO.println "--ast--"

	-- print doc string if exists.
	-- eg. O(\|l\| + \|r\|). Merge two lists using s as a switch.
	match (← findDocString? env name) with
	\| .none => IO.println ""
	\| .some doc => IO.println doc
	-- IO.println "--ast--"

	-- print decl value. These Lean.Expr are very large.
	-- match info.value? with
	-- \| .none => IO.println ""
	-- \| .some val => IO.println (toString (← MetaM.run' do ppExpr val))
	-- IO.println "--ast--"

	x := x + 1
	if x % 100 == 0 then
	IO.eprintln s!"printed {x} / {lst.size} theorems, {x.toFloat / lst.size.toFloat * 100}%"
	-- if x ≥ 2 then break
	IO.eprintln "done"