
;;;************************************************************************
;;;                                                                       *
;;;    Copyrighted Cornell University 2003                                *
;;;                                                                       *
;;;                                                                       *
;;;                Formal Digital Library System                          *
;;;                ------------------------------                         *
;;;                                                                       *
;;;   Developed by the FDL group, Department of Computer Science,         *
;;;   Cornell University, Ithaca NY.  See the release notes for a list    *
;;;   of the members of the group.                                        *
;;;                                                                       *
;;;   Permission is granted to use and modify FDL provided this notice    *
;;;   is retained in derived works.                                       *
;;;                                                                       *
;;;                                                                       *
;;;************************************************************************

#+cltl2(in-system-package)
#-cltl2(in-package *system-package-name*)


;;;;	
;;;;	file format
;;;;	
;;;;	<item>		: <level'> | <term'>
;;;;	
;;;;	<term'>		: #\t <term> #\t
;;;;	
;;;;	<level'>	: #\l <level> #\l
;;;;	
;;;;	<level>		: !il{<index>:n}
;;;;			| !data_persist{<type>:token}(<stamp>)
;;;;
;;;;	<level-file>	: <level'><term'{level-data}>
;;;;			| <term'{level-data}>
;;;;
;;;;	<level-data>	:!level{<index>:n,<size:n>}(il{<index>:n})
;;;;			 !symbols{...}
;;;;			 !parameters{ ... }
;;;;			 !operators{}(...)
;;;;			 !terms{}(...)
;;;;
;;;;	
;;;;	read_static_level <level>	: <levels>
;;;;	  * starts with clean slate but may read other levels.
;;;;	  * replaces current levels.
;;;;	


;;;;	TODO : worry about stuff accumulating in level0 hash tables.
;;;;	 - maybe have non-level hash tables which rollover occasionally
;;;;	   and stuff gets flushed remains or promoted to a level.
;;;;
;;;; todo fttb assoc list later hash on stamps.
;;;;	
;;;;	it is possible for lower levels to be shared.
;;;;	But if two intermediate levels have the same stamped then their lower
;;;;	levels must be identical.

(define-primitive |!level| ((natural . index) (natural . size)) (short))

(defvar *levels-cache* nil)

;;; todo : device some array lookup instead of assoc when given index.

(defun levels-lookup (key)
  (if (numberp key)
      (cdr (assoc key *levels-cache*))
      (cdr (assoc key *levels-cache* :test #'equal-stamps-pf))))

(defun levels-update (stamp levels)
  (let ((short (short-term-of-compression-levels levels)))
    (setf *levels-cache* (acons stamp levels *levels-cache*))
    (when short
      (setf *levels-cache* (acons (index-of-il-term short) levels *levels-cache*))))

  levels)

(defun read-level (term &optional levels)
  (declare (ignore levels));; fttb

  (cond
    ((il-term-p term)
     (or (levels-lookup (index-of-il-term term))
	 (read-level (levels-list-assoc term))))

    ((idata-persist-term-p term)
     (let ((type (type-of-idata-persist-term term))
	   (stamp (term-to-stamp (stamp-of-idata-persist-term term))))

       (or (levels-lookup stamp)
	   (levels-update stamp
			  (case type
			    (static (with-cprl-open-in-file (cstream (stamp-to-pathname stamp type))
				      (compression-read-static-level cstream term)))

			    ;; expect some dynamic type which needs levels arg.

			    (otherwise
			     (raise-error
			      (error-message '(compression level read type unknown)
					     type term))))))))

    (t (raise-error
	(error-message '(compression level read input-spec unknown)
		       term)) )))


;; read-term-f may cause recursive call
;; ie tis possible for read-term-f to add a level, if so
;; then must update local levels.
;; but is embedded level higher or lower, higher if occurs after ilevel term
;; lower if before.
(defun compression-read-static-level (cstream persist-term)

  (labels
      ((doread () (cprl-stream-read cstream)))

    (let* ((ilevel (doread))		; may add levels.
	   (level-index (index-of-ilevel-term ilevel)))

      ;;(setf a ilevel)  (break "crsl")
      (let ((levels (new-compression-levels (levels-of-cprl-stream cstream)
					    'in-out
					    nil
					    (size-of-ilevel-term ilevel))))
	(with-levels-out (levels)
	  
	  (cprl-stream-update-levels cstream levels) ; need new level during read.
	
	  (let ((level (level-of-compression-levels levels level-index)))

	    (set-short-term-of-compression-level level
						 (short-of-ilevel-term ilevel))
	    (set-persist-term-of-compression-level level persist-term)
    
	    (unless (= level-index (1- (count-of-compression-levels levels)))
	      ;; ?? sensible to truncate input levels ??
	      (break "compression level mismatch"))

	    (dolist (symparm (parameters-of-term (setf css (doread))))
	      (compression-static-symbol-update symparm))
	 
	    (let ((byte (add-byte *compression-parameter-type* level-index)))
	      (dolist (parm (parameters-of-term (setf csp (doread))))
		(compression-level-static-update parm *compression-parameter-type*)
		;;(compression-level-update-table levels level-index parm *compression-parameter-type*
		;;(compression-level-update-array level byte parm))
		))
      
	    (let ((byte (add-byte *compression-operator-type* level-index)))
	      (dolist (bt (bound-terms-of-term (setf cso (doread))))
		(let ((op (operator-of-term (term-of-bound-term bt))))
		  (compression-level-static-update op *compression-operator-type*)
		  ;;(compression-level-update-table levels level-index op *compression-operator-type*
		  ;;(compression-level-update-array level byte op))
		  )))

	    (let ((byte (add-byte *compression-term-type* level-index)))
	      (dolist (bt (bound-terms-of-term (setf cst (doread))))
		(let ((term (term-of-bound-term bt)))
		  (compression-level-static-update term *compression-term-type*)
		  ;;(compression-level-update-table levels level-index term *compression-term-type*
		  ;;			(compression-level-update-array level byte term))
		  )))

	    (setf (compression-table-open (table-of-compression-level level)) nil)
	    (table-clean-close (table-of-compression-level level))
	  
	    (setf aa levels)
	    (with-ignore (compression-level-sanity-check levels level-index))
	    levels))))))




(defvar *compression-training-tables* nil)

(defun compression-training-tables-p ()
  (not (null *compression-training-tables*)))



(defun compression-static-symbol-parameter (s)
  (cond
    ((extended-parameter-value-p s)
     (instantiate-parameter s *variable-type*))
    ((variable-id-p s)
     (instantiate-parameter s *variable-type*))
    ((meta-variable-id-p s)
     (instantiate-parameter s *variable-type*))
    ((symbolp s)
     (instantiate-parameter s *token-type*))
    (t (system-error '(write-compression-table)))))

(defun compression-static-symbol-update (p)
  (compression-level-static-update (value-of-parameter-n p)
				   (if (variable-parameter-p p)
				       *compression-binding-type*
				       *compression-opid-type*))

    #+foo(compression-level-update-table levels level-index structure
					 (if variable-p
					     *compression-binding-type*
					     *compression-opid-type*)
					 (compression-level-update-array level
									 (if variable-p
									     (add-byte *compression-binding-type*
										       level-index)
									     (add-byte *compression-opid-type*
										       level-index))
									 structure))
    )


(defun histo-report (h index &optional all)
  (let ((sum 0)
	(cutoff 0)
	(p nil))
    
    (terpri)
    (format t ";;;;	~%")
    (format t ";;;;	Level ~a histogram. ~%" index)
    (format t ";;;;	~%")
    (format t ";;;;	~%")

    (dolist (pool h)

      (when (and (null p) ;; avoid reseting cutoff.
		 (< (car pool) 8
		    ;;(length (cdr pool))
		    ))
	(setf cutoff (car pool))
	(setf p t))

      (unless (and p (not all))
	(unless p
	  (incf sum (length (cdr pool))))
	(format t ";;;;	Hits ~7:D, pool ~5:D~%" (car pool) (length (cdr pool)))))

    (format t ";;;;	~%")
    (format t ";;;;	Hits Cutoff	~7:D.~%" cutoff)
    (format t ";;;;	Pool Sum	~7:D.~%" sum)))


;;;;	avoid time,oid parameters. avoid !stamp terms 
;;;;	
;;;;	<compression-heuristics>	: (<ch-lambdas{avoid}> . <ch-lambdas{attract}>)
;;;;	
;;;;	<ch-lambdas>	: <closure{symbol}>
;;;;			  <closure{binding}>
;;;;			  <closure{parameter}>
;;;;			  <closure{op}>
;;;;			  <closure{term}> 
;;;;	

(defstruct compression-lambdas
  token binding parameter operator term)

(defun new-compression-lambdas (tok b p op term)
  (make-compression-lambdas :token tok :binding b :parameter p :operator op :term term))

(defun test-compression-heuristics (heuristics type structure curbool)
  (if heuristics
      (let ((cls (if curbool (car heuristics) (cdr heuristics))))
	(if (null cls)
	    curbool
	    (let ((cl (case type
			(symbol (compression-lambdas-token cls))
			(binding (compression-lambdas-binding cls))
			(parameter (compression-lambdas-parameter cls))
			(operator (compression-lambdas-operator cls))
			(term (compression-lambdas-term cls)))))
	      (if cl
		  (funcall cl structure)
		  curbool))))
      curbool))


;; return t allow and nil to avoid.
(defun oid-avoid-compression-heuristics ()
  (cons (new-compression-lambdas #'(lambda (s)
				     ;; allow strings so they will be shared.
				     ;;(setf a s) (break)
				     (not (and (symbolp s) (< (length (string s)) 4))))
				 nil
				 #'(lambda (p)
				     (not (or (time-parameter-p p) (oid-parameter-p p))))
				 #'(lambda (op)
				     (not (or (eql *istamp* (id-of-operator op))
					      (eql *ioid* (id-of-operator op)))))
				 #'(lambda (term)
				     (not (or (istamp-term-p term)
					      (eql *ioid* (id-of-term term))))))
	nil))
  


(defun compression-write-static-level (cstream levels index stamp &optional heuristics)

  (when (> index 0)
    (cprl-stream-update-levels cstream (write-level levels (1- index) 'static))
    (cprl-stream-send-levels cstream levels (1- index))

    ;; need cstream to reflect levels -1
    )

  ;; TODO : this has not been tested since with-levels-out implemented. I'd expect some problems.
  (with-levels-out (levels)
    (let ((histogram nil)
	  (tables (eq-of-compression-table (table-of-compression-levels levels index))))

      ;; build histogram
      (dotimes (i *num-kinds-of-compression-table*)
	(maphash #'(lambda (k chte)
		     (declare (ignore k))
		     (let ((count (count-of-chte chte)))
		       (when (> count 2)
			 (let ((pool (assoc count histogram)))
			   (if pool
			       (setf (cdr pool) (cons chte (cdr pool)))
			       (setf histogram (cons (cons count (list chte)) histogram)))))))
		 (aref tables i)))

      ;;(setf aa (sort histogram #'> :key #'car) a symbols b parameters c ops d terms) (break "cwsl")

      (let ((available-indices *compression-level-size*)
	    (symbols nil)
	    (parameters nil)
	    (ops nil)
	    (terms nil))

	;; <histogram>		: (<count> . <chte> list) list

	(let ((histosort (sort histogram #'> :key #'car)))
	  (histo-report histosort index)
	
	  (do ((l histosort (cdr l)))
	      ((or (<= available-indices 0)
		   (null l)
		   ;; heusristic: stop when the number of structures exceeds the number of
		   ;;    hits of those structures.
		   ;; ie avoid saving a lot of things hit a few times.
		   (< (caar l)		;(length (cdar l))
		      8
		      )))
	    (let* ((lchtes (cdr (car l)))
		   (num 0))
	      (dolist (chte (if (>= 0 (- available-indices num))
				(subseq lchtes 0 available-indices)
				lchtes))
		(let ((actual (actual-of-chte chte)))
		  (cond
		    ((or (symbolp actual)
			 (extended-parameter-value-p actual))
		     (when (and heuristics (test-compression-heuristics heuristics 'symbol actual t))
		       (incf num)
		       (push actual symbols)))
		    ((parameter-p actual)
		     (when (and heuristics (test-compression-heuristics heuristics 'parameter actual t))
		       (incf num)
		       (push actual parameters)))
		    ((consp actual)
		     (when (and heuristics (test-compression-heuristics heuristics 'operator actual t))
		       (incf num)
		       (push actual ops)))
		    ((term-p actual)
		     (when (and heuristics (test-compression-heuristics heuristics 'term actual t))
		       (incf num)
		       (push actual terms))))))
		    
	      (setf available-indices (- available-indices num)))))

	(let* ((size  (+ (length symbols)
			 (length parameters)
			 (length ops)
			 (length terms)))

	       (clevels (new-compression-levels (levels-of-cprl-stream cstream)
						'in-out nil size))
	       (clevel (level-of-compression-levels clevels index)))

	    
	  (cprl-stream-update-levels cstream clevels)
	    
	  (set-persist-term-of-compression-level clevel
						 (idata-persist-term 'static
								     (stamp-to-term stamp)))
	  (let ((aterm (levels-list-add (persist-term-of-compression-level clevel))))
	    (cprl-stream-write cstream (ilevel-term index size (short-of-ilevel-assoc-term aterm))))
	
	  ;; symbols
	  (cprl-stream-write
	   cstream
	   (instantiate-term
	    (instantiate-operator '|!symbols|
				  (mapcar #'(lambda (s)
					      (let ((p (compression-static-symbol-parameter s)))
						(compression-static-symbol-update p)
						p))
					  symbols))))

	  ;; parameters
	  (cprl-stream-write cstream
			     (instantiate-term (instantiate-operator '|!parameters| parameters)))

	  (dolist (parm parameters)
	    (compression-level-static-update parm *compression-parameter-type*)
	    ;;(compression-level-update-table clevels index parm *compression-parameter-type*
	    ;;			      (compression-level-update-array clevel byte parm))
	    )

	  ;; operators
	  (cprl-stream-write cstream
			     (instantiate-term (instantiate-operator '|!operators|)
					       (mapcar #'(lambda (o)
							   (instantiate-bound-term
							    (instantiate-term o)))
						       ops)))
	  (dolist (op ops)
	    (compression-level-static-update op *compression-operator-type*)
	    ;;(compression-level-update-table clevels index op *compression-operator-type*
	    ;;(compression-level-update-array clevel byte op))
	    )

	  ;;(break "ff")

	  ;; terms
	  (let ((sterms (sort terms #'< :key #'term-op-count)))
	    (cprl-stream-write cstream
			       (instantiate-term (instantiate-operator '|!terms|)
						 (mapcar #'instantiate-bound-term sterms)))
	    
	    ;;(setf ww sterms) (break)
	    (dolist (term sterms)
	      (compression-level-static-update term *compression-term-type*)
	      ;;(compression-level-update-table clevels index term *compression-term-type*
	      ;;			      (compression-level-update-array clevel byte term))
	      ))
	  clevels)))))



(defmacro with-compression-training (&body body)
  `(let ((*compression-training-tables* (new-compression-levels nil 'out *compression-level-size*)))

    (prog1 (progn ,@body)

      ;;(setf dd *compression-training-tables*)	(break "wct")

      (write-level *compression-training-tables* 0 'static))))


(defun start-compression-training ()
  (if *compression-training-tables*
      (raise-error (error-message '(compression-training start in-progress)))
      (setf *compression-training-tables*
	    (new-compression-levels nil 'out *compression-level-size*))))


(defun stop-compression-training (&optional heuristics)
  (if (null *compression-training-tables*)
      (raise-error (error-message '(compression-training stop none)))
      (progn
	(setf dd *compression-training-tables*)	(break "wct")
	(prog1 (let ((levels *compression-training-tables*))
		 (let ((*compression-training-tables* nil))
		   (let ((pt (persist-term-of-compression-levels
			      (write-level levels 0 'static heuristics)
			      0)))
		   ;; test
		     (with-ignore (compression-level-sanity-check (read-level pt) 0))
		      (put-level-file-pointer pt))))
	  (setf *compression-training-tables* nil)))))


(defun get-levels ()
  (with-ignore
      (read-level (get-level-file-pointer))))

